diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000..9fdc1ddc9a --- /dev/null +++ b/.clang-format @@ -0,0 +1,109 @@ +Language: Cpp +BasedOnStyle: WebKit +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveAssignments: None +AlignConsecutiveBitFields: Consecutive +AlignConsecutiveDeclarations: None +AlignConsecutiveMacros: None +AlignEscapedNewlines: DontAlign +AlignOperands: Align +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortCaseLabelsOnASingleLine: true +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLambdasOnASingleLine: Inline +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: true +AttributeMacros: + - JEMALLOC_NOTHROW + - FEX_ALIGNED + - FEX_ANNOTATE + - FEX_DEFAULT_VISIBILITY + - FEX_NAKED + - FEX_PACKED + - FEXCORE_PRESERVE_ALL_ATTR + - GLIBC_ALIAS_FUNCTION +BinPackArguments: true +BinPackParameters: true +BitFieldColonSpacing: Both +BreakAfterAttributes: Always # clang 16 required +BreakBeforeBraces: Attach +BreakBeforeBinaryOperators: None +BreakBeforeInlineASMColon: OnlyMultiline # clang 16 required +BreakBeforeTernaryOperators: false +BreakConstructorInitializers: BeforeComma +BreakInheritanceList: BeforeColon +ColumnLimit: 140 +CompactNamespaces: false +ConstructorInitializerIndentWidth: 2 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +EmptyLineAfterAccessModifier: Leave +EmptyLineBeforeAccessModifier: Leave +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +IncludeBlocks: Preserve +IndentAccessModifiers: false +IndentCaseBlocks: false +IndentCaseLabels: false +IndentExternBlock: AfterExternBlock +IndentGotoLabels: false +IndentPPDirectives: None +IndentRequires: false +IndentWidth: 
2 +InsertBraces: true +KeepEmptyLinesAtTheStartOfBlocks: true +LambdaBodyIndentation: OuterScope +LineEnding: LF # clang 16 required +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: Inner +QualifierAlignment: Left +PackConstructorInitializers: Never +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 2 +PenaltyBreakOpenParenthesis: 2 +PenaltyBreakString: 10 +PenaltyBreakTemplateDeclaration: 8 +PenaltyExcessCharacter: 2 +PenaltyReturnTypeOnItsOwnLine: 16 +PointerAlignment: Left +RemoveBracesLLVM: false +ReferenceAlignment: Left +ReflowComments: true +RequiresClausePosition: WithPreceding +SeparateDefinitionBlocks: Leave +SortIncludes: Never +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: false +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: Custom +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterFunctionDeclarationName: false + AfterFunctionDefinitionName: false + AfterOverloadedOperator: false + AfterRequiresInClause: true + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Leave +SpacesInCStyleCastParentheses: false +SpacesInConditionalStatement: false +SpacesInParentheses: false +Standard: c++20 +UseTab: Never diff --git a/.clang-format-ignore b/.clang-format-ignore new file mode 100644 index 0000000000..c255ca720d --- /dev/null +++ b/.clang-format-ignore @@ -0,0 +1,16 @@ +# This file is used to ignore files and directories from clang-format + +# Ignore build folder +build/* + +# Ignore all files in the External directory +External/* + +# SoftFloat-3e code doesn't belong to us +FEXCore/Source/Common/SoftFloat-3e/* +Source/Common/cpp-optparse/* + +# Files with human-indented tables for 
readability - don't mess with these +FEXCore/Source/Interface/Core/X86Tables/X87Tables.cpp +FEXCore/Source/Interface/Core/X86Tables/XOPTables.cpp +FEXCore/Source/Interface/Core/X86Tables/* diff --git a/FEXCore/Source/Common/JitSymbols.cpp b/FEXCore/Source/Common/JitSymbols.cpp index 662215c0d0..79e3ff31f2 100644 --- a/FEXCore/Source/Common/JitSymbols.cpp +++ b/FEXCore/Source/Common/JitSymbols.cpp @@ -7,133 +7,142 @@ #include namespace FEXCore { - JITSymbols::JITSymbols() { - } +JITSymbols::JITSymbols() {} - JITSymbols::~JITSymbols() { - if (fd != -1) { - close(fd); - } +JITSymbols::~JITSymbols() { + if (fd != -1) { + close(fd); } +} - void JITSymbols::InitFile() { - // We can't use FILE here since we must be robust against forking processes closing our FD from under us. +void JITSymbols::InitFile() { + // We can't use FILE here since we must be robust against forking processes closing our FD from under us. #ifdef __ANDROID__ - // Android simpleperf looks in /data/local/tmp instead of /tmp - const auto PerfMap = fextl::fmt::format("/data/local/tmp/perf-{}.map", getpid()); + // Android simpleperf looks in /data/local/tmp instead of /tmp + const auto PerfMap = fextl::fmt::format("/data/local/tmp/perf-{}.map", getpid()); #else - const auto PerfMap = fextl::fmt::format("/tmp/perf-{}.map", getpid()); + const auto PerfMap = fextl::fmt::format("/tmp/perf-{}.map", getpid()); #endif - fd = open(PerfMap.c_str(), O_CREAT | O_TRUNC | O_WRONLY | O_APPEND, 0644); - } + fd = open(PerfMap.c_str(), O_CREAT | O_TRUNC | O_WRONLY | O_APPEND, 0644); +} - void JITSymbols::RegisterNamedRegion(const void *HostAddr, uint32_t CodeSize, std::string_view Name) { - if (fd == -1) return; +void JITSymbols::RegisterNamedRegion(const void* HostAddr, uint32_t CodeSize, std::string_view Name) { + if (fd == -1) { + return; + } - // Linux perf format is very straightforward - // ` \n` - const auto Buffer = fextl::fmt::format("{} {:x} {}\n", HostAddr, CodeSize, Name); - auto Result = write(fd, 
Buffer.c_str(), Buffer.size()); - if (Result == -1 && errno == EBADF) { - fd = -1; - } + // Linux perf format is very straightforward + // ` \n` + const auto Buffer = fextl::fmt::format("{} {:x} {}\n", HostAddr, CodeSize, Name); + auto Result = write(fd, Buffer.c_str(), Buffer.size()); + if (Result == -1 && errno == EBADF) { + fd = -1; } +} - void JITSymbols::RegisterJITSpace(const void *HostAddr, uint32_t CodeSize) { - if (fd == -1) return; +void JITSymbols::RegisterJITSpace(const void* HostAddr, uint32_t CodeSize) { + if (fd == -1) { + return; + } - // Linux perf format is very straightforward - // ` \n` - const auto Buffer = fextl::fmt::format("{} {:x} FEXJIT\n", HostAddr, CodeSize); - auto Result = write(fd, Buffer.c_str(), Buffer.size()); - if (Result == -1 && errno == EBADF) { - fd = -1; - } + // Linux perf format is very straightforward + // ` \n` + const auto Buffer = fextl::fmt::format("{} {:x} FEXJIT\n", HostAddr, CodeSize); + auto Result = write(fd, Buffer.c_str(), Buffer.size()); + if (Result == -1 && errno == EBADF) { + fd = -1; } +} - // Buffered JIT symbols. - void JITSymbols::Register(Core::JITSymbolBuffer *Buffer, const void *HostAddr, uint64_t GuestAddr, uint32_t CodeSize) { - if (fd == -1) return; - - // Calculate remaining sizes. - const auto RemainingSize = Buffer->BUFFER_SIZE - Buffer->Offset; - const auto CurrentBufferOffset = &Buffer->Buffer[Buffer->Offset]; - - // Linux perf format is very straightforward - // ` \n` - const auto FMTResult = fmt::format_to_n(CurrentBufferOffset, RemainingSize, "{} {:x} JIT_0x{:x}_{}\n", HostAddr, CodeSize, GuestAddr, HostAddr); - if (FMTResult.out >= &Buffer->Buffer[Buffer->BUFFER_SIZE]) { - // Couldn't fit, need to force a write. - WriteBuffer(Buffer, true); - // Rerun - Register(Buffer, HostAddr, GuestAddr, CodeSize); - return; - } +// Buffered JIT symbols. 
+void JITSymbols::Register(Core::JITSymbolBuffer* Buffer, const void* HostAddr, uint64_t GuestAddr, uint32_t CodeSize) { + if (fd == -1) { + return; + } - Buffer->Offset += FMTResult.size; - WriteBuffer(Buffer); + // Calculate remaining sizes. + const auto RemainingSize = Buffer->BUFFER_SIZE - Buffer->Offset; + const auto CurrentBufferOffset = &Buffer->Buffer[Buffer->Offset]; + + // Linux perf format is very straightforward + // ` \n` + const auto FMTResult = fmt::format_to_n(CurrentBufferOffset, RemainingSize, "{} {:x} JIT_0x{:x}_{}\n", HostAddr, CodeSize, GuestAddr, HostAddr); + if (FMTResult.out >= &Buffer->Buffer[Buffer->BUFFER_SIZE]) { + // Couldn't fit, need to force a write. + WriteBuffer(Buffer, true); + // Rerun + Register(Buffer, HostAddr, GuestAddr, CodeSize); + return; } - void JITSymbols::Register(Core::JITSymbolBuffer *Buffer, const void *HostAddr, uint32_t CodeSize, std::string_view Name, uintptr_t Offset) { - if (fd == -1) return; - - // Calculate remaining sizes. - const auto RemainingSize = Buffer->BUFFER_SIZE - Buffer->Offset; - const auto CurrentBufferOffset = &Buffer->Buffer[Buffer->Offset]; - - // Linux perf format is very straightforward - // ` \n` - const auto FMTResult = fmt::format_to_n(CurrentBufferOffset, RemainingSize, "{} {:x} {}+0x{:x} ({})\n", HostAddr, CodeSize, Name, Offset, HostAddr); - if (FMTResult.out >= &Buffer->Buffer[Buffer->BUFFER_SIZE]) { - // Couldn't fit, need to force a write. 
- WriteBuffer(Buffer, true); - // Rerun - Register(Buffer, HostAddr, CodeSize, Name, Offset); - return; - } + Buffer->Offset += FMTResult.size; + WriteBuffer(Buffer); +} - Buffer->Offset += FMTResult.size; - WriteBuffer(Buffer); +void JITSymbols::Register(Core::JITSymbolBuffer* Buffer, const void* HostAddr, uint32_t CodeSize, std::string_view Name, uintptr_t Offset) { + if (fd == -1) { + return; } - void JITSymbols::RegisterNamedRegion(Core::JITSymbolBuffer *Buffer, const void *HostAddr, uint32_t CodeSize, std::string_view Name) { - if (fd == -1) return; - - // Calculate remaining sizes. - const auto RemainingSize = Buffer->BUFFER_SIZE - Buffer->Offset; - const auto CurrentBufferOffset = &Buffer->Buffer[Buffer->Offset]; - - // Linux perf format is very straightforward - // ` \n` - const auto FMTResult = fmt::format_to_n(CurrentBufferOffset, RemainingSize, "{} {:x} {}\n", HostAddr, CodeSize, Name); - if (FMTResult.out >= &Buffer->Buffer[Buffer->BUFFER_SIZE]) { - // Couldn't fit, need to force a write. - WriteBuffer(Buffer, true); - // Rerun - RegisterNamedRegion(Buffer, HostAddr, CodeSize, Name); - return; - } + // Calculate remaining sizes. + const auto RemainingSize = Buffer->BUFFER_SIZE - Buffer->Offset; + const auto CurrentBufferOffset = &Buffer->Buffer[Buffer->Offset]; + + // Linux perf format is very straightforward + // ` \n` + const auto FMTResult = + fmt::format_to_n(CurrentBufferOffset, RemainingSize, "{} {:x} {}+0x{:x} ({})\n", HostAddr, CodeSize, Name, Offset, HostAddr); + if (FMTResult.out >= &Buffer->Buffer[Buffer->BUFFER_SIZE]) { + // Couldn't fit, need to force a write. 
+ WriteBuffer(Buffer, true); + // Rerun + Register(Buffer, HostAddr, CodeSize, Name, Offset); + return; + } + + Buffer->Offset += FMTResult.size; + WriteBuffer(Buffer); +} - Buffer->Offset += FMTResult.size; - WriteBuffer(Buffer); +void JITSymbols::RegisterNamedRegion(Core::JITSymbolBuffer* Buffer, const void* HostAddr, uint32_t CodeSize, std::string_view Name) { + if (fd == -1) { + return; } - void JITSymbols::WriteBuffer(Core::JITSymbolBuffer *Buffer, bool ForceWrite) { - auto Now = std::chrono::steady_clock::now(); - if (!ForceWrite) { - if (((Buffer->LastWrite - Now) < Buffer->MAXIMUM_THRESHOLD) && - Buffer->Offset < Buffer->NEEDS_WRITE_DISTANCE) { - // Still buffering, no need to write. - return; - } - } + // Calculate remaining sizes. + const auto RemainingSize = Buffer->BUFFER_SIZE - Buffer->Offset; + const auto CurrentBufferOffset = &Buffer->Buffer[Buffer->Offset]; + + // Linux perf format is very straightforward + // ` \n` + const auto FMTResult = fmt::format_to_n(CurrentBufferOffset, RemainingSize, "{} {:x} {}\n", HostAddr, CodeSize, Name); + if (FMTResult.out >= &Buffer->Buffer[Buffer->BUFFER_SIZE]) { + // Couldn't fit, need to force a write. + WriteBuffer(Buffer, true); + // Rerun + RegisterNamedRegion(Buffer, HostAddr, CodeSize, Name); + return; + } + + Buffer->Offset += FMTResult.size; + WriteBuffer(Buffer); +} - Buffer->LastWrite = Now; - auto Result = write(fd, Buffer->Buffer, Buffer->Offset); - if (Result == -1 && errno == EBADF) { - fd = -1; +void JITSymbols::WriteBuffer(Core::JITSymbolBuffer* Buffer, bool ForceWrite) { + auto Now = std::chrono::steady_clock::now(); + if (!ForceWrite) { + if (((Buffer->LastWrite - Now) < Buffer->MAXIMUM_THRESHOLD) && Buffer->Offset < Buffer->NEEDS_WRITE_DISTANCE) { + // Still buffering, no need to write. 
+ return; } + } - Buffer->Offset = 0; + Buffer->LastWrite = Now; + auto Result = write(fd, Buffer->Buffer, Buffer->Offset); + if (Result == -1 && errno == EBADF) { + fd = -1; } + + Buffer->Offset = 0; +} } // namespace FEXCore diff --git a/FEXCore/Source/Interface/Config/Config.cpp b/FEXCore/Source/Interface/Config/Config.cpp index 9f7b512cc3..bedcc5e31c 100644 --- a/FEXCore/Source/Interface/Config/Config.cpp +++ b/FEXCore/Source/Interface/Config/Config.cpp @@ -29,7 +29,7 @@ #include namespace FEXCore::Context { - class Context; +class Context; } namespace FEXCore::Config { @@ -40,472 +40,451 @@ namespace DefaultValues { #define OPT_STRARRAY(group, enum, json, default) OPT_STR(group, enum, json, default) #define OPT_STRENUM(group, enum, json, default) const uint64_t P(enum) = FEXCore::ToUnderlying(P(default)); #include +} // namespace DefaultValues + +enum Paths { + PATH_DATA_DIR = 0, + PATH_CONFIG_DIR_LOCAL, + PATH_CONFIG_DIR_GLOBAL, + PATH_CONFIG_FILE_LOCAL, + PATH_CONFIG_FILE_GLOBAL, + PATH_LAST, +}; +static std::array Paths; + +void SetDataDirectory(const std::string_view Path) { + Paths[PATH_DATA_DIR] = Path; } - enum Paths { - PATH_DATA_DIR = 0, - PATH_CONFIG_DIR_LOCAL, - PATH_CONFIG_DIR_GLOBAL, - PATH_CONFIG_FILE_LOCAL, - PATH_CONFIG_FILE_GLOBAL, - PATH_LAST, - }; - static std::array Paths; +void SetConfigDirectory(const std::string_view Path, bool Global) { + Paths[PATH_CONFIG_DIR_LOCAL + Global] = Path; +} - void SetDataDirectory(const std::string_view Path) { - Paths[PATH_DATA_DIR] = Path; - } +void SetConfigFileLocation(const std::string_view Path, bool Global) { + Paths[PATH_CONFIG_FILE_LOCAL + Global] = Path; +} - void SetConfigDirectory(const std::string_view Path, bool Global) { - Paths[PATH_CONFIG_DIR_LOCAL + Global] = Path; - } +const fextl::string& GetDataDirectory() { + return Paths[PATH_DATA_DIR]; +} - void SetConfigFileLocation(const std::string_view Path, bool Global) { - Paths[PATH_CONFIG_FILE_LOCAL + Global] = Path; - } +const fextl::string& 
GetConfigDirectory(bool Global) { + return Paths[PATH_CONFIG_DIR_LOCAL + Global]; +} - fextl::string const& GetDataDirectory() { - return Paths[PATH_DATA_DIR]; - } +const fextl::string& GetConfigFileLocation(bool Global) { + return Paths[PATH_CONFIG_FILE_LOCAL + Global]; +} - fextl::string const& GetConfigDirectory(bool Global) { - return Paths[PATH_CONFIG_DIR_LOCAL + Global]; - } +fextl::string GetApplicationConfig(const std::string_view Program, bool Global) { + fextl::string ConfigFile = GetConfigDirectory(Global); - fextl::string const& GetConfigFileLocation(bool Global) { - return Paths[PATH_CONFIG_FILE_LOCAL + Global]; + if (!Global && !FHU::Filesystem::Exists(ConfigFile) && !FHU::Filesystem::CreateDirectories(ConfigFile)) { + LogMan::Msg::DFmt("Couldn't create config directory: '{}'", ConfigFile); + // Let's go local in this case + return fextl::fmt::format("./{}.json", Program); } - fextl::string GetApplicationConfig(const std::string_view Program, bool Global) { - fextl::string ConfigFile = GetConfigDirectory(Global); + ConfigFile += "AppConfig/"; - if (!Global && - !FHU::Filesystem::Exists(ConfigFile) && - !FHU::Filesystem::CreateDirectories(ConfigFile)) { - LogMan::Msg::DFmt("Couldn't create config directory: '{}'", ConfigFile); - // Let's go local in this case - return fextl::fmt::format("./{}.json", Program); - } - - ConfigFile += "AppConfig/"; - - // Attempt to create the local folder if it doesn't exist - if (!Global && - !FHU::Filesystem::Exists(ConfigFile) && - !FHU::Filesystem::CreateDirectories(ConfigFile)) { - // Let's go local in this case - return fextl::fmt::format("./{}.json", Program); - } - - return fextl::fmt::format("{}{}.json", ConfigFile, Program); + // Attempt to create the local folder if it doesn't exist + if (!Global && !FHU::Filesystem::Exists(ConfigFile) && !FHU::Filesystem::CreateDirectories(ConfigFile)) { + // Let's go local in this case + return fextl::fmt::format("./{}.json", Program); } - void 
SetConfig(FEXCore::Context::Context *CTX, ConfigOption Option, uint64_t Config) { - } + return fextl::fmt::format("{}{}.json", ConfigFile, Program); +} - void SetConfig(FEXCore::Context::Context *CTX, ConfigOption Option, fextl::string const &Config) { - } +void SetConfig(FEXCore::Context::Context* CTX, ConfigOption Option, uint64_t Config) {} - uint64_t GetConfig(FEXCore::Context::Context *CTX, ConfigOption Option) { - return 0; - } +void SetConfig(FEXCore::Context::Context* CTX, ConfigOption Option, const fextl::string& Config) {} - static fextl::map> ConfigLayers; - static FEXCore::Config::Layer *Meta{}; - - constexpr std::array LoadOrder = { - FEXCore::Config::LayerType::LAYER_GLOBAL_MAIN, - FEXCore::Config::LayerType::LAYER_MAIN, - FEXCore::Config::LayerType::LAYER_GLOBAL_STEAM_APP, - FEXCore::Config::LayerType::LAYER_GLOBAL_APP, - FEXCore::Config::LayerType::LAYER_LOCAL_STEAM_APP, - FEXCore::Config::LayerType::LAYER_LOCAL_APP, - FEXCore::Config::LayerType::LAYER_ARGUMENTS, - FEXCore::Config::LayerType::LAYER_ENVIRONMENT, - FEXCore::Config::LayerType::LAYER_TOP - }; +uint64_t GetConfig(FEXCore::Context::Context* CTX, ConfigOption Option) { + return 0; +} - Layer::Layer(const LayerType _Type) - : Type {_Type} { +static fextl::map> ConfigLayers; +static FEXCore::Config::Layer* Meta {}; + +constexpr std::array LoadOrder = {FEXCore::Config::LayerType::LAYER_GLOBAL_MAIN, + FEXCore::Config::LayerType::LAYER_MAIN, + FEXCore::Config::LayerType::LAYER_GLOBAL_STEAM_APP, + FEXCore::Config::LayerType::LAYER_GLOBAL_APP, + FEXCore::Config::LayerType::LAYER_LOCAL_STEAM_APP, + FEXCore::Config::LayerType::LAYER_LOCAL_APP, + FEXCore::Config::LayerType::LAYER_ARGUMENTS, + FEXCore::Config::LayerType::LAYER_ENVIRONMENT, + FEXCore::Config::LayerType::LAYER_TOP}; + +Layer::Layer(const LayerType _Type) + : Type {_Type} {} + +Layer::~Layer() {} + +class MetaLayer final : public FEXCore::Config::Layer { +public: + MetaLayer(const LayerType _Type) + : FEXCore::Config::Layer(_Type) {} + 
~MetaLayer() {} + void Load(); + +private: + void MergeConfigMap(const LayerOptions& Options); + void MergeEnvironmentVariables(const ConfigOption& Option, const LayerValue& Value); +}; + +void MetaLayer::Load() { + OptionMap.clear(); + + for (auto CurrentLayer = LoadOrder.begin(); CurrentLayer != LoadOrder.end(); ++CurrentLayer) { + auto it = ConfigLayers.find(*CurrentLayer); + if (it != ConfigLayers.end() && *CurrentLayer != Type) { + // Merge this layer's options to this layer + MergeConfigMap(it->second->GetOptionMap()); + } } +} - Layer::~Layer() { - } - class MetaLayer final : public FEXCore::Config::Layer { - public: - MetaLayer(const LayerType _Type) - : FEXCore::Config::Layer (_Type) { - } - ~MetaLayer() { - } - void Load(); +void MetaLayer::MergeEnvironmentVariables(const ConfigOption& Option, const LayerValue& Value) { + // Environment variables need a bit of additional work + // We want to merge the arrays rather than overwrite entirely + auto MetaEnvironment = OptionMap.find(Option); + if (MetaEnvironment == OptionMap.end()) { + // Doesn't exist, just insert + OptionMap.insert_or_assign(Option, Value); + return; + } + + // If an environment variable exists in both current meta and in the incoming layer then the meta layer value is overwritten + fextl::unordered_map LookupMap; + const auto AddToMap = [&LookupMap](const FEXCore::Config::LayerValue& Value) { + for (const auto& EnvVar : Value) { + const auto ItEq = EnvVar.find_first_of('='); + if (ItEq == fextl::string::npos) { + // Broken environment variable + // Skip + continue; + } + auto Key = fextl::string(EnvVar.begin(), EnvVar.begin() + ItEq); + auto Value = fextl::string(EnvVar.begin() + ItEq + 1, EnvVar.end()); - private: - void MergeConfigMap(const LayerOptions &Options); - void MergeEnvironmentVariables(ConfigOption const &Option, LayerValue const &Value); + // Add the key to the map, overwriting whatever previous value was there + LookupMap.insert_or_assign(std::move(Key), std::move(Value)); + 
} }; - void MetaLayer::Load() { - OptionMap.clear(); + AddToMap(MetaEnvironment->second); + AddToMap(Value); - for (auto CurrentLayer = LoadOrder.begin(); CurrentLayer != LoadOrder.end(); ++CurrentLayer) { - auto it = ConfigLayers.find(*CurrentLayer); - if (it != ConfigLayers.end() && *CurrentLayer != Type) { - // Merge this layer's options to this layer - MergeConfigMap(it->second->GetOptionMap()); - } - } + // Now with the two layers merged in the map + // Add all the values to the option + Erase(Option); + for (auto& Val : LookupMap) { + // Set will emplace multiple options in to its list + Set(Option, Val.first + "=" + Val.second); } +} - - void MetaLayer::MergeEnvironmentVariables(ConfigOption const &Option, LayerValue const &Value) { - // Environment variables need a bit of additional work - // We want to merge the arrays rather than overwrite entirely - auto MetaEnvironment = OptionMap.find(Option); - if (MetaEnvironment == OptionMap.end()) { - // Doesn't exist, just insert - OptionMap.insert_or_assign(Option, Value); - return; +void MetaLayer::MergeConfigMap(const LayerOptions& Options) { + // Insert this layer's options, overlaying previous options that exist here + for (auto& it : Options) { + if (it.first == FEXCore::Config::ConfigOption::CONFIG_ENV || it.first == FEXCore::Config::ConfigOption::CONFIG_HOSTENV) { + MergeEnvironmentVariables(it.first, it.second); + } else { + OptionMap.insert_or_assign(it.first, it.second); } + } +} - // If an environment variable exists in both current meta and in the incoming layer then the meta layer value is overwritten - fextl::unordered_map LookupMap; - const auto AddToMap = [&LookupMap](FEXCore::Config::LayerValue const &Value) { - for (const auto &EnvVar : Value) { - const auto ItEq = EnvVar.find_first_of('='); - if (ItEq == fextl::string::npos) { - // Broken environment variable - // Skip - continue; - } - auto Key = fextl::string(EnvVar.begin(), EnvVar.begin() + ItEq); - auto Value = fextl::string(EnvVar.begin() 
+ ItEq + 1, EnvVar.end()); - - // Add the key to the map, overwriting whatever previous value was there - LookupMap.insert_or_assign(std::move(Key), std::move(Value)); - } - }; - - AddToMap(MetaEnvironment->second); - AddToMap(Value); +void Initialize() { + AddLayer(fextl::make_unique(FEXCore::Config::LayerType::LAYER_TOP)); + Meta = ConfigLayers.begin()->second.get(); +} - // Now with the two layers merged in the map - // Add all the values to the option - Erase(Option); - for (auto &Val : LookupMap) { - // Set will emplace multiple options in to its list - Set(Option, Val.first + "=" + Val.second); - } - } +void Shutdown() { + ConfigLayers.clear(); + Meta = nullptr; +} - void MetaLayer::MergeConfigMap(const LayerOptions &Options) { - // Insert this layer's options, overlaying previous options that exist here - for (auto &it : Options) { - if (it.first == FEXCore::Config::ConfigOption::CONFIG_ENV || - it.first == FEXCore::Config::ConfigOption::CONFIG_HOSTENV) { - MergeEnvironmentVariables(it.first, it.second); - } - else { - OptionMap.insert_or_assign(it.first, it.second); - } +void Load() { + for (auto CurrentLayer = LoadOrder.begin(); CurrentLayer != LoadOrder.end(); ++CurrentLayer) { + auto it = ConfigLayers.find(*CurrentLayer); + if (it != ConfigLayers.end()) { + it->second->Load(); } } +} - void Initialize() { - AddLayer(fextl::make_unique(FEXCore::Config::LayerType::LAYER_TOP)); - Meta = ConfigLayers.begin()->second.get(); +fextl::string ExpandPath(const fextl::string& ContainerPrefix, fextl::string PathName) { + if (PathName.empty()) { + return {}; } - void Shutdown() { - ConfigLayers.clear(); - Meta = nullptr; - } - void Load() { - for (auto CurrentLayer = LoadOrder.begin(); CurrentLayer != LoadOrder.end(); ++CurrentLayer) { - auto it = ConfigLayers.find(*CurrentLayer); - if (it != ConfigLayers.end()) { - it->second->Load(); - } + // Expand home if it exists + if (FHU::Filesystem::IsRelative(PathName)) { + fextl::string Home = getenv("HOME") ?: ""; + // 
Home expansion only works if it is the first character + // This matches bash behaviour + if (PathName.at(0) == '~') { + PathName.replace(0, 1, Home); + return PathName; } - } - fextl::string ExpandPath(fextl::string const &ContainerPrefix, fextl::string PathName) { - if (PathName.empty()) { - return {}; + // Expand relative path to absolute + char ExistsTempPath[PATH_MAX]; + char* RealPath = FHU::Filesystem::Absolute(PathName.c_str(), ExistsTempPath); + if (RealPath) { + PathName = RealPath; } - - // Expand home if it exists - if (FHU::Filesystem::IsRelative(PathName)) { - fextl::string Home = getenv("HOME") ?: ""; - // Home expansion only works if it is the first character - // This matches bash behaviour - if (PathName.at(0) == '~') { - PathName.replace(0, 1, Home); - return PathName; - } - - // Expand relative path to absolute - char ExistsTempPath[PATH_MAX]; - char *RealPath = FHU::Filesystem::Absolute(PathName.c_str(), ExistsTempPath); - if (RealPath) { - PathName = RealPath; - } - - // Only return if it exists - if (FHU::Filesystem::Exists(PathName)) { - return PathName; - } + // Only return if it exists + if (FHU::Filesystem::Exists(PathName)) { + return PathName; } - else { - // If the containerprefix and pathname isn't empty - // Then we check if the pathname exists in our current namespace - // If the path DOESN'T exist but DOES exist with the prefix applied - // then redirect to the prefix - // - // This might not be expected behaviour for some edge cases but since - // all paths aren't mounted inside the container, then it'll be fine - // - // Main catch case for this is the default thunk install folders - // HostThunks: $CMAKE_INSTALL_PREFIX/lib/fex-emu/HostThunks/ - // GuestThunks: $CMAKE_INSTALL_PREFIX/share/fex-emu/GuestThunks/ - if (!ContainerPrefix.empty() && !PathName.empty()) { - if (!FHU::Filesystem::Exists(PathName)) { - auto ContainerPath = ContainerPrefix + PathName; - if (FHU::Filesystem::Exists(ContainerPath)) { - return ContainerPath; - 
} + } else { + // If the containerprefix and pathname isn't empty + // Then we check if the pathname exists in our current namespace + // If the path DOESN'T exist but DOES exist with the prefix applied + // then redirect to the prefix + // + // This might not be expected behaviour for some edge cases but since + // all paths aren't mounted inside the container, then it'll be fine + // + // Main catch case for this is the default thunk install folders + // HostThunks: $CMAKE_INSTALL_PREFIX/lib/fex-emu/HostThunks/ + // GuestThunks: $CMAKE_INSTALL_PREFIX/share/fex-emu/GuestThunks/ + if (!ContainerPrefix.empty() && !PathName.empty()) { + if (!FHU::Filesystem::Exists(PathName)) { + auto ContainerPath = ContainerPrefix + PathName; + if (FHU::Filesystem::Exists(ContainerPath)) { + return ContainerPath; } } } - return {}; } + return {}; +} - constexpr char ContainerManager[] = "/run/host/container-manager"; - - fextl::string FindContainer() { - // We only support pressure-vessel at the moment - if (FHU::Filesystem::Exists(ContainerManager)) { - fextl::vector Manager{}; - if (FEXCore::FileLoading::LoadFile(Manager, ContainerManager)) { - // Trim the whitespace, may contain a newline - fextl::string ManagerStr = Manager.data(); - ManagerStr = FEXCore::StringUtils::Trim(ManagerStr); - return ManagerStr; - } +constexpr char ContainerManager[] = "/run/host/container-manager"; + +fextl::string FindContainer() { + // We only support pressure-vessel at the moment + if (FHU::Filesystem::Exists(ContainerManager)) { + fextl::vector Manager {}; + if (FEXCore::FileLoading::LoadFile(Manager, ContainerManager)) { + // Trim the whitespace, may contain a newline + fextl::string ManagerStr = Manager.data(); + ManagerStr = FEXCore::StringUtils::Trim(ManagerStr); + return ManagerStr; } - return {}; } + return {}; +} - fextl::string FindContainerPrefix() { - // We only support pressure-vessel at the moment - if (FHU::Filesystem::Exists(ContainerManager)) { - fextl::vector Manager{}; - if 
(FEXCore::FileLoading::LoadFile(Manager, ContainerManager)) { - // Trim the whitespace, may contain a newline - fextl::string ManagerStr = Manager.data(); - ManagerStr = FEXCore::StringUtils::Trim(ManagerStr); - if (strncmp(ManagerStr.data(), "pressure-vessel", Manager.size()) == 0) { - // We are running inside of pressure vessel - // Our $CMAKE_INSTALL_PREFIX paths are now inside of /run/host/$CMAKE_INSTALL_PREFIX - return "/run/host/"; - } +fextl::string FindContainerPrefix() { + // We only support pressure-vessel at the moment + if (FHU::Filesystem::Exists(ContainerManager)) { + fextl::vector Manager {}; + if (FEXCore::FileLoading::LoadFile(Manager, ContainerManager)) { + // Trim the whitespace, may contain a newline + fextl::string ManagerStr = Manager.data(); + ManagerStr = FEXCore::StringUtils::Trim(ManagerStr); + if (strncmp(ManagerStr.data(), "pressure-vessel", Manager.size()) == 0) { + // We are running inside of pressure vessel + // Our $CMAKE_INSTALL_PREFIX paths are now inside of /run/host/$CMAKE_INSTALL_PREFIX + return "/run/host/"; } } - return {}; } + return {}; +} - void ReloadMetaLayer() { - Meta->Load(); +void ReloadMetaLayer() { + Meta->Load(); - // Do configuration option fix ups after everything is reloaded - if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_CORE)) { - // Sanitize Core option - FEX_CONFIG_OPT(Core, CORE); + // Do configuration option fix ups after everything is reloaded + if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_CORE)) { + // Sanitize Core option + FEX_CONFIG_OPT(Core, CORE); #if (_M_X86_64) - constexpr uint32_t MaxCoreNumber = 1; + constexpr uint32_t MaxCoreNumber = 1; #else - constexpr uint32_t MaxCoreNumber = 0; + constexpr uint32_t MaxCoreNumber = 0; #endif - if (Core > MaxCoreNumber) { - // Sanitize the core option by setting the core to the JIT if invalid - FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_CORE, fextl::fmt::format("{}", static_cast(FEXCore::Config::CONFIG_IRJIT))); - } + if (Core > 
MaxCoreNumber) { + // Sanitize the core option by setting the core to the JIT if invalid + FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_CORE, fextl::fmt::format("{}", static_cast(FEXCore::Config::CONFIG_IRJIT))); } + } - if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_CACHEOBJECTCODECOMPILATION)) { - FEX_CONFIG_OPT(CacheObjectCodeCompilation, CACHEOBJECTCODECOMPILATION); - FEX_CONFIG_OPT(Core, CORE); - } + if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_CACHEOBJECTCODECOMPILATION)) { + FEX_CONFIG_OPT(CacheObjectCodeCompilation, CACHEOBJECTCODECOMPILATION); + FEX_CONFIG_OPT(Core, CORE); + } - fextl::string ContainerPrefix { FindContainerPrefix() }; - auto ExpandPathIfExists = [&ContainerPrefix](FEXCore::Config::ConfigOption Config, fextl::string PathName) { - auto NewPath = ExpandPath(ContainerPrefix, PathName); - if (!NewPath.empty()) { - FEXCore::Config::EraseSet(Config, NewPath); - } - }; - - if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_ROOTFS)) { - FEX_CONFIG_OPT(PathName, ROOTFS); - auto ExpandedString = ExpandPath(ContainerPrefix,PathName()); - if (!ExpandedString.empty()) { - // Adjust the path if it ended up being relative - FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_ROOTFS, ExpandedString); - } - else if (!PathName().empty()) { - // If the filesystem doesn't exist then let's see if it exists in the fex-emu folder - fextl::string NamedRootFS = GetDataDirectory() + "RootFS/" + PathName(); - if (FHU::Filesystem::Exists(NamedRootFS)) { - FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_ROOTFS, NamedRootFS); - } - } - } - if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_THUNKHOSTLIBS)) { - FEX_CONFIG_OPT(PathName, THUNKHOSTLIBS); - ExpandPathIfExists(FEXCore::Config::CONFIG_THUNKHOSTLIBS, PathName()); + fextl::string ContainerPrefix {FindContainerPrefix()}; + auto ExpandPathIfExists = [&ContainerPrefix](FEXCore::Config::ConfigOption Config, fextl::string PathName) { + auto NewPath = ExpandPath(ContainerPrefix, PathName); + if 
(!NewPath.empty()) { + FEXCore::Config::EraseSet(Config, NewPath); } - if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_THUNKGUESTLIBS)) { - FEX_CONFIG_OPT(PathName, THUNKGUESTLIBS); - ExpandPathIfExists(FEXCore::Config::CONFIG_THUNKGUESTLIBS, PathName()); - } - if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_THUNKCONFIG)) { - FEX_CONFIG_OPT(PathName, THUNKCONFIG); - auto ExpandedString = ExpandPath(ContainerPrefix, PathName()); - if (!ExpandedString.empty()) { - // Adjust the path if it ended up being relative - FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_THUNKCONFIG, ExpandedString); - } - else if (!PathName().empty()) { - // If the filesystem doesn't exist then let's see if it exists in the fex-emu folder - fextl::string NamedConfig = GetDataDirectory() + "ThunkConfigs/" + PathName(); - if (FHU::Filesystem::Exists(NamedConfig)) { - FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_THUNKCONFIG, NamedConfig); - } + }; + + if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_ROOTFS)) { + FEX_CONFIG_OPT(PathName, ROOTFS); + auto ExpandedString = ExpandPath(ContainerPrefix, PathName()); + if (!ExpandedString.empty()) { + // Adjust the path if it ended up being relative + FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_ROOTFS, ExpandedString); + } else if (!PathName().empty()) { + // If the filesystem doesn't exist then let's see if it exists in the fex-emu folder + fextl::string NamedRootFS = GetDataDirectory() + "RootFS/" + PathName(); + if (FHU::Filesystem::Exists(NamedRootFS)) { + FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_ROOTFS, NamedRootFS); } } - if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_OUTPUTLOG)) { - FEX_CONFIG_OPT(PathName, OUTPUTLOG); - if (PathName() != "stdout" && PathName() != "stderr" && PathName() != "server") { - ExpandPathIfExists(FEXCore::Config::CONFIG_OUTPUTLOG, PathName()); + } + if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_THUNKHOSTLIBS)) { + FEX_CONFIG_OPT(PathName, THUNKHOSTLIBS); + 
ExpandPathIfExists(FEXCore::Config::CONFIG_THUNKHOSTLIBS, PathName()); + } + if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_THUNKGUESTLIBS)) { + FEX_CONFIG_OPT(PathName, THUNKGUESTLIBS); + ExpandPathIfExists(FEXCore::Config::CONFIG_THUNKGUESTLIBS, PathName()); + } + if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_THUNKCONFIG)) { + FEX_CONFIG_OPT(PathName, THUNKCONFIG); + auto ExpandedString = ExpandPath(ContainerPrefix, PathName()); + if (!ExpandedString.empty()) { + // Adjust the path if it ended up being relative + FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_THUNKCONFIG, ExpandedString); + } else if (!PathName().empty()) { + // If the filesystem doesn't exist then let's see if it exists in the fex-emu folder + fextl::string NamedConfig = GetDataDirectory() + "ThunkConfigs/" + PathName(); + if (FHU::Filesystem::Exists(NamedConfig)) { + FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_THUNKCONFIG, NamedConfig); } } - - if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_DUMPIR) && - !FEXCore::Config::Exists(FEXCore::Config::CONFIG_PASSMANAGERDUMPIR)) { - // If DumpIR is set but no PassManagerDumpIR configuration is set, then default to `afteropt` - FEX_CONFIG_OPT(PathName, DUMPIR); - if (PathName() != "no") { - EraseSet(FEXCore::Config::ConfigOption::CONFIG_PASSMANAGERDUMPIR, fextl::fmt::format("{}", static_cast(FEXCore::Config::PassManagerDumpIR::AFTEROPT))); - } + } + if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_OUTPUTLOG)) { + FEX_CONFIG_OPT(PathName, OUTPUTLOG); + if (PathName() != "stdout" && PathName() != "stderr" && PathName() != "server") { + ExpandPathIfExists(FEXCore::Config::CONFIG_OUTPUTLOG, PathName()); } + } - if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_SINGLESTEP)) { - // Single stepping also enforces single instruction size blocks - Set(FEXCore::Config::ConfigOption::CONFIG_MAXINST, "1"); + if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_DUMPIR) && 
!FEXCore::Config::Exists(FEXCore::Config::CONFIG_PASSMANAGERDUMPIR)) { + // If DumpIR is set but no PassManagerDumpIR configuration is set, then default to `afteropt` + FEX_CONFIG_OPT(PathName, DUMPIR); + if (PathName() != "no") { + EraseSet(FEXCore::Config::ConfigOption::CONFIG_PASSMANAGERDUMPIR, + fextl::fmt::format("{}", static_cast(FEXCore::Config::PassManagerDumpIR::AFTEROPT))); } } - void AddLayer(fextl::unique_ptr _Layer) { - ConfigLayers.emplace(_Layer->GetLayerType(), std::move(_Layer)); + if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_SINGLESTEP)) { + // Single stepping also enforces single instruction size blocks + Set(FEXCore::Config::ConfigOption::CONFIG_MAXINST, "1"); } +} - bool Exists(ConfigOption Option) { - return Meta->OptionExists(Option); - } +void AddLayer(fextl::unique_ptr _Layer) { + ConfigLayers.emplace(_Layer->GetLayerType(), std::move(_Layer)); +} - std::optional All(ConfigOption Option) { - return Meta->All(Option); - } +bool Exists(ConfigOption Option) { + return Meta->OptionExists(Option); +} - std::optional Get(ConfigOption Option) { - return Meta->Get(Option); - } +std::optional All(ConfigOption Option) { + return Meta->All(Option); +} - void Set(ConfigOption Option, std::string_view Data) { - Meta->Set(Option, Data); - } +std::optional Get(ConfigOption Option) { + return Meta->Get(Option); +} - void Erase(ConfigOption Option) { - Meta->Erase(Option); - } +void Set(ConfigOption Option, std::string_view Data) { + Meta->Set(Option, Data); +} - void EraseSet(ConfigOption Option, std::string_view Data) { - Meta->EraseSet(Option, Data); - } +void Erase(ConfigOption Option) { + Meta->Erase(Option); +} - template - T Value::Get(FEXCore::Config::ConfigOption Option) { - T Result; - auto Value = FEXCore::Config::Get(Option); +void EraseSet(ConfigOption Option, std::string_view Data) { + Meta->EraseSet(Option, Data); +} - if (!FEXCore::StrConv::Conv(**Value, &Result)) { - LOGMAN_MSG_A_FMT("Attempted to convert invalid value"); - } - 
return Result; +template +T Value::Get(FEXCore::Config::ConfigOption Option) { + T Result; + auto Value = FEXCore::Config::Get(Option); + + if (!FEXCore::StrConv::Conv(**Value, &Result)) { + LOGMAN_MSG_A_FMT("Attempted to convert invalid value"); } + return Result; +} - template - T Value::GetIfExists(FEXCore::Config::ConfigOption Option, T Default) { - T Result; - auto Value = FEXCore::Config::Get(Option); +template +T Value::GetIfExists(FEXCore::Config::ConfigOption Option, T Default) { + T Result; + auto Value = FEXCore::Config::Get(Option); - if (Value && FEXCore::StrConv::Conv(**Value, &Result)) { - return Result; - } - else { - return Default; - } + if (Value && FEXCore::StrConv::Conv(**Value, &Result)) { + return Result; + } else { + return Default; } +} - template<> - fextl::string Value::GetIfExists(FEXCore::Config::ConfigOption Option, fextl::string Default) { - auto Value = FEXCore::Config::Get(Option); - if (Value) { - return **Value; - } - else { - return Default; - } +template<> +fextl::string Value::GetIfExists(FEXCore::Config::ConfigOption Option, fextl::string Default) { + auto Value = FEXCore::Config::Get(Option); + if (Value) { + return **Value; + } else { + return Default; } +} - template<> - fextl::string Value::GetIfExists(FEXCore::Config::ConfigOption Option, std::string_view Default) { - auto Value = FEXCore::Config::Get(Option); - if (Value) { - return **Value; - } - else { - return fextl::string(Default); - } +template<> +fextl::string Value::GetIfExists(FEXCore::Config::ConfigOption Option, std::string_view Default) { + auto Value = FEXCore::Config::Get(Option); + if (Value) { + return **Value; + } else { + return fextl::string(Default); } +} - template bool Value::GetIfExists(FEXCore::Config::ConfigOption Option, bool Default); - template int8_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, int8_t Default); - template uint8_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, uint8_t Default); - template int16_t 
Value::GetIfExists(FEXCore::Config::ConfigOption Option, int16_t Default); - template uint16_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, uint16_t Default); - template int32_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, int32_t Default); - template uint32_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, uint32_t Default); - template int64_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, int64_t Default); - template uint64_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, uint64_t Default); - - // Constructor - template Value::Value(FEXCore::Config::ConfigOption _Option, fextl::string Default); - template Value::Value(FEXCore::Config::ConfigOption _Option, bool Default); - template Value::Value(FEXCore::Config::ConfigOption _Option, uint8_t Default); - template Value::Value(FEXCore::Config::ConfigOption _Option, uint64_t Default); - - template - void Value::GetListIfExists(FEXCore::Config::ConfigOption Option, fextl::list *List) { - auto Value = FEXCore::Config::All(Option); - List->clear(); - if (Value) { - *List = **Value; - } +template bool Value::GetIfExists(FEXCore::Config::ConfigOption Option, bool Default); +template int8_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, int8_t Default); +template uint8_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, uint8_t Default); +template int16_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, int16_t Default); +template uint16_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, uint16_t Default); +template int32_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, int32_t Default); +template uint32_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, uint32_t Default); +template int64_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, int64_t Default); +template uint64_t Value::GetIfExists(FEXCore::Config::ConfigOption Option, uint64_t Default); + +// Constructor +template 
Value::Value(FEXCore::Config::ConfigOption _Option, fextl::string Default); +template Value::Value(FEXCore::Config::ConfigOption _Option, bool Default); +template Value::Value(FEXCore::Config::ConfigOption _Option, uint8_t Default); +template Value::Value(FEXCore::Config::ConfigOption _Option, uint64_t Default); + +template +void Value::GetListIfExists(FEXCore::Config::ConfigOption Option, fextl::list* List) { + auto Value = FEXCore::Config::All(Option); + List->clear(); + if (Value) { + *List = **Value; } - template void Value::GetListIfExists(FEXCore::Config::ConfigOption Option, fextl::list *List); } - +template void Value::GetListIfExists(FEXCore::Config::ConfigOption Option, fextl::list* List); +} // namespace FEXCore::Config diff --git a/FEXCore/Source/Interface/Context/Context.cpp b/FEXCore/Source/Interface/Context/Context.cpp index 05d1d8e5d1..33885d42ea 100644 --- a/FEXCore/Source/Interface/Context/Context.cpp +++ b/FEXCore/Source/Interface/Context/Context.cpp @@ -13,57 +13,57 @@ #include namespace FEXCore::Context { - void InitializeStaticTables(OperatingMode Mode) { - X86Tables::InitializeInfoTables(Mode); - IR::InstallOpcodeHandlers(Mode); - } +void InitializeStaticTables(OperatingMode Mode) { + X86Tables::InitializeInfoTables(Mode); + IR::InstallOpcodeHandlers(Mode); +} - fextl::unique_ptr FEXCore::Context::Context::CreateNewContext() { - return fextl::make_unique(); - } +fextl::unique_ptr FEXCore::Context::Context::CreateNewContext() { + return fextl::make_unique(); +} - void FEXCore::Context::ContextImpl::SetExitHandler(ExitHandler handler) { - CustomExitHandler = std::move(handler); - } +void FEXCore::Context::ContextImpl::SetExitHandler(ExitHandler handler) { + CustomExitHandler = std::move(handler); +} - ExitHandler FEXCore::Context::ContextImpl::GetExitHandler() const { - return CustomExitHandler; - } +ExitHandler FEXCore::Context::ContextImpl::GetExitHandler() const { + return CustomExitHandler; +} - void 
FEXCore::Context::ContextImpl::CompileRIP(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) { - CompileBlock(Thread->CurrentFrame, GuestRIP); - } +void FEXCore::Context::ContextImpl::CompileRIP(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP) { + CompileBlock(Thread->CurrentFrame, GuestRIP); +} - void FEXCore::Context::ContextImpl::CompileRIPCount(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP, uint64_t MaxInst) { - CompileBlock(Thread->CurrentFrame, GuestRIP, MaxInst); - } +void FEXCore::Context::ContextImpl::CompileRIPCount(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP, uint64_t MaxInst) { + CompileBlock(Thread->CurrentFrame, GuestRIP, MaxInst); +} - void FEXCore::Context::ContextImpl::SetCustomCPUBackendFactory(CustomCPUFactoryType Factory) { - CustomCPUFactory = std::move(Factory); - } +void FEXCore::Context::ContextImpl::SetCustomCPUBackendFactory(CustomCPUFactoryType Factory) { + CustomCPUFactory = std::move(Factory); +} - HostFeatures FEXCore::Context::ContextImpl::GetHostFeatures() const { - return HostFeatures; - } +HostFeatures FEXCore::Context::ContextImpl::GetHostFeatures() const { + return HostFeatures; +} - void FEXCore::Context::ContextImpl::SetSignalDelegator(FEXCore::SignalDelegator *_SignalDelegation) { - SignalDelegation = _SignalDelegation; - } +void FEXCore::Context::ContextImpl::SetSignalDelegator(FEXCore::SignalDelegator* _SignalDelegation) { + SignalDelegation = _SignalDelegation; +} - void FEXCore::Context::ContextImpl::SetSyscallHandler(FEXCore::HLE::SyscallHandler *Handler) { - SyscallHandler = Handler; - SourcecodeResolver = Handler->GetSourcecodeResolver(); - } +void FEXCore::Context::ContextImpl::SetSyscallHandler(FEXCore::HLE::SyscallHandler* Handler) { + SyscallHandler = Handler; + SourcecodeResolver = Handler->GetSourcecodeResolver(); +} - FEXCore::CPUID::FunctionResults FEXCore::Context::ContextImpl::RunCPUIDFunction(uint32_t Function, uint32_t Leaf) { - return 
CPUID.RunFunction(Function, Leaf); - } +FEXCore::CPUID::FunctionResults FEXCore::Context::ContextImpl::RunCPUIDFunction(uint32_t Function, uint32_t Leaf) { + return CPUID.RunFunction(Function, Leaf); +} - FEXCore::CPUID::XCRResults FEXCore::Context::ContextImpl::RunXCRFunction(uint32_t Function) { - return CPUID.RunXCRFunction(Function); - } +FEXCore::CPUID::XCRResults FEXCore::Context::ContextImpl::RunXCRFunction(uint32_t Function) { + return CPUID.RunXCRFunction(Function); +} - FEXCore::CPUID::FunctionResults FEXCore::Context::ContextImpl::RunCPUIDFunctionName(uint32_t Function, uint32_t Leaf, uint32_t CPU) { - return CPUID.RunFunctionName(Function, Leaf, CPU); - } +FEXCore::CPUID::FunctionResults FEXCore::Context::ContextImpl::RunCPUIDFunctionName(uint32_t Function, uint32_t Leaf, uint32_t CPU) { + return CPUID.RunFunctionName(Function, Leaf, CPU); } +} // namespace FEXCore::Context diff --git a/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.cpp b/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.cpp index 6687e97fc7..a42455d513 100644 --- a/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.cpp +++ b/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.cpp @@ -31,24 +31,31 @@ namespace FEXCore::CPU { namespace x64 { // All but x19 and x29 are caller saved constexpr std::array SRA = { - FEXCore::ARMEmitter::Reg::r4, FEXCore::ARMEmitter::Reg::r5, - FEXCore::ARMEmitter::Reg::r6, FEXCore::ARMEmitter::Reg::r7, - FEXCore::ARMEmitter::Reg::r8, FEXCore::ARMEmitter::Reg::r9, - FEXCore::ARMEmitter::Reg::r10, FEXCore::ARMEmitter::Reg::r11, - FEXCore::ARMEmitter::Reg::r12, FEXCore::ARMEmitter::Reg::r13, - FEXCore::ARMEmitter::Reg::r14, FEXCore::ARMEmitter::Reg::r15, - FEXCore::ARMEmitter::Reg::r16, FEXCore::ARMEmitter::Reg::r17, - FEXCore::ARMEmitter::Reg::r19, FEXCore::ARMEmitter::Reg::r29, + FEXCore::ARMEmitter::Reg::r4, + FEXCore::ARMEmitter::Reg::r5, + FEXCore::ARMEmitter::Reg::r6, + FEXCore::ARMEmitter::Reg::r7, + FEXCore::ARMEmitter::Reg::r8, + 
FEXCore::ARMEmitter::Reg::r9, + FEXCore::ARMEmitter::Reg::r10, + FEXCore::ARMEmitter::Reg::r11, + FEXCore::ARMEmitter::Reg::r12, + FEXCore::ARMEmitter::Reg::r13, + FEXCore::ARMEmitter::Reg::r14, + FEXCore::ARMEmitter::Reg::r15, + FEXCore::ARMEmitter::Reg::r16, + FEXCore::ARMEmitter::Reg::r17, + FEXCore::ARMEmitter::Reg::r19, + FEXCore::ARMEmitter::Reg::r29, // PF/AF must be last. - REG_PF, REG_AF, + REG_PF, + REG_AF, }; constexpr std::array RA = { // All these callee saved - FEXCore::ARMEmitter::Reg::r20, FEXCore::ARMEmitter::Reg::r21, - FEXCore::ARMEmitter::Reg::r22, FEXCore::ARMEmitter::Reg::r23, - FEXCore::ARMEmitter::Reg::r24, FEXCore::ARMEmitter::Reg::r25, - FEXCore::ARMEmitter::Reg::r30, + FEXCore::ARMEmitter::Reg::r20, FEXCore::ARMEmitter::Reg::r21, FEXCore::ARMEmitter::Reg::r22, FEXCore::ARMEmitter::Reg::r23, + FEXCore::ARMEmitter::Reg::r24, FEXCore::ARMEmitter::Reg::r25, FEXCore::ARMEmitter::Reg::r30, }; constexpr std::array, 3> RAPair = {{ @@ -59,145 +66,134 @@ namespace x64 { // All are caller saved constexpr std::array SRAFPR = { - FEXCore::ARMEmitter::VReg::v16, FEXCore::ARMEmitter::VReg::v17, - FEXCore::ARMEmitter::VReg::v18, FEXCore::ARMEmitter::VReg::v19, - FEXCore::ARMEmitter::VReg::v20, FEXCore::ARMEmitter::VReg::v21, - FEXCore::ARMEmitter::VReg::v22, FEXCore::ARMEmitter::VReg::v23, - FEXCore::ARMEmitter::VReg::v24, FEXCore::ARMEmitter::VReg::v25, - FEXCore::ARMEmitter::VReg::v26, FEXCore::ARMEmitter::VReg::v27, - FEXCore::ARMEmitter::VReg::v28, FEXCore::ARMEmitter::VReg::v29, - FEXCore::ARMEmitter::VReg::v30, FEXCore::ARMEmitter::VReg::v31 - }; + FEXCore::ARMEmitter::VReg::v16, FEXCore::ARMEmitter::VReg::v17, FEXCore::ARMEmitter::VReg::v18, FEXCore::ARMEmitter::VReg::v19, + FEXCore::ARMEmitter::VReg::v20, FEXCore::ARMEmitter::VReg::v21, FEXCore::ARMEmitter::VReg::v22, FEXCore::ARMEmitter::VReg::v23, + FEXCore::ARMEmitter::VReg::v24, FEXCore::ARMEmitter::VReg::v25, FEXCore::ARMEmitter::VReg::v26, FEXCore::ARMEmitter::VReg::v27, + 
FEXCore::ARMEmitter::VReg::v28, FEXCore::ARMEmitter::VReg::v29, FEXCore::ARMEmitter::VReg::v30, FEXCore::ARMEmitter::VReg::v31}; // v8..v15 = (lower 64bits) Callee saved constexpr std::array RAFPR = { // v0 ~ v1 are used as temps. // FEXCore::ARMEmitter::VReg::v0, FEXCore::ARMEmitter::VReg::v1, - FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, - FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5, - FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, - FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9, - FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11, - FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13, + FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5, + FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9, + FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11, FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13, FEXCore::ARMEmitter::VReg::v14, FEXCore::ARMEmitter::VReg::v15, }; // I wish this could get constexpr generated from SRA's definition but impossible until libstdc++12, libc++15. // SRA GPRs that need to be spilled when calling a function with `preserve_all` ABI. 
constexpr std::array PreserveAll_SRA = { - FEXCore::ARMEmitter::Reg::r4, FEXCore::ARMEmitter::Reg::r5, - FEXCore::ARMEmitter::Reg::r6, FEXCore::ARMEmitter::Reg::r7, - FEXCore::ARMEmitter::Reg::r8, - FEXCore::ARMEmitter::Reg::r16, FEXCore::ARMEmitter::Reg::r17, + FEXCore::ARMEmitter::Reg::r4, FEXCore::ARMEmitter::Reg::r5, FEXCore::ARMEmitter::Reg::r6, FEXCore::ARMEmitter::Reg::r7, + FEXCore::ARMEmitter::Reg::r8, FEXCore::ARMEmitter::Reg::r16, FEXCore::ARMEmitter::Reg::r17, }; - constexpr uint32_t PreserveAll_SRAMask = { - []() -> uint32_t { - uint32_t Mask{}; - for (auto Reg : PreserveAll_SRA) { - switch (Reg.Idx()) { - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - case 6: - case 7: - case 8: - case 16: - case 17: - Mask |= (1U << Reg.Idx()); - break; - default: break; - } + constexpr uint32_t PreserveAll_SRAMask = {[]() -> uint32_t { + uint32_t Mask {}; + for (auto Reg : PreserveAll_SRA) { + switch (Reg.Idx()) { + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 16: + case 17: Mask |= (1U << Reg.Idx()); break; + default: break; } + } - return Mask; - }() - }; + return Mask; + }()}; // Dynamic GPRs constexpr std::array PreserveAll_Dynamic = { // Only LR needs to get saved. - FEXCore::ARMEmitter::Reg::r30 - }; + FEXCore::ARMEmitter::Reg::r30}; // SRA FPRs that need to be spilled when calling a function with `preserve_all` ABI. constexpr std::array PreserveAll_SRAFPR = { // None. 
}; - constexpr uint32_t PreserveAll_SRAFPRMask = { - []() -> uint32_t { - uint32_t Mask{}; - for (auto Reg : PreserveAll_SRAFPR) { - Mask |= (1U << Reg.Idx()); - } - return Mask; - }() - }; + constexpr uint32_t PreserveAll_SRAFPRMask = {[]() -> uint32_t { + uint32_t Mask {}; + for (auto Reg : PreserveAll_SRAFPR) { + Mask |= (1U << Reg.Idx()); + } + return Mask; + }()}; // Dynamic FPRs // - v0-v7 constexpr std::array PreserveAll_DynamicFPR = { // v0 ~ v1 are temps - FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, - FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5, - FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, + FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, FEXCore::ARMEmitter::VReg::v4, + FEXCore::ARMEmitter::VReg::v5, FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, }; // SRA FPRs that need to be spilled when the host supports SVE-256bit with `preserve_all` ABI. // This is /all/ of the SRA registers constexpr std::array PreserveAll_SRAFPRSVE = SRAFPR; - constexpr uint32_t PreserveAll_SRAFPRSVEMask = { - []() -> uint32_t { - uint32_t Mask{}; - for (auto Reg : PreserveAll_SRAFPRSVE) { - Mask |= (1U << Reg.Idx()); - } - return Mask; - }() - }; + constexpr uint32_t PreserveAll_SRAFPRSVEMask = {[]() -> uint32_t { + uint32_t Mask {}; + for (auto Reg : PreserveAll_SRAFPRSVE) { + Mask |= (1U << Reg.Idx()); + } + return Mask; + }()}; // Dynamic FPRs when the host supports SVE-256bit. constexpr std::array PreserveAll_DynamicFPRSVE = { // v0 ~ v1 are used as temps. 
- FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, - FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5, - FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, - FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9, - FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11, - FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13, + FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5, + FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9, + FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11, FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13, FEXCore::ARMEmitter::VReg::v14, FEXCore::ARMEmitter::VReg::v15, }; -} +} // namespace x64 namespace x32 { // All but x19 and x29 are caller saved constexpr std::array SRA = { - FEXCore::ARMEmitter::Reg::r4, FEXCore::ARMEmitter::Reg::r5, - FEXCore::ARMEmitter::Reg::r6, FEXCore::ARMEmitter::Reg::r7, - FEXCore::ARMEmitter::Reg::r8, FEXCore::ARMEmitter::Reg::r9, - FEXCore::ARMEmitter::Reg::r10, FEXCore::ARMEmitter::Reg::r11, + FEXCore::ARMEmitter::Reg::r4, + FEXCore::ARMEmitter::Reg::r5, + FEXCore::ARMEmitter::Reg::r6, + FEXCore::ARMEmitter::Reg::r7, + FEXCore::ARMEmitter::Reg::r8, + FEXCore::ARMEmitter::Reg::r9, + FEXCore::ARMEmitter::Reg::r10, + FEXCore::ARMEmitter::Reg::r11, // PF/AF must be last. 
- REG_PF, REG_AF, + REG_PF, + REG_AF, }; constexpr std::array RA = { // All these callee saved - FEXCore::ARMEmitter::Reg::r20, FEXCore::ARMEmitter::Reg::r21, - FEXCore::ARMEmitter::Reg::r22, FEXCore::ARMEmitter::Reg::r23, - FEXCore::ARMEmitter::Reg::r24, FEXCore::ARMEmitter::Reg::r25, + FEXCore::ARMEmitter::Reg::r20, + FEXCore::ARMEmitter::Reg::r21, + FEXCore::ARMEmitter::Reg::r22, + FEXCore::ARMEmitter::Reg::r23, + FEXCore::ARMEmitter::Reg::r24, + FEXCore::ARMEmitter::Reg::r25, // Registers only available on 32-bit // All these are caller saved (except for r19). - FEXCore::ARMEmitter::Reg::r12, FEXCore::ARMEmitter::Reg::r13, - FEXCore::ARMEmitter::Reg::r14, FEXCore::ARMEmitter::Reg::r15, - FEXCore::ARMEmitter::Reg::r16, FEXCore::ARMEmitter::Reg::r17, - FEXCore::ARMEmitter::Reg::r29, FEXCore::ARMEmitter::Reg::r30, + FEXCore::ARMEmitter::Reg::r12, + FEXCore::ARMEmitter::Reg::r13, + FEXCore::ARMEmitter::Reg::r14, + FEXCore::ARMEmitter::Reg::r15, + FEXCore::ARMEmitter::Reg::r16, + FEXCore::ARMEmitter::Reg::r17, + FEXCore::ARMEmitter::Reg::r29, + FEXCore::ARMEmitter::Reg::r30, FEXCore::ARMEmitter::Reg::r19, }; @@ -215,10 +211,8 @@ namespace x32 { // All are caller saved constexpr std::array SRAFPR = { - FEXCore::ARMEmitter::VReg::v16, FEXCore::ARMEmitter::VReg::v17, - FEXCore::ARMEmitter::VReg::v18, FEXCore::ARMEmitter::VReg::v19, - FEXCore::ARMEmitter::VReg::v20, FEXCore::ARMEmitter::VReg::v21, - FEXCore::ARMEmitter::VReg::v22, FEXCore::ARMEmitter::VReg::v23, + FEXCore::ARMEmitter::VReg::v16, FEXCore::ARMEmitter::VReg::v17, FEXCore::ARMEmitter::VReg::v18, FEXCore::ARMEmitter::VReg::v19, + FEXCore::ARMEmitter::VReg::v20, FEXCore::ARMEmitter::VReg::v21, FEXCore::ARMEmitter::VReg::v22, FEXCore::ARMEmitter::VReg::v23, }; // v8..v15 = (lower 64bits) Callee saved @@ -226,118 +220,94 @@ namespace x32 { // v0 ~ v1 are used as temps. 
// FEXCore::ARMEmitter::VReg::v0, FEXCore::ARMEmitter::VReg::v1, - FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, - FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5, - FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, - FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9, - FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11, - FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13, + FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5, + FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9, + FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11, FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13, FEXCore::ARMEmitter::VReg::v14, FEXCore::ARMEmitter::VReg::v15, - FEXCore::ARMEmitter::VReg::v24, FEXCore::ARMEmitter::VReg::v25, - FEXCore::ARMEmitter::VReg::v26, FEXCore::ARMEmitter::VReg::v27, - FEXCore::ARMEmitter::VReg::v28, FEXCore::ARMEmitter::VReg::v29, - FEXCore::ARMEmitter::VReg::v30, FEXCore::ARMEmitter::VReg::v31 - }; + FEXCore::ARMEmitter::VReg::v24, FEXCore::ARMEmitter::VReg::v25, FEXCore::ARMEmitter::VReg::v26, FEXCore::ARMEmitter::VReg::v27, + FEXCore::ARMEmitter::VReg::v28, FEXCore::ARMEmitter::VReg::v29, FEXCore::ARMEmitter::VReg::v30, FEXCore::ARMEmitter::VReg::v31}; // I wish this could get constexpr generated from SRA's definition but impossible until libstdc++12, libc++15. // SRA GPRs that need to be spilled when calling a function with `preserve_all` ABI. 
constexpr std::array PreserveAll_SRA = { - FEXCore::ARMEmitter::Reg::r4, FEXCore::ARMEmitter::Reg::r5, - FEXCore::ARMEmitter::Reg::r6, FEXCore::ARMEmitter::Reg::r7, - FEXCore::ARMEmitter::Reg::r8, + FEXCore::ARMEmitter::Reg::r4, FEXCore::ARMEmitter::Reg::r5, FEXCore::ARMEmitter::Reg::r6, + FEXCore::ARMEmitter::Reg::r7, FEXCore::ARMEmitter::Reg::r8, }; - constexpr uint32_t PreserveAll_SRAMask = { - []() -> uint32_t { - uint32_t Mask{}; - for (auto Reg : PreserveAll_SRA) { - switch (Reg.Idx()) { - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - case 6: - case 7: - case 8: - case 16: - case 17: - Mask |= (1U << Reg.Idx()); - break; - default: break; - } + constexpr uint32_t PreserveAll_SRAMask = {[]() -> uint32_t { + uint32_t Mask {}; + for (auto Reg : PreserveAll_SRA) { + switch (Reg.Idx()) { + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 16: + case 17: Mask |= (1U << Reg.Idx()); break; + default: break; } + } - return Mask; - }() - }; + return Mask; + }()}; // Dynamic GPRs constexpr std::array PreserveAll_Dynamic = { - FEXCore::ARMEmitter::Reg::r16, FEXCore::ARMEmitter::Reg::r17, - FEXCore::ARMEmitter::Reg::r30 - }; + FEXCore::ARMEmitter::Reg::r16, FEXCore::ARMEmitter::Reg::r17, FEXCore::ARMEmitter::Reg::r30}; // SRA FPRs that need to be spilled when calling a function with `preserve_all` ABI. constexpr std::array PreserveAll_SRAFPR = { // None. 
}; - constexpr uint32_t PreserveAll_SRAFPRMask = { - []() -> uint32_t { - uint32_t Mask{}; - for (auto Reg : PreserveAll_SRAFPR) { - Mask |= (1U << Reg.Idx()); - } - return Mask; - }() - }; + constexpr uint32_t PreserveAll_SRAFPRMask = {[]() -> uint32_t { + uint32_t Mask {}; + for (auto Reg : PreserveAll_SRAFPR) { + Mask |= (1U << Reg.Idx()); + } + return Mask; + }()}; // Dynamic FPRs // - v0-v7 constexpr std::array PreserveAll_DynamicFPR = { // v0 ~ v1 are temps - FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, - FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5, - FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, + FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, FEXCore::ARMEmitter::VReg::v4, + FEXCore::ARMEmitter::VReg::v5, FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, }; // SRA FPRs that need to be spilled when the host supports SVE-256bit with `preserve_all` ABI. // This is /all/ of the SRA registers constexpr std::array PreserveAll_SRAFPRSVE = SRAFPR; - constexpr uint32_t PreserveAll_SRAFPRSVEMask = { - []() -> uint32_t { - uint32_t Mask{}; - for (auto Reg : PreserveAll_SRAFPRSVE) { - Mask |= (1U << Reg.Idx()); - } - return Mask; - }() - }; + constexpr uint32_t PreserveAll_SRAFPRSVEMask = {[]() -> uint32_t { + uint32_t Mask {}; + for (auto Reg : PreserveAll_SRAFPRSVE) { + Mask |= (1U << Reg.Idx()); + } + return Mask; + }()}; // Dynamic FPRs when the host supports SVE-256bit. constexpr std::array PreserveAll_DynamicFPRSVE = { // v0 ~ v1 are used as temps. 
- FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, - FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5, - FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, - FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9, - FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11, - FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13, + FEXCore::ARMEmitter::VReg::v2, FEXCore::ARMEmitter::VReg::v3, FEXCore::ARMEmitter::VReg::v4, FEXCore::ARMEmitter::VReg::v5, + FEXCore::ARMEmitter::VReg::v6, FEXCore::ARMEmitter::VReg::v7, FEXCore::ARMEmitter::VReg::v8, FEXCore::ARMEmitter::VReg::v9, + FEXCore::ARMEmitter::VReg::v10, FEXCore::ARMEmitter::VReg::v11, FEXCore::ARMEmitter::VReg::v12, FEXCore::ARMEmitter::VReg::v13, FEXCore::ARMEmitter::VReg::v14, FEXCore::ARMEmitter::VReg::v15, - FEXCore::ARMEmitter::VReg::v24, FEXCore::ARMEmitter::VReg::v25, - FEXCore::ARMEmitter::VReg::v26, FEXCore::ARMEmitter::VReg::v27, - FEXCore::ARMEmitter::VReg::v28, FEXCore::ARMEmitter::VReg::v29, - FEXCore::ARMEmitter::VReg::v30, FEXCore::ARMEmitter::VReg::v31 - }; -} + FEXCore::ARMEmitter::VReg::v24, FEXCore::ARMEmitter::VReg::v25, FEXCore::ARMEmitter::VReg::v26, FEXCore::ARMEmitter::VReg::v27, + FEXCore::ARMEmitter::VReg::v28, FEXCore::ARMEmitter::VReg::v29, FEXCore::ARMEmitter::VReg::v30, FEXCore::ARMEmitter::VReg::v31}; +} // namespace x32 // We want vixl to not allocate a default buffer. Jit and dispatcher will manually create one. 
-Arm64Emitter::Arm64Emitter(FEXCore::Context::ContextImpl *ctx, void* EmissionPtr, size_t size) +Arm64Emitter::Arm64Emitter(FEXCore::Context::ContextImpl* ctx, void* EmissionPtr, size_t size) : Emitter(static_cast(EmissionPtr), size) , EmitterCTX {ctx} #ifdef VIXL_SIMULATOR @@ -370,8 +340,7 @@ Arm64Emitter::Arm64Emitter(FEXCore::Context::ContextImpl *ctx, void* EmissionPtr GeneralPairRegisters = x64::RAPair; StaticFPRegisters = x64::SRAFPR; GeneralFPRegisters = x64::RAFPR; - } - else { + } else { ConfiguredDynamicRegisterBase = std::span(x32::RA.begin() + 6, 8); StaticRegisters = x32::SRA; @@ -387,11 +356,13 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui bool Is64Bit = s == ARMEmitter::Size::i64Bit; int Segments = Is64Bit ? 4 : 2; - if (Is64Bit && ((~Constant)>> 16) == 0) { + if (Is64Bit && ((~Constant) >> 16) == 0) { movn(s, Reg, (~Constant) & 0xFFFF); if (NOPPad) { - nop(); nop(); nop(); + nop(); + nop(); + nop(); } return; } @@ -407,12 +378,14 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui movn(s, Reg.W(), (~Constant) & 0xFFFF); if (NOPPad) { - nop(); nop(); nop(); + nop(); + nop(); + nop(); } return; } - int RequiredMoveSegments{}; + int RequiredMoveSegments {}; // Count the number of move segments // We only want to use ADRP+ADD if we have more than 1 segment @@ -431,7 +404,9 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui if (IsImm) { orr(s, Reg, ARMEmitter::Reg::zr, Constant); if (NOPPad) { - nop(); nop(); nop(); + nop(); + nop(); + nop(); } return; } @@ -455,23 +430,20 @@ void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, ui // If this is 4k page aligned then we only need ADRP if ((AlignedOffset & 0xFFF) == 0) { adrp(Reg, AlignedOffset >> 12); - } - else { + } else { // If the constant is within 1MB of PC then we can still use ADR to load in a single instruction // 21-bit signed integer here int64_t SmallOffset = 
static_cast(Constant) - static_cast(PC); if (vixl::IsInt21(SmallOffset)) { adr(Reg, SmallOffset); - } - else { + } else { // Need to use ADRP + ADD adrp(Reg, AlignedOffset >> 12); add(s, Reg, Reg, Constant & 0xFFF); NumMoves = 2; } } - } - else { + } else { int CurrentSegment = 0; for (; CurrentSegment < Segments; ++CurrentSegment) { uint16_t Part = (Constant >> (CurrentSegment * 16)) & 0xFFFF; @@ -517,18 +489,14 @@ void Arm64Emitter::PushCalleeSavedRegisters() { {ARMEmitter::XReg::x29, ARMEmitter::XReg::x30}, }}; - for (auto &RegPair : CalleeSaved) { + for (auto& RegPair : CalleeSaved) { stp(RegPair.first, RegPair.second, ARMEmitter::Reg::rsp, -16); } // Additionally we need to store the lower 64bits of v8-v15 // Here's a fun thing, we can use two ST4 instructions to store everything // We just need a single sub to sp before that - const std::array< - std::tuple, 2> FPRs = {{ + const std::array< std::tuple, 2> FPRs = {{ {ARMEmitter::DReg::d8, ARMEmitter::DReg::d9, ARMEmitter::DReg::d10, ARMEmitter::DReg::d11}, {ARMEmitter::DReg::d12, ARMEmitter::DReg::d13, ARMEmitter::DReg::d14, ARMEmitter::DReg::d15}, }}; @@ -539,37 +507,21 @@ void Arm64Emitter::PushCalleeSavedRegisters() { // We just saved x19 so it is safe add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r19, ARMEmitter::Reg::rsp, 0); - for (auto &RegQuad : FPRs) { - st4(ARMEmitter::SubRegSize::i64Bit, - std::get<0>(RegQuad), - std::get<1>(RegQuad), - std::get<2>(RegQuad), - std::get<3>(RegQuad), - 0, - ARMEmitter::Reg::r19, - 32); + for (auto& RegQuad : FPRs) { + st4(ARMEmitter::SubRegSize::i64Bit, std::get<0>(RegQuad), std::get<1>(RegQuad), std::get<2>(RegQuad), std::get<3>(RegQuad), 0, + ARMEmitter::Reg::r19, 32); } } void Arm64Emitter::PopCalleeSavedRegisters() { - const std::array< - std::tuple, 2> FPRs = {{ + const std::array< std::tuple, 2> FPRs = {{ {ARMEmitter::DReg::d12, ARMEmitter::DReg::d13, ARMEmitter::DReg::d14, ARMEmitter::DReg::d15}, {ARMEmitter::DReg::d8, ARMEmitter::DReg::d9, 
ARMEmitter::DReg::d10, ARMEmitter::DReg::d11}, }}; - for (auto &RegQuad : FPRs) { - ld4(ARMEmitter::SubRegSize::i64Bit, - std::get<0>(RegQuad), - std::get<1>(RegQuad), - std::get<2>(RegQuad), - std::get<3>(RegQuad), - 0, - ARMEmitter::Reg::rsp, - 32); + for (auto& RegQuad : FPRs) { + ld4(ARMEmitter::SubRegSize::i64Bit, std::get<0>(RegQuad), std::get<1>(RegQuad), std::get<2>(RegQuad), std::get<3>(RegQuad), 0, + ARMEmitter::Reg::rsp, 32); } const fextl::vector> CalleeSaved = {{ @@ -581,7 +533,7 @@ void Arm64Emitter::PopCalleeSavedRegisters() { {ARMEmitter::XReg::x19, ARMEmitter::XReg::x20}, }}; - for (auto &RegPair : CalleeSaved) { + for (auto& RegPair : CalleeSaved) { ldp(RegPair.first, RegPair.second, ARMEmitter::Reg::rsp, 16); } } @@ -600,8 +552,8 @@ void Arm64Emitter::SpillStaticRegs(FEXCore::ARMEmitter::Register TmpReg, bool FP // FIZ(0): Flush Inputs to Zero mrs(TmpReg, ARMEmitter::SystemRegister::FPCR); bic(ARMEmitter::Size::i64Bit, TmpReg, TmpReg, - (1U << 2) | // NEP - (1U << 1)); // AH + (1U << 2) | // NEP + (1U << 1)); // AH msr(ARMEmitter::SystemRegister::FPCR, TmpReg); } #endif @@ -619,18 +571,15 @@ void Arm64Emitter::SpillStaticRegs(FEXCore::ARMEmitter::Register TmpReg, bool FP unsigned PFAFSpillMask = GPRSpillMask & PFAFMask; GPRSpillMask &= ~PFAFSpillMask; - for (size_t i = 0; i < StaticRegisters.size(); i+=2) { + for (size_t i = 0; i < StaticRegisters.size(); i += 2) { auto Reg1 = StaticRegisters[i]; - auto Reg2 = StaticRegisters[i+1]; - if (((1U << Reg1.Idx()) & GPRSpillMask) && - ((1U << Reg2.Idx()) & GPRSpillMask)) { + auto Reg2 = StaticRegisters[i + 1]; + if (((1U << Reg1.Idx()) & GPRSpillMask) && ((1U << Reg2.Idx()) & GPRSpillMask)) { stp(Reg1.X(), Reg2.X(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.gregs[i])); - } - else if (((1U << Reg1.Idx()) & GPRSpillMask)) { + } else if (((1U << Reg1.Idx()) & GPRSpillMask)) { str(Reg1.X(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.gregs[i])); - } - else if (((1U << Reg2.Idx()) & 
GPRSpillMask)) { - str(Reg2.X(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.gregs[i+1])); + } else if (((1U << Reg2.Idx()) & GPRSpillMask)) { + str(Reg2.X(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.gregs[i + 1])); } } @@ -664,21 +613,17 @@ void Arm64Emitter::SpillStaticRegs(FEXCore::ARMEmitter::Register TmpReg, bool FP const auto Reg4 = StaticFPRegisters[i + 3]; st1(Reg1.Q(), Reg2.Q(), Reg3.Q(), Reg4.Q(), TmpReg, 64); } - } - else { + } else { for (size_t i = 0; i < StaticFPRegisters.size(); i += 2) { const auto Reg1 = StaticFPRegisters[i]; const auto Reg2 = StaticFPRegisters[i + 1]; - if (((1U << Reg1.Idx()) & FPRSpillMask) && - ((1U << Reg2.Idx()) & FPRSpillMask)) { + if (((1U << Reg1.Idx()) & FPRSpillMask) && ((1U << Reg2.Idx()) & FPRSpillMask)) { stp(Reg1.Q(), Reg2.Q(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[i][0])); - } - else if (((1U << Reg1.Idx()) & FPRSpillMask)) { + } else if (((1U << Reg1.Idx()) & FPRSpillMask)) { str(Reg1.Q(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[i][0])); - } - else if (((1U << Reg2.Idx()) & FPRSpillMask)) { - str(Reg2.Q(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[i+1][0])); + } else if (((1U << Reg2.Idx()) & FPRSpillMask)) { + str(Reg2.Q(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[i + 1][0])); } } } @@ -689,7 +634,7 @@ void Arm64Emitter::SpillStaticRegs(FEXCore::ARMEmitter::Register TmpReg, bool FP void Arm64Emitter::FillStaticRegs(bool FPRs, uint32_t GPRFillMask, uint32_t FPRFillMask) { FEXCore::ARMEmitter::Register TmpReg = FEXCore::ARMEmitter::Reg::r0; LOGMAN_THROW_A_FMT(GPRFillMask != 0, "Must fill at least 1 GPR for a temp"); - [[maybe_unused]] bool FoundRegister{}; + [[maybe_unused]] bool FoundRegister {}; for (auto Reg : StaticRegisters) { if (((1U << Reg.Idx()) & GPRFillMask)) { TmpReg = Reg; @@ -713,8 +658,8 @@ void Arm64Emitter::FillStaticRegs(bool FPRs, uint32_t GPRFillMask, 
uint32_t FPRF // Additional interesting AFP bits: // FIZ(0): Flush Inputs to Zero orr(ARMEmitter::Size::i64Bit, TmpReg, TmpReg, - (1U << 2) | // NEP - (1U << 1)); // AH + (1U << 2) | // NEP + (1U << 1)); // AH msr(ARMEmitter::SystemRegister::FPCR, TmpReg); } #endif @@ -759,21 +704,17 @@ void Arm64Emitter::FillStaticRegs(bool FPRs, uint32_t GPRFillMask, uint32_t FPRF const auto Reg4 = StaticFPRegisters[i + 3]; ld1(Reg1.Q(), Reg2.Q(), Reg3.Q(), Reg4.Q(), TmpReg, 64); } - } - else { + } else { for (size_t i = 0; i < StaticFPRegisters.size(); i += 2) { const auto Reg1 = StaticFPRegisters[i]; const auto Reg2 = StaticFPRegisters[i + 1]; - if (((1U << Reg1.Idx()) & FPRFillMask) && - ((1U << Reg2.Idx()) & FPRFillMask)) { + if (((1U << Reg1.Idx()) & FPRFillMask) && ((1U << Reg2.Idx()) & FPRFillMask)) { ldp(Reg1.Q(), Reg2.Q(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[i][0])); - } - else if (((1U << Reg1.Idx()) & FPRFillMask)) { + } else if (((1U << Reg1.Idx()) & FPRFillMask)) { ldr(Reg1.Q(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[i][0])); - } - else if (((1U << Reg2.Idx()) & FPRFillMask)) { - ldr(Reg2.Q(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[i+1][0])); + } else if (((1U << Reg2.Idx()) & FPRFillMask)) { + ldr(Reg2.Q(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[i + 1][0])); } } } @@ -785,18 +726,15 @@ void Arm64Emitter::FillStaticRegs(bool FPRs, uint32_t GPRFillMask, uint32_t FPRF uint32_t PFAFFillMask = GPRFillMask & PFAFMask; GPRFillMask &= ~PFAFMask; - for (size_t i = 0; i < StaticRegisters.size(); i+=2) { + for (size_t i = 0; i < StaticRegisters.size(); i += 2) { auto Reg1 = StaticRegisters[i]; - auto Reg2 = StaticRegisters[i+1]; - if (((1U << Reg1.Idx()) & GPRFillMask) && - ((1U << Reg2.Idx()) & GPRFillMask)) { + auto Reg2 = StaticRegisters[i + 1]; + if (((1U << Reg1.Idx()) & GPRFillMask) && ((1U << Reg2.Idx()) & GPRFillMask)) { ldp(Reg1.X(), Reg2.X(), 
STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.gregs[i])); - } - else if ((1U << Reg1.Idx()) & GPRFillMask) { + } else if ((1U << Reg1.Idx()) & GPRFillMask) { ldr(Reg1.X(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.gregs[i])); - } - else if ((1U << Reg2.Idx()) & GPRFillMask) { - ldr(Reg2.X(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.gregs[i+1])); + } else if ((1U << Reg2.Idx()) & GPRFillMask) { + ldr(Reg2.X(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.gregs[i + 1])); } } @@ -828,8 +766,7 @@ void Arm64Emitter::PushVectorRegisters(FEXCore::ARMEmitter::Register TmpReg, boo st4b(Reg1.Z(), Reg2.Z(), Reg3.Z(), Reg4.Z(), PRED_TMP_32B, TmpReg, 0); add(ARMEmitter::Size::i64Bit, TmpReg, TmpReg, 32 * 4); } - } - else { + } else { size_t i = 0; for (; i < (VRegs.size() % 4); i += 2) { const auto Reg1 = VRegs[i]; @@ -913,8 +850,7 @@ void Arm64Emitter::PopGeneralRegisters(std::spanHostFeatures.SupportsAVX; const auto GPRSize = (ConfiguredDynamicRegisterBase.size() + 1) * Core::CPUState::GPR_REG_SIZE; - const auto FPRRegSize = CanUseSVE ? Core::CPUState::XMM_AVX_REG_SIZE - : Core::CPUState::XMM_SSE_REG_SIZE; + const auto FPRRegSize = CanUseSVE ? Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE; const auto FPRSize = GeneralFPRegisters.size() * FPRRegSize; const uint64_t SPOffset = AlignUp(GPRSize + FPRSize, 16); @@ -948,13 +884,12 @@ void Arm64Emitter::PopDynamicRegsAndLR() { void Arm64Emitter::SpillForPreserveAllABICall(FEXCore::ARMEmitter::Register TmpReg, bool FPRs) { const auto CanUseSVE = EmitterCTX->HostFeatures.SupportsAVX; - const auto FPRRegSize = CanUseSVE ? Core::CPUState::XMM_AVX_REG_SIZE - : Core::CPUState::XMM_SSE_REG_SIZE; + const auto FPRRegSize = CanUseSVE ? 
Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE; - std::span DynamicGPRs{}; - std::span DynamicFPRs{}; - uint32_t PreserveSRAMask{}; - uint32_t PreserveSRAFPRMask{}; + std::span DynamicGPRs {}; + std::span DynamicFPRs {}; + uint32_t PreserveSRAMask {}; + uint32_t PreserveSRAFPRMask {}; if (EmitterCTX->Config.Is64BitMode()) { DynamicGPRs = x64::PreserveAll_Dynamic; DynamicFPRs = x64::PreserveAll_DynamicFPR; @@ -965,8 +900,7 @@ void Arm64Emitter::SpillForPreserveAllABICall(FEXCore::ARMEmitter::Register TmpR DynamicFPRs = x64::PreserveAll_DynamicFPRSVE; PreserveSRAFPRMask = x64::PreserveAll_SRAFPRSVEMask; } - } - else { + } else { DynamicGPRs = x32::PreserveAll_Dynamic; DynamicFPRs = x32::PreserveAll_DynamicFPR; PreserveSRAMask = x32::PreserveAll_SRAMask; @@ -1000,10 +934,10 @@ void Arm64Emitter::SpillForPreserveAllABICall(FEXCore::ARMEmitter::Register TmpR void Arm64Emitter::FillForPreserveAllABICall(bool FPRs) { const auto CanUseSVE = EmitterCTX->HostFeatures.SupportsAVX; - std::span DynamicGPRs{}; - std::span DynamicFPRs{}; - uint32_t PreserveSRAMask{}; - uint32_t PreserveSRAFPRMask{}; + std::span DynamicGPRs {}; + std::span DynamicFPRs {}; + uint32_t PreserveSRAMask {}; + uint32_t PreserveSRAFPRMask {}; if (EmitterCTX->Config.Is64BitMode()) { DynamicGPRs = x64::PreserveAll_Dynamic; @@ -1015,8 +949,7 @@ void Arm64Emitter::FillForPreserveAllABICall(bool FPRs) { DynamicFPRs = x64::PreserveAll_DynamicFPRSVE; PreserveSRAFPRMask = x64::PreserveAll_SRAFPRSVEMask; } - } - else { + } else { DynamicGPRs = x32::PreserveAll_Dynamic; DynamicFPRs = x32::PreserveAll_DynamicFPR; PreserveSRAMask = x32::PreserveAll_SRAMask; @@ -1045,4 +978,4 @@ void Arm64Emitter::Align16B() { } } -} +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/BlockSamplingData.cpp b/FEXCore/Source/Interface/Core/BlockSamplingData.cpp index c4c6cb098b..6608633065 100644 --- a/FEXCore/Source/Interface/Core/BlockSamplingData.cpp +++ 
b/FEXCore/Source/Interface/Core/BlockSamplingData.cpp @@ -6,48 +6,46 @@ #include namespace FEXCore { - void BlockSamplingData::DumpBlockData() { - std::fstream Output; - Output.open("output.csv", std::fstream::out | std::fstream::binary); +void BlockSamplingData::DumpBlockData() { + std::fstream Output; + Output.open("output.csv", std::fstream::out | std::fstream::binary); - if (!Output.is_open()) - return; - - Output << "Entry, Min, Max, Total, Calls, Average" << std::endl; + if (!Output.is_open()) { + return; + } - for (auto it : SamplingMap) { - if (!it.second->TotalCalls) - continue; + Output << "Entry, Min, Max, Total, Calls, Average" << std::endl; - Output << "0x" << std::hex << it.first - << ", " << std::dec << it.second->Min - << ", " << std::dec << it.second->Max - << ", " << std::dec << it.second->TotalTime - << ", " << std::dec << it.second->TotalCalls - << ", " << std::dec << ((double)it.second->TotalTime / (double)it.second->TotalCalls) - << std::endl; + for (auto it : SamplingMap) { + if (!it.second->TotalCalls) { + continue; } - Output.close(); - LogMan::Msg::DFmt("Dumped {} blocks of sampling data", SamplingMap.size()); + + Output << "0x" << std::hex << it.first << ", " << std::dec << it.second->Min << ", " << std::dec << it.second->Max << ", " << std::dec + << it.second->TotalTime << ", " << std::dec << it.second->TotalCalls << ", " << std::dec + << ((double)it.second->TotalTime / (double)it.second->TotalCalls) << std::endl; } + Output.close(); + LogMan::Msg::DFmt("Dumped {} blocks of sampling data", SamplingMap.size()); +} - BlockSamplingData::BlockData *BlockSamplingData::GetBlockData(uint64_t RIP) { - auto it = SamplingMap.find(RIP); - if (it != SamplingMap.end()) { - return it->second; - } - BlockData *NewData = new BlockData{}; - memset(NewData, 0, sizeof(BlockData)); - NewData->Min = ~0ULL; - SamplingMap[RIP] = NewData; - return NewData; +BlockSamplingData::BlockData* BlockSamplingData::GetBlockData(uint64_t RIP) { + auto it = 
SamplingMap.find(RIP); + if (it != SamplingMap.end()) { + return it->second; } + BlockData* NewData = new BlockData {}; + memset(NewData, 0, sizeof(BlockData)); + NewData->Min = ~0ULL; + SamplingMap[RIP] = NewData; + return NewData; +} - BlockSamplingData::~BlockSamplingData() { - DumpBlockData(); - for (auto it : SamplingMap) { - delete it.second; - } - SamplingMap.clear(); +BlockSamplingData::~BlockSamplingData() { + DumpBlockData(); + for (auto it : SamplingMap) { + delete it.second; } + SamplingMap.clear(); } +} // namespace FEXCore diff --git a/FEXCore/Source/Interface/Core/CPUBackend.cpp b/FEXCore/Source/Interface/Core/CPUBackend.cpp index ddd02a1204..020fb22b9e 100644 --- a/FEXCore/Source/Interface/Core/CPUBackend.cpp +++ b/FEXCore/Source/Interface/Core/CPUBackend.cpp @@ -12,347 +12,318 @@ namespace FEXCore { namespace CPU { -constexpr static uint64_t NamedVectorConstants[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_CONST_POOL_MAX][2] = { - {0x0003'0002'0001'0000ULL, 0x0007'0006'0005'0004ULL}, // NAMED_VECTOR_INCREMENTAL_U16_INDEX - {0x000B'000A'0009'0008ULL, 0x000F'000E'000D'000CULL}, // NAMED_VECTOR_INCREMENTAL_U16_INDEX_UPPER - {0x0000'0000'8000'0000ULL, 0x0000'0000'8000'0000ULL}, // NAMED_VECTOR_PADDSUBPS_INVERT - {0x0000'0000'8000'0000ULL, 0x0000'0000'8000'0000ULL}, // NAMED_VECTOR_PADDSUBPS_INVERT_UPPER - {0x8000'0000'0000'0000ULL, 0x0000'0000'0000'0000ULL}, // NAMED_VECTOR_PADDSUBPD_INVERT - {0x8000'0000'0000'0000ULL, 0x0000'0000'0000'0000ULL}, // NAMED_VECTOR_PADDSUBPD_INVERT_UPPER - {0x0000'0001'0000'0000ULL, 0x0000'0003'0000'0002ULL}, // NAMED_VECTOR_MOVMSKPS_SHIFT - {0x040B'0E01'0B0E'0104ULL, 0x0C03'0609'0306'090CULL}, // NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE - {0x0706'0504'FFFF'FFFFULL, 0xFFFF'FFFF'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_0110B - {0x0706'0504'0302'0100ULL, 0xFFFF'FFFF'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_0111B - {0xFFFF'FFFF'0302'0100ULL, 0x0F0E'0D0C'FFFF'FFFFULL}, // NAMED_VECTOR_BLENDPS_1001B - {0x0706'0504'0302'0100ULL, 
0x0F0E'0D0C'FFFF'FFFFULL}, // NAMED_VECTOR_BLENDPS_1011B - {0xFFFF'FFFF'0302'0100ULL, 0x0F0E'0D0C'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_1101B - {0x0706'0504'FFFF'FFFFULL, 0x0F0E'0D0C'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_1110B -}; - -constexpr static auto PSHUFLW_LUT { -[]() consteval { - struct LUTType { - uint64_t Val[2]; + constexpr static uint64_t NamedVectorConstants[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_CONST_POOL_MAX][2] = { + {0x0003'0002'0001'0000ULL, 0x0007'0006'0005'0004ULL}, // NAMED_VECTOR_INCREMENTAL_U16_INDEX + {0x000B'000A'0009'0008ULL, 0x000F'000E'000D'000CULL}, // NAMED_VECTOR_INCREMENTAL_U16_INDEX_UPPER + {0x0000'0000'8000'0000ULL, 0x0000'0000'8000'0000ULL}, // NAMED_VECTOR_PADDSUBPS_INVERT + {0x0000'0000'8000'0000ULL, 0x0000'0000'8000'0000ULL}, // NAMED_VECTOR_PADDSUBPS_INVERT_UPPER + {0x8000'0000'0000'0000ULL, 0x0000'0000'0000'0000ULL}, // NAMED_VECTOR_PADDSUBPD_INVERT + {0x8000'0000'0000'0000ULL, 0x0000'0000'0000'0000ULL}, // NAMED_VECTOR_PADDSUBPD_INVERT_UPPER + {0x0000'0001'0000'0000ULL, 0x0000'0003'0000'0002ULL}, // NAMED_VECTOR_MOVMSKPS_SHIFT + {0x040B'0E01'0B0E'0104ULL, 0x0C03'0609'0306'090CULL}, // NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE + {0x0706'0504'FFFF'FFFFULL, 0xFFFF'FFFF'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_0110B + {0x0706'0504'0302'0100ULL, 0xFFFF'FFFF'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_0111B + {0xFFFF'FFFF'0302'0100ULL, 0x0F0E'0D0C'FFFF'FFFFULL}, // NAMED_VECTOR_BLENDPS_1001B + {0x0706'0504'0302'0100ULL, 0x0F0E'0D0C'FFFF'FFFFULL}, // NAMED_VECTOR_BLENDPS_1011B + {0xFFFF'FFFF'0302'0100ULL, 0x0F0E'0D0C'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_1101B + {0x0706'0504'FFFF'FFFFULL, 0x0F0E'0D0C'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_1110B }; - // Expectation for this LUT is to simulate PSHUFLW with ARM's TBL (single register) instruction - // PSHUFLW behaviour: - // 16-bit words in [63:48], [47:32], [31:16], [15:0] are selected using the 8-bit Index. - // For 128-bit PSHUFLW, bits [127:64] are identity copied. 
- constexpr uint64_t IdentityCopyUpper = 0x0f'0e'0d'0c'0b'0a'09'08; - std::array TotalLUT{}; - uint64_t WordSelection[4] = { - 0x01'00, - 0x03'02, - 0x05'04, - 0x07'06, - }; - for (size_t i = 0; i < 256; ++i) { - auto &LUT = TotalLUT[i]; - const auto Word0 = (i >> 0) & 0b11; - const auto Word1 = (i >> 2) & 0b11; - const auto Word2 = (i >> 4) & 0b11; - const auto Word3 = (i >> 6) & 0b11; - - LUT.Val[0] = - (WordSelection[Word0] << 0) | - (WordSelection[Word1] << 16) | - (WordSelection[Word2] << 32) | - (WordSelection[Word3] << 48); - - LUT.Val[1] = IdentityCopyUpper; - } - return TotalLUT; -}() -}; - -constexpr static auto PSHUFHW_LUT { -[]() consteval { - struct LUTType { - uint64_t Val[2]; - }; - // Expectation for this LUT is to simulate PSHUFHW with ARM's TBL (single register) instruction - // PSHUFHW behaviour: - // 16-bit words in [127:112], [111:96], [95:80], [79:64] are selected using the 8-bit Index. - // Incoming words come from bits [127:64] of the source. - // Bits [63:0] are identity copied. - constexpr uint64_t IdentityCopyLower = 0x07'06'05'04'03'02'01'00; - std::array TotalLUT{}; - uint64_t WordSelection[4] = { - 0x09'08, - 0x0b'0a, - 0x0d'0c, - 0x0f'0e, - }; - for (size_t i = 0; i < 256; ++i) { - auto &LUT = TotalLUT[i]; - const auto Word0 = (i >> 0) & 0b11; - const auto Word1 = (i >> 2) & 0b11; - const auto Word2 = (i >> 4) & 0b11; - const auto Word3 = (i >> 6) & 0b11; - LUT.Val[0] = IdentityCopyLower; + constexpr static auto PSHUFLW_LUT {[]() consteval { + struct LUTType { + uint64_t Val[2]; + }; + // Expectation for this LUT is to simulate PSHUFLW with ARM's TBL (single register) instruction + // PSHUFLW behaviour: + // 16-bit words in [63:48], [47:32], [31:16], [15:0] are selected using the 8-bit Index. + // For 128-bit PSHUFLW, bits [127:64] are identity copied. 
+ constexpr uint64_t IdentityCopyUpper = 0x0f'0e'0d'0c'0b'0a'09'08; + std::array TotalLUT {}; + uint64_t WordSelection[4] = { + 0x01'00, + 0x03'02, + 0x05'04, + 0x07'06, + }; + for (size_t i = 0; i < 256; ++i) { + auto& LUT = TotalLUT[i]; + const auto Word0 = (i >> 0) & 0b11; + const auto Word1 = (i >> 2) & 0b11; + const auto Word2 = (i >> 4) & 0b11; + const auto Word3 = (i >> 6) & 0b11; - LUT.Val[1] = - (WordSelection[Word0] << 0) | - (WordSelection[Word1] << 16) | - (WordSelection[Word2] << 32) | - (WordSelection[Word3] << 48); + LUT.Val[0] = (WordSelection[Word0] << 0) | (WordSelection[Word1] << 16) | (WordSelection[Word2] << 32) | (WordSelection[Word3] << 48); - } - return TotalLUT; -}() -}; - -constexpr static auto PSHUFD_LUT { -[]() consteval { - struct LUTType { - uint64_t Val[2]; - }; - // Expectation for this LUT is to simulate PSHUFD with ARM's TBL (single register) instruction - // PSHUFD behaviour: - // 32-bit words in [127:96], [95:64], [63:32], [31:0] are selected using the 8-bit Index. - std::array TotalLUT{}; - uint64_t WordSelection[4] = { - 0x03'02'01'00, - 0x07'06'05'04, - 0x0b'0a'09'08, - 0x0f'0e'0d'0c, - }; - for (size_t i = 0; i < 256; ++i) { - auto &LUT = TotalLUT[i]; - const auto Word0 = (i >> 0) & 0b11; - const auto Word1 = (i >> 2) & 0b11; - const auto Word2 = (i >> 4) & 0b11; - const auto Word3 = (i >> 6) & 0b11; - - LUT.Val[0] = - (WordSelection[Word0] << 0) | - (WordSelection[Word1] << 32); - - LUT.Val[1] = - (WordSelection[Word2] << 0) | - (WordSelection[Word3] << 32); - } - return TotalLUT; -}() -}; - -constexpr static auto SHUFPS_LUT { -[]() consteval { - struct LUTType { - uint64_t Val[2]; - }; - // 32-bit words in [127:96], [95:64], [63:32], [31:0] are selected using the 8-bit Index. - // Expectation for this LUT is to simulate SHUFPS with ARM's TBL (two register) instruction. - // SHUFPS behaviour: - // Two 32-bits words from each source are selected from each source in the lower and upper halves of the 128-bit destination. 
- // Dest[31:0] = Src1[] - // Dest[63:32] = Src1[] - // Dest[95:64] = Src2[] - // Dest[127:96] = Src2[] - - std::array TotalLUT{}; - const uint64_t WordSelectionSrc1[4] = { - 0x03'02'01'00, - 0x07'06'05'04, - 0x0b'0a'09'08, - 0x0f'0e'0d'0c, - }; + LUT.Val[1] = IdentityCopyUpper; + } + return TotalLUT; + }()}; - // Src2 needs to offset each byte index by 16-bytes to pull from the second source. - const uint64_t WordSelectionSrc2[4] = { - 0x03'02'01'00 + (0x10101010), - 0x07'06'05'04 + (0x10101010), - 0x0b'0a'09'08 + (0x10101010), - 0x0f'0e'0d'0c + (0x10101010), - }; + constexpr static auto PSHUFHW_LUT {[]() consteval { + struct LUTType { + uint64_t Val[2]; + }; + // Expectation for this LUT is to simulate PSHUFHW with ARM's TBL (single register) instruction + // PSHUFHW behaviour: + // 16-bit words in [127:112], [111:96], [95:80], [79:64] are selected using the 8-bit Index. + // Incoming words come from bits [127:64] of the source. + // Bits [63:0] are identity copied. + constexpr uint64_t IdentityCopyLower = 0x07'06'05'04'03'02'01'00; + std::array TotalLUT {}; + uint64_t WordSelection[4] = { + 0x09'08, + 0x0b'0a, + 0x0d'0c, + 0x0f'0e, + }; + for (size_t i = 0; i < 256; ++i) { + auto& LUT = TotalLUT[i]; + const auto Word0 = (i >> 0) & 0b11; + const auto Word1 = (i >> 2) & 0b11; + const auto Word2 = (i >> 4) & 0b11; + const auto Word3 = (i >> 6) & 0b11; - for (size_t i = 0; i < 256; ++i) { - auto &LUT = TotalLUT[i]; - const auto Word0 = (i >> 0) & 0b11; - const auto Word1 = (i >> 2) & 0b11; - const auto Word2 = (i >> 4) & 0b11; - const auto Word3 = (i >> 6) & 0b11; + LUT.Val[0] = IdentityCopyLower; - LUT.Val[0] = - (WordSelectionSrc1[Word0] << 0) | - (WordSelectionSrc1[Word1] << 32); + LUT.Val[1] = (WordSelection[Word0] << 0) | (WordSelection[Word1] << 16) | (WordSelection[Word2] << 32) | (WordSelection[Word3] << 48); + } + return TotalLUT; + }()}; - LUT.Val[1] = - (WordSelectionSrc2[Word2] << 0) | - (WordSelectionSrc2[Word3] << 32); - } - return TotalLUT; -}() -}; - 
-constexpr static auto DPPS_MASK { -[]() consteval { - struct LUTType { - uint32_t Val[4]; - }; + constexpr static auto PSHUFD_LUT {[]() consteval { + struct LUTType { + uint64_t Val[2]; + }; + // Expectation for this LUT is to simulate PSHUFD with ARM's TBL (single register) instruction + // PSHUFD behaviour: + // 32-bit words in [127:96], [95:64], [63:32], [31:0] are selected using the 8-bit Index. + std::array TotalLUT {}; + uint64_t WordSelection[4] = { + 0x03'02'01'00, + 0x07'06'05'04, + 0x0b'0a'09'08, + 0x0f'0e'0d'0c, + }; + for (size_t i = 0; i < 256; ++i) { + auto& LUT = TotalLUT[i]; + const auto Word0 = (i >> 0) & 0b11; + const auto Word1 = (i >> 2) & 0b11; + const auto Word2 = (i >> 4) & 0b11; + const auto Word3 = (i >> 6) & 0b11; - std::array TotalLUT{}; - for (size_t i = 0; i < TotalLUT.size(); ++i) { - auto &LUT = TotalLUT[i]; - constexpr auto GetLUT = [](size_t i, size_t Index) { - if (i & (1U << Index)) { - return -1U; - } - return 0U; + LUT.Val[0] = (WordSelection[Word0] << 0) | (WordSelection[Word1] << 32); + + LUT.Val[1] = (WordSelection[Word2] << 0) | (WordSelection[Word3] << 32); + } + return TotalLUT; + }()}; + + constexpr static auto SHUFPS_LUT {[]() consteval { + struct LUTType { + uint64_t Val[2]; + }; + // 32-bit words in [127:96], [95:64], [63:32], [31:0] are selected using the 8-bit Index. + // Expectation for this LUT is to simulate SHUFPS with ARM's TBL (two register) instruction. + // SHUFPS behaviour: + // Two 32-bits words from each source are selected from each source in the lower and upper halves of the 128-bit destination. 
+ // Dest[31:0] = Src1[] + // Dest[63:32] = Src1[] + // Dest[95:64] = Src2[] + // Dest[127:96] = Src2[] + + std::array TotalLUT {}; + const uint64_t WordSelectionSrc1[4] = { + 0x03'02'01'00, + 0x07'06'05'04, + 0x0b'0a'09'08, + 0x0f'0e'0d'0c, }; - LUT.Val[0] = GetLUT(i, 0); - LUT.Val[1] = GetLUT(i, 1); - LUT.Val[2] = GetLUT(i, 2); - LUT.Val[3] = GetLUT(i, 3); - } - return TotalLUT; -}() -}; - -constexpr static auto DPPD_MASK { -[]() consteval { - struct LUTType { - uint64_t Val[2]; - }; + // Src2 needs to offset each byte index by 16-bytes to pull from the second source. + const uint64_t WordSelectionSrc2[4] = { + 0x03'02'01'00 + (0x10101010), + 0x07'06'05'04 + (0x10101010), + 0x0b'0a'09'08 + (0x10101010), + 0x0f'0e'0d'0c + (0x10101010), + }; - std::array TotalLUT{}; - for (size_t i = 0; i < TotalLUT.size(); ++i) { - auto &LUT = TotalLUT[i]; - constexpr auto GetLUT = [](size_t i, size_t Index) { - if (i & (1U << Index)) { - return -1ULL; - } - return 0ULL; + for (size_t i = 0; i < 256; ++i) { + auto& LUT = TotalLUT[i]; + const auto Word0 = (i >> 0) & 0b11; + const auto Word1 = (i >> 2) & 0b11; + const auto Word2 = (i >> 4) & 0b11; + const auto Word3 = (i >> 6) & 0b11; + + LUT.Val[0] = (WordSelectionSrc1[Word0] << 0) | (WordSelectionSrc1[Word1] << 32); + + LUT.Val[1] = (WordSelectionSrc2[Word2] << 0) | (WordSelectionSrc2[Word3] << 32); + } + return TotalLUT; + }()}; + + constexpr static auto DPPS_MASK {[]() consteval { + struct LUTType { + uint32_t Val[4]; }; - LUT.Val[0] = GetLUT(i, 0); - LUT.Val[1] = GetLUT(i, 1); - } - return TotalLUT; -}() -}; - -constexpr static auto PBLENDW_LUT { -[]() consteval { - struct LUTType { - uint16_t Val[8]; - }; - // 16-bit words in [127:112], [111:96], [95:80], [79:64], [63:48], [47:32], [31:16], [15:0] are selected using 8-bit swizzle. - // Expectation for this LUT is to simulate PBLENDW with ARM's TBX (one register) instruction. 
- // PBLENDW behaviour: - // 16-bit words from the source is moved in to the destination based on the bit in the swizzle. - // Dest[15:0] = Swizzle[0] ? Src[15:0] : Dest[15:0] - // Dest[31:16] = Swizzle[1] ? Src[31:16] : Dest[31:16] - // Dest[47:32] = Swizzle[2] ? Src[47:32] : Dest[47:32] - // Dest[63:48] = Swizzle[3] ? Src[63:48] : Dest[63:48] - // Dest[79:64] = Swizzle[4] ? Src[79:64] : Dest[79:64] - // Dest[95:80] = Swizzle[5] ? Src[95:80] : Dest[95:80] - // Dest[111:96] = Swizzle[6] ? Src[111:96] : Dest[111:96] - // Dest[127:112] = Swizzle[7] ? Src[127:112] : Dest[127:112] - - std::array TotalLUT{}; - const uint16_t WordSelectionSrc[8] = { - 0x01'00, - 0x03'02, - 0x05'04, - 0x07'06, - 0x09'08, - 0x0B'0A, - 0x0D'0C, - 0x0F'0E, - }; + std::array TotalLUT {}; + for (size_t i = 0; i < TotalLUT.size(); ++i) { + auto& LUT = TotalLUT[i]; + constexpr auto GetLUT = [](size_t i, size_t Index) { + if (i & (1U << Index)) { + return -1U; + } + return 0U; + }; - constexpr uint16_t OriginalDest = 0xFF'FF; + LUT.Val[0] = GetLUT(i, 0); + LUT.Val[1] = GetLUT(i, 1); + LUT.Val[2] = GetLUT(i, 2); + LUT.Val[3] = GetLUT(i, 3); + } + return TotalLUT; + }()}; - for (size_t i = 0; i < 256; ++i) { - auto &LUT = TotalLUT[i]; - for (size_t j = 0; j < 8; ++j) { - LUT.Val[j] = ((i >> j) & 1) ? WordSelectionSrc[j] : OriginalDest; + constexpr static auto DPPD_MASK {[]() consteval { + struct LUTType { + uint64_t Val[2]; + }; + + std::array TotalLUT {}; + for (size_t i = 0; i < TotalLUT.size(); ++i) { + auto& LUT = TotalLUT[i]; + constexpr auto GetLUT = [](size_t i, size_t Index) { + if (i & (1U << Index)) { + return -1ULL; + } + return 0ULL; + }; + + LUT.Val[0] = GetLUT(i, 0); + LUT.Val[1] = GetLUT(i, 1); } - } - return TotalLUT; -}() -}; + return TotalLUT; + }()}; + + constexpr static auto PBLENDW_LUT {[]() consteval { + struct LUTType { + uint16_t Val[8]; + }; + // 16-bit words in [127:112], [111:96], [95:80], [79:64], [63:48], [47:32], [31:16], [15:0] are selected using 8-bit swizzle. 
+ // Expectation for this LUT is to simulate PBLENDW with ARM's TBX (one register) instruction. + // PBLENDW behaviour: + // 16-bit words from the source is moved in to the destination based on the bit in the swizzle. + // Dest[15:0] = Swizzle[0] ? Src[15:0] : Dest[15:0] + // Dest[31:16] = Swizzle[1] ? Src[31:16] : Dest[31:16] + // Dest[47:32] = Swizzle[2] ? Src[47:32] : Dest[47:32] + // Dest[63:48] = Swizzle[3] ? Src[63:48] : Dest[63:48] + // Dest[79:64] = Swizzle[4] ? Src[79:64] : Dest[79:64] + // Dest[95:80] = Swizzle[5] ? Src[95:80] : Dest[95:80] + // Dest[111:96] = Swizzle[6] ? Src[111:96] : Dest[111:96] + // Dest[127:112] = Swizzle[7] ? Src[127:112] : Dest[127:112] + + std::array TotalLUT {}; + const uint16_t WordSelectionSrc[8] = { + 0x01'00, 0x03'02, 0x05'04, 0x07'06, 0x09'08, 0x0B'0A, 0x0D'0C, 0x0F'0E, + }; -CPUBackend::CPUBackend(FEXCore::Core::InternalThreadState *ThreadState, size_t InitialCodeSize, size_t MaxCodeSize) - : ThreadState(ThreadState), InitialCodeSize(InitialCodeSize), MaxCodeSize(MaxCodeSize) { + constexpr uint16_t OriginalDest = 0xFF'FF; - auto &Common = ThreadState->CurrentFrame->Pointers.Common; + for (size_t i = 0; i < 256; ++i) { + auto& LUT = TotalLUT[i]; + for (size_t j = 0; j < 8; ++j) { + LUT.Val[j] = ((i >> j) & 1) ? WordSelectionSrc[j] : OriginalDest; + } + } + return TotalLUT; + }()}; - // Initialize named vector constants. - for (size_t i = 0; i < FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_CONST_POOL_MAX; ++i) { - Common.NamedVectorConstantPointers[i] = reinterpret_cast(NamedVectorConstants[i]); - } + CPUBackend::CPUBackend(FEXCore::Core::InternalThreadState* ThreadState, size_t InitialCodeSize, size_t MaxCodeSize) + : ThreadState(ThreadState) + , InitialCodeSize(InitialCodeSize) + , MaxCodeSize(MaxCodeSize) { - // Copy named vector constants. 
- memcpy(Common.NamedVectorConstants, NamedVectorConstants, sizeof(NamedVectorConstants)); + auto& Common = ThreadState->CurrentFrame->Pointers.Common; - // Initialize Indexed named vector constants. - Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW] = reinterpret_cast(PSHUFLW_LUT.data()); - Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFHW] = reinterpret_cast(PSHUFHW_LUT.data()); - Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFD] = reinterpret_cast(PSHUFD_LUT.data()); - Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_SHUFPS] = reinterpret_cast(SHUFPS_LUT.data()); - Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_DPPS_MASK] = reinterpret_cast(DPPS_MASK.data()); - Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_DPPD_MASK] = reinterpret_cast(DPPD_MASK.data()); - Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PBLENDW] = reinterpret_cast(PBLENDW_LUT.data()); + // Initialize named vector constants. + for (size_t i = 0; i < FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_CONST_POOL_MAX; ++i) { + Common.NamedVectorConstantPointers[i] = reinterpret_cast(NamedVectorConstants[i]); + } + + // Copy named vector constants. + memcpy(Common.NamedVectorConstants, NamedVectorConstants, sizeof(NamedVectorConstants)); + + // Initialize Indexed named vector constants. 
+ Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW] = + reinterpret_cast(PSHUFLW_LUT.data()); + Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFHW] = + reinterpret_cast(PSHUFHW_LUT.data()); + Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFD] = + reinterpret_cast(PSHUFD_LUT.data()); + Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_SHUFPS] = + reinterpret_cast(SHUFPS_LUT.data()); + Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_DPPS_MASK] = + reinterpret_cast(DPPS_MASK.data()); + Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_DPPD_MASK] = + reinterpret_cast(DPPD_MASK.data()); + Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PBLENDW] = + reinterpret_cast(PBLENDW_LUT.data()); #ifndef FEX_DISABLE_TELEMETRY - // Fill in telemetry values - for (size_t i = 0; i < FEXCore::Telemetry::TYPE_LAST; ++i) { - auto &Telem = FEXCore::Telemetry::GetTelemetryValue(static_cast(i)); - Common.TelemetryValueAddresses[i] = reinterpret_cast(Telem.GetAddr()); - } + // Fill in telemetry values + for (size_t i = 0; i < FEXCore::Telemetry::TYPE_LAST; ++i) { + auto& Telem = FEXCore::Telemetry::GetTelemetryValue(static_cast(i)); + Common.TelemetryValueAddresses[i] = reinterpret_cast(Telem.GetAddr()); + } #endif -} + } -CPUBackend::~CPUBackend() { - for (auto CodeBuffer : CodeBuffers) { - FreeCodeBuffer(CodeBuffer); + CPUBackend::~CPUBackend() { + for (auto CodeBuffer : CodeBuffers) { + FreeCodeBuffer(CodeBuffer); + } + CodeBuffers.clear(); } - CodeBuffers.clear(); -} -auto CPUBackend::GetEmptyCodeBuffer() -> CodeBuffer * { - if (ThreadState->CurrentFrame->SignalHandlerRefCounter == 0) { - if 
(CodeBuffers.empty()) { - auto NewCodeBuffer = AllocateNewCodeBuffer(InitialCodeSize); - EmplaceNewCodeBuffer(NewCodeBuffer); - } else { - if (CodeBuffers.size() > 1) { - // If we have more than one code buffer we are tracking then walk them and delete - // This is a cleanup step - for (size_t i = 1; i < CodeBuffers.size(); i++) { - FreeCodeBuffer(CodeBuffers[i]); + auto CPUBackend::GetEmptyCodeBuffer() -> CodeBuffer* { + if (ThreadState->CurrentFrame->SignalHandlerRefCounter == 0) { + if (CodeBuffers.empty()) { + auto NewCodeBuffer = AllocateNewCodeBuffer(InitialCodeSize); + EmplaceNewCodeBuffer(NewCodeBuffer); + } else { + if (CodeBuffers.size() > 1) { + // If we have more than one code buffer we are tracking then walk them and delete + // This is a cleanup step + for (size_t i = 1; i < CodeBuffers.size(); i++) { + FreeCodeBuffer(CodeBuffers[i]); + } + CodeBuffers.resize(1); } - CodeBuffers.resize(1); - } - // Set the current code buffer to the initial - CurrentCodeBuffer = &CodeBuffers[0]; + // Set the current code buffer to the initial + CurrentCodeBuffer = &CodeBuffers[0]; - if (CurrentCodeBuffer->Size != MaxCodeSize) { - FreeCodeBuffer(*CurrentCodeBuffer); + if (CurrentCodeBuffer->Size != MaxCodeSize) { + FreeCodeBuffer(*CurrentCodeBuffer); - // Resize the code buffer and reallocate our code size - CurrentCodeBuffer->Size *= 1.5; - CurrentCodeBuffer->Size = std::min(CurrentCodeBuffer->Size, MaxCodeSize); + // Resize the code buffer and reallocate our code size + CurrentCodeBuffer->Size *= 1.5; + CurrentCodeBuffer->Size = std::min(CurrentCodeBuffer->Size, MaxCodeSize); - *CurrentCodeBuffer = AllocateNewCodeBuffer(CurrentCodeBuffer->Size); + *CurrentCodeBuffer = AllocateNewCodeBuffer(CurrentCodeBuffer->Size); + } } + } else { + // We have signal handlers that have generated code + // This means that we can not safely clear the code at this point in time + // Allocate some new code buffers that we can switch over to instead + auto NewCodeBuffer = 
AllocateNewCodeBuffer(InitialCodeSize); + EmplaceNewCodeBuffer(NewCodeBuffer); } - } else { - // We have signal handlers that have generated code - // This means that we can not safely clear the code at this point in time - // Allocate some new code buffers that we can switch over to instead - auto NewCodeBuffer = AllocateNewCodeBuffer(InitialCodeSize); - EmplaceNewCodeBuffer(NewCodeBuffer); - } - return CurrentCodeBuffer; -} + return CurrentCodeBuffer; + } -auto CPUBackend::AllocateNewCodeBuffer(size_t Size) -> CodeBuffer { + auto CPUBackend::AllocateNewCodeBuffer(size_t Size) -> CodeBuffer { #ifndef _WIN32 // MDWE (Memory-Deny-Write-Execute) is a new Linux 6.3 feature. // It's equivalent to systemd's `MemoryDenyWriteExecute` but implemented entirely in the kernel. @@ -372,40 +343,39 @@ auto CPUBackend::AllocateNewCodeBuffer(size_t Size) -> CodeBuffer { #ifndef PR_GET_MDWE #define PR_GET_MDWE 66 #endif - int MDWE = ::prctl(PR_GET_MDWE, 0, 0, 0, 0); - if (MDWE != -1 && MDWE != 0) { - LogMan::Msg::EFmt("MDWE was set to 0x{:x} which means FEX can't allocate executable memory", MDWE); - } + int MDWE = ::prctl(PR_GET_MDWE, 0, 0, 0, 0); + if (MDWE != -1 && MDWE != 0) { + LogMan::Msg::EFmt("MDWE was set to 0x{:x} which means FEX can't allocate executable memory", MDWE); + } #endif - CodeBuffer Buffer; - Buffer.Size = Size; - Buffer.Ptr = static_cast( - FEXCore::Allocator::VirtualAlloc(Buffer.Size, true)); - LOGMAN_THROW_AA_FMT(!!Buffer.Ptr, "Couldn't allocate code buffer"); + CodeBuffer Buffer; + Buffer.Size = Size; + Buffer.Ptr = static_cast(FEXCore::Allocator::VirtualAlloc(Buffer.Size, true)); + LOGMAN_THROW_AA_FMT(!!Buffer.Ptr, "Couldn't allocate code buffer"); - if (static_cast(ThreadState->CTX)->Config.GlobalJITNaming()) { - static_cast(ThreadState->CTX)->Symbols.RegisterJITSpace(Buffer.Ptr, Buffer.Size); + if (static_cast(ThreadState->CTX)->Config.GlobalJITNaming()) { + static_cast(ThreadState->CTX)->Symbols.RegisterJITSpace(Buffer.Ptr, Buffer.Size); + } + return 
Buffer; } - return Buffer; -} -void CPUBackend::FreeCodeBuffer(CodeBuffer Buffer) { - FEXCore::Allocator::VirtualFree(Buffer.Ptr, Buffer.Size); -} + void CPUBackend::FreeCodeBuffer(CodeBuffer Buffer) { + FEXCore::Allocator::VirtualFree(Buffer.Ptr, Buffer.Size); + } -bool CPUBackend::IsAddressInCodeBuffer(uintptr_t Address) const { - for (auto &Buffer: CodeBuffers) { - auto start = (uintptr_t)Buffer.Ptr; - auto end = start + Buffer.Size; + bool CPUBackend::IsAddressInCodeBuffer(uintptr_t Address) const { + for (auto& Buffer : CodeBuffers) { + auto start = (uintptr_t)Buffer.Ptr; + auto end = start + Buffer.Size; - if (Address >= start && Address < end) { - return true; + if (Address >= start && Address < end) { + return true; + } } - } - return false; -} + return false; + } -} -} +} // namespace CPU +} // namespace FEXCore diff --git a/FEXCore/Source/Interface/Core/CPUID.cpp b/FEXCore/Source/Interface/Core/CPUID.cpp index 5be7b107f6..d6f9a18acd 100644 --- a/FEXCore/Source/Interface/Core/CPUID.cpp +++ b/FEXCore/Source/Interface/Core/CPUID.cpp @@ -71,53 +71,50 @@ namespace ProductNames { static const char ARM_Icestorm[] = "Apple Icestorm"; #else #endif -} +} // namespace ProductNames static uint32_t GetCPUID() { - uint32_t CPU{}; + uint32_t CPU {}; FHU::Syscalls::getcpu(&CPU, nullptr); return CPU; } #ifdef CPUID_AMD -constexpr uint32_t FAMILY_IDENTIFIER = - 0 | // Stepping - (0xA << 4) | // Model - (0xF << 8) | // Family ID - (0 << 12) | // Processor type - (0 << 16) | // Extended model ID - (1 << 20); // Extended family ID +constexpr uint32_t FAMILY_IDENTIFIER = 0 | // Stepping + (0xA << 4) | // Model + (0xF << 8) | // Family ID + (0 << 12) | // Processor type + (0 << 16) | // Extended model ID + (1 << 20); // Extended family ID #else -constexpr uint32_t FAMILY_IDENTIFIER = - 0 | // Stepping - (0x7 << 4) | // Model - (0x6 << 8) | // Family ID - (0 << 12) | // Processor type - (1 << 16) | // Extended model ID - (0x0 << 20); // Extended family ID +constexpr uint32_t 
FAMILY_IDENTIFIER = 0 | // Stepping + (0x7 << 4) | // Model + (0x6 << 8) | // Family ID + (0 << 12) | // Processor type + (1 << 16) | // Extended model ID + (0x0 << 20); // Extended family ID #endif #ifdef _M_ARM_64 uint32_t GetCycleCounterFrequency() { - uint64_t Result{}; - __asm("mrs %[Res], CNTFRQ_EL0" - : [Res] "=r" (Result)); + uint64_t Result {}; + __asm("mrs %[Res], CNTFRQ_EL0" : [Res] "=r"(Result)); return Result; } void CPUIDEmu::SetupHostHybridFlag() { PerCPUData.resize(Cores); - uint64_t MIDR{}; + uint64_t MIDR {}; for (size_t i = 0; i < Cores; ++i) { - std::error_code ec{}; + std::error_code ec {}; fextl::string MIDRPath = fextl::fmt::format("/sys/devices/system/cpu/cpu{}/regs/identification/midr_el1", i); std::array Data; // Needs to be a fixed size since depending on kernel it will try to read a full page of data and fail // Only read 18 bytes for a 64bit value prefixed with 0x if (FEXCore::FileLoading::LoadFileToBuffer(MIDRPath, Data) == sizeof(Data)) { - uint64_t NewMIDR{}; + uint64_t NewMIDR {}; std::string_view MIDRView(Data.data(), sizeof(Data)); if (FEXCore::StrConv::Conv(MIDRView, &NewMIDR)) { if (MIDR != 0 && MIDR != NewMIDR) { @@ -137,7 +134,7 @@ void CPUIDEmu::SetupHostHybridFlag() { uint8_t Implementer; uint16_t Part; bool DefaultBig; // Defaults to a big core - const char *ProductName{}; + const char* ProductName {}; }; // CPU priority order @@ -147,32 +144,32 @@ void CPUIDEmu::SetupHostHybridFlag() { // Typically big CPU cores {0x61, 0x023, 1, ProductNames::ARM_Firestorm}, // Apple M1 Firestorm - {0x41, 0xd82, 1, ProductNames::ARM_X4}, // X4 - {0x41, 0xd81, 1, ProductNames::ARM_A720}, // A720 - {0x41, 0xd4e, 1, ProductNames::ARM_X3}, // X3 - {0x41, 0xd4d, 1, ProductNames::ARM_A715}, // A715 - {0x41, 0xd4f, 1, ProductNames::ARM_V2}, // V2 - {0x41, 0xd49, 1, ProductNames::ARM_N2}, // N2 - {0x41, 0xd4b, 1, ProductNames::ARM_A78C}, // A78C - {0x41, 0xd4a, 1, ProductNames::ARM_E1}, // E1 - {0x41, 0xd49, 1, ProductNames::ARM_N2}, // N2 - 
{0x41, 0xd48, 1, ProductNames::ARM_X2}, // X2 - {0x41, 0xd47, 1, ProductNames::ARM_A710}, // A710 - {0x41, 0xd4C, 1, ProductNames::ARM_X1C}, // X1C - {0x41, 0xd44, 1, ProductNames::ARM_X1}, // X1 - {0x41, 0xd42, 1, ProductNames::ARM_A78AE}, // A78AE - {0x41, 0xd41, 1, ProductNames::ARM_A78}, // A78 - {0x41, 0xd40, 1, ProductNames::ARM_V1}, // V1 - {0x41, 0xd0e, 1, ProductNames::ARM_A76AE}, // A76AE - {0x41, 0xd0d, 1, ProductNames::ARM_A77}, // A77 - {0x41, 0xd0c, 1, ProductNames::ARM_N1}, // N1 - {0x41, 0xd0b, 1, ProductNames::ARM_A76}, // A76 + {0x41, 0xd82, 1, ProductNames::ARM_X4}, // X4 + {0x41, 0xd81, 1, ProductNames::ARM_A720}, // A720 + {0x41, 0xd4e, 1, ProductNames::ARM_X3}, // X3 + {0x41, 0xd4d, 1, ProductNames::ARM_A715}, // A715 + {0x41, 0xd4f, 1, ProductNames::ARM_V2}, // V2 + {0x41, 0xd49, 1, ProductNames::ARM_N2}, // N2 + {0x41, 0xd4b, 1, ProductNames::ARM_A78C}, // A78C + {0x41, 0xd4a, 1, ProductNames::ARM_E1}, // E1 + {0x41, 0xd49, 1, ProductNames::ARM_N2}, // N2 + {0x41, 0xd48, 1, ProductNames::ARM_X2}, // X2 + {0x41, 0xd47, 1, ProductNames::ARM_A710}, // A710 + {0x41, 0xd4C, 1, ProductNames::ARM_X1C}, // X1C + {0x41, 0xd44, 1, ProductNames::ARM_X1}, // X1 + {0x41, 0xd42, 1, ProductNames::ARM_A78AE}, // A78AE + {0x41, 0xd41, 1, ProductNames::ARM_A78}, // A78 + {0x41, 0xd40, 1, ProductNames::ARM_V1}, // V1 + {0x41, 0xd0e, 1, ProductNames::ARM_A76AE}, // A76AE + {0x41, 0xd0d, 1, ProductNames::ARM_A77}, // A77 + {0x41, 0xd0c, 1, ProductNames::ARM_N1}, // N1 + {0x41, 0xd0b, 1, ProductNames::ARM_A76}, // A76 {0x51, 0x804, 1, ProductNames::ARM_Kryo400}, // Kryo 4xx Gold (A76 based) - {0x41, 0xd0a, 1, ProductNames::ARM_A75}, // A75 + {0x41, 0xd0a, 1, ProductNames::ARM_A75}, // A75 {0x51, 0x802, 1, ProductNames::ARM_Kryo300}, // Kryo 3xx Gold (A75 based) - {0x41, 0xd09, 1, ProductNames::ARM_A73}, // A73 + {0x41, 0xd09, 1, ProductNames::ARM_A73}, // A73 {0x51, 0x800, 1, ProductNames::ARM_Kryo200}, // Kryo 2xx Gold (A73 based) - {0x41, 0xd08, 1, 
ProductNames::ARM_A72}, // A72 + {0x41, 0xd08, 1, ProductNames::ARM_A72}, // A72 {0x4e, 0x004, 1, ProductNames::ARM_Carmel}, // Carmel @@ -183,27 +180,26 @@ void CPUIDEmu::SetupHostHybridFlag() { // Typically Little CPU cores {0x61, 0x022, 0, ProductNames::ARM_Icestorm}, // Apple M1 Icestorm - {0x41, 0xd80, 0, ProductNames::ARM_A520}, // A520 - {0x41, 0xd46, 0, ProductNames::ARM_A510}, // A510 - {0x41, 0xd06, 0, ProductNames::ARM_A65}, // A65 - {0x41, 0xd05, 0, ProductNames::ARM_A55}, // A55 + {0x41, 0xd80, 0, ProductNames::ARM_A520}, // A520 + {0x41, 0xd46, 0, ProductNames::ARM_A510}, // A510 + {0x41, 0xd06, 0, ProductNames::ARM_A65}, // A65 + {0x41, 0xd05, 0, ProductNames::ARM_A55}, // A55 {0x51, 0x805, 0, ProductNames::ARM_Kryo400S}, // Kryo 4xx/5xx Silver (A55 based) {0x51, 0x803, 0, ProductNames::ARM_Kryo300S}, // Kryo 3xx Silver (A55 based) - {0x41, 0xd03, 0, ProductNames::ARM_A53}, // A53 + {0x41, 0xd03, 0, ProductNames::ARM_A53}, // A53 {0x51, 0x801, 0, ProductNames::ARM_Kryo200S}, // Kryo 2xx Silver (A53 based) - {0x41, 0xd04, 0, ProductNames::ARM_A35}, // A35 + {0x41, 0xd04, 0, ProductNames::ARM_A35}, // A35 {0x41, 0, 0, ProductNames::ARM_UNKNOWN}, // Invalid CPU or Apple CPU inside Parallels VM - {0x0, 0, 0, ProductNames::ARM_UNKNOWN}, // Invalid starting point is lowest ranked + {0x0, 0, 0, ProductNames::ARM_UNKNOWN}, // Invalid starting point is lowest ranked }}; auto FindDefinedMIDR = [](uint32_t MIDR) -> const CPUMIDR* { uint8_t Implementer = MIDR >> 24; uint16_t Part = (MIDR >> 4) & 0xFFF; - for (auto &MIDROption : CPUMIDRs) { - if (MIDROption.Implementer == Implementer && - MIDROption.Part == Part) { + for (auto& MIDROption : CPUMIDRs) { + if (MIDROption.Implementer == Implementer && MIDROption.Part == Part) { return &MIDROption; } } @@ -224,12 +220,10 @@ void CPUIDEmu::SetupHostHybridFlag() { // Found one if (MIDROption->DefaultBig) { BigCores.emplace_back(MIDROption); - } - else { + } else { LittleCores.emplace_back(MIDROption); } - } - else { + 
} else { // If we didn't insert this MIDR then claim it is a little core. LittleCores.emplace_back(&CPUMIDRs.back()); } @@ -246,11 +240,8 @@ void CPUIDEmu::SetupHostHybridFlag() { // Walk our list of CPUMIDRs to find the most little core for (size_t j = LowestMIDRIdx; j < CPUMIDRs.size(); ++j) { - auto &MIDROption = CPUMIDRs[i]; - if ((MIDROption.Implementer == Implementer && - MIDROption.Part == Part) || - (MIDROption.Implementer == 0 && - MIDROption.Part == 0)) { + auto& MIDROption = CPUMIDRs[i]; + if ((MIDROption.Implementer == Implementer && MIDROption.Part == Part) || (MIDROption.Implementer == 0 && MIDROption.Part == 0)) { LowestMIDRIdx = j; LowestMIDR = MIDR; @@ -261,13 +252,12 @@ void CPUIDEmu::SetupHostHybridFlag() { // Now we WILL have found a big core to demote to little status // Demote them - std::erase_if(BigCores, [&LittleCores, LowestMIDR](auto *Entry) { + std::erase_if(BigCores, [&LittleCores, LowestMIDR](auto* Entry) { // Demote by erase copy to little array uint8_t Implementer = LowestMIDR >> 24; uint16_t Part = (LowestMIDR >> 4) & 0xFFF; - if (Entry->Implementer == Implementer && - Entry->Part == Part) { + if (Entry->Implementer == Implementer && Entry->Part == Part) { // Add it to the BigCore list LittleCores.emplace_back(Entry); return true; @@ -281,13 +271,12 @@ void CPUIDEmu::SetupHostHybridFlag() { // Grab the first core, consider it as little, move everything else to Big uint32_t LittleMIDR = PerCPUData[0].MIDR; // Now walk the little cores and move them to Big if they don't match - std::erase_if(LittleCores, [&BigCores, LittleMIDR](auto *Entry) { + std::erase_if(LittleCores, [&BigCores, LittleMIDR](auto* Entry) { // You're promoted now uint8_t Implementer = LittleMIDR >> 24; uint16_t Part = (LittleMIDR >> 4) & 0xFFF; - if (Entry->Implementer != Implementer || - Entry->Part != Part) { + if (Entry->Implementer != Implementer || Entry->Part != Part) { // Add it to the BigCore list BigCores.emplace_back(Entry); return true; @@ -297,15 +286,14 
@@ void CPUIDEmu::SetupHostHybridFlag() { } // Now walk the per CPU data one more time and set if it is big or little - for (auto &Data : PerCPUData) { + for (auto& Data : PerCPUData) { uint8_t Implementer = Data.MIDR >> 24; uint16_t Part = (Data.MIDR >> 4) & 0xFFF; - bool FoundBig{}; - const CPUMIDR *MIDR{}; + bool FoundBig {}; + const CPUMIDR* MIDR {}; for (auto Big : BigCores) { - if (Big->Implementer == Implementer && - Big->Part == Part) { + if (Big->Implementer == Implementer && Big->Part == Part) { FoundBig = true; MIDR = Big; break; @@ -314,8 +302,7 @@ void CPUIDEmu::SetupHostHybridFlag() { if (!FoundBig) { for (auto Little : LittleCores) { - if (Little->Implementer == Implementer && - Little->Part == Part) { + if (Little->Implementer == Implementer && Little->Part == Part) { MIDR = Little; break; } @@ -325,13 +312,11 @@ void CPUIDEmu::SetupHostHybridFlag() { Data.IsBig = FoundBig; if (MIDR) { Data.ProductName = MIDR->ProductName ?: ProductNames::ARM_UNKNOWN; - } - else { + } else { Data.ProductName = ProductNames::ARM_UNKNOWN; } } - } - else { + } else { // If we aren't hybrid then just claim everything is big for (size_t i = 0; i < Cores; ++i) { uint32_t MIDR = PerCPUData[i].MIDR; @@ -340,8 +325,7 @@ void CPUIDEmu::SetupHostHybridFlag() { PerCPUData[i].IsBig = true; if (MIDROption) { PerCPUData[i].ProductName = MIDROption->ProductName ?: ProductNames::ARM_UNKNOWN; - } - else { + } else { PerCPUData[i].ProductName = ProductNames::ARM_UNKNOWN; } } @@ -353,8 +337,7 @@ uint32_t GetCycleCounterFrequency() { return 0; } -void CPUIDEmu::SetupHostHybridFlag() { -} +void CPUIDEmu::SetupHostHybridFlag() {} #endif @@ -373,21 +356,21 @@ void CPUIDEmu::SetupFeatures() { } #define ENABLE_DISABLE_OPTION(FeatureName, name, enum_name) \ - do { \ + do { \ const bool Disable##name = (CPUIDFeatures() & FEXCore::Config::CPUID::DISABLE##enum_name) != 0; \ - const bool Enable##name = (CPUIDFeatures() & FEXCore::Config::CPUID::ENABLE##enum_name) != 0; \ - 
LogMan::Throw::AFmt(!(Disable##name && Enable##name), "Disabling and Enabling CPU feature (" #name ") is mutually exclusive"); \ - const bool AlreadyEnabled = Features.FeatureName; \ - const bool Result = (AlreadyEnabled | Enable##name) & !Disable##name; \ - Features.FeatureName = Result; \ - } while (0) + const bool Enable##name = (CPUIDFeatures() & FEXCore::Config::CPUID::ENABLE##enum_name) != 0; \ + LogMan::Throw::AFmt(!(Disable##name && Enable##name), "Disabling and Enabling CPU feature (" #name ") is mutually exclusive"); \ + const bool AlreadyEnabled = Features.FeatureName; \ + const bool Result = (AlreadyEnabled | Enable##name) & !Disable##name; \ + Features.FeatureName = Result; \ + } while (0) ENABLE_DISABLE_OPTION(SHA, SHA, SHA); #undef ENABLE_DISABLE_OPTION } FEXCore::CPUID::FunctionResults CPUIDEmu::Function_0h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; // EBX, EDX, ECX become the manufacturer id string #ifdef CPUID_AMD @@ -406,116 +389,106 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_0h(uint32_t Leaf) const { // Processor Info and Features bits FEXCore::CPUID::FunctionResults CPUIDEmu::Function_01h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; // Hypervisor bit is normally set but some applications have issues with it. uint32_t Hypervisor = HideHypervisorBit() ? 
0 : 1; Res.eax = FAMILY_IDENTIFIER; - Res.ebx = 0 | // Brand index - (8 << 8) | // Cache line size in bytes - (Cores << 16) | // Number of addressable IDs for the logical cores in the physical CPU - (0 << 24); // Local APIC ID - - Res.ecx = - (1 << 0) | // SSE3 - (CTX->HostFeatures.SupportsPMULL_128Bit << 1) | // PCLMULQDQ - (1 << 2) | // DS area supports 64bit layout - (1 << 3) | // MWait - (0 << 4) | // DS-CPL - (0 << 5) | // VMX - (0 << 6) | // SMX - (0 << 7) | // Intel SpeedStep - (1 << 8) | // Thermal Monitor 2 - (1 << 9) | // SSSE3 - (0 << 10) | // L1 context ID - (0 << 11) | // Silicon debug - (0 << 12) | // FMA3 - (1 << 13) | // CMPXCHG16B - (0 << 14) | // xTPR update control - (0 << 15) | // Perfmon and debug capability - (0 << 16) | // Reserved - (0 << 17) | // Process-context identifiers - (0 << 18) | // Prefetching from memory mapped device - (1 << 19) | // SSE4.1 - (CTX->HostFeatures.SupportsCRC << 20) | // SSE4.2 - (0 << 21) | // X2APIC - (1 << 22) | // MOVBE - (1 << 23) | // POPCNT - (0 << 24) | // APIC TSC-Deadline - (CTX->HostFeatures.SupportsAES << 25) | // AES - (SupportsAVX() << 26) | // XSAVE - (SupportsAVX() << 27) | // OSXSAVE - (SupportsAVX() << 28) | // AVX - (0 << 29) | // F16C - (CTX->HostFeatures.SupportsRAND << 30) | // RDRAND - (Hypervisor << 31); - - Res.edx = - (1 << 0) | // FPU - (1 << 1) | // Virtual 8086 mode enhancements - (0 << 2) | // Debugging extensions - (0 << 3) | // Page size extension - (1 << 4) | // RDTSC supported - (1 << 5) | // MSR supported - (1 << 6) | // PAE - (1 << 7) | // Machine Check exception - (1 << 8) | // CMPXCHG8B - (1 << 9) | // APIC on-chip - (0 << 10) | // Reserved - (1 << 11) | // SYSENTER/SYSEXIT - (1 << 12) | // Memory Type Range registers, MTRRs are supported - (1 << 13) | // Page Global bit - (1 << 14) | // Machine Check architecture - (1 << 15) | // CMOV - (1 << 16) | // Page Attribute Table - (1 << 17) | // 36bit page size extension - (0 << 18) | // Processor serial number - (1 << 19) | // 
CLFLUSH - (0 << 20) | // Reserved - (0 << 21) | // Debug store - (0 << 22) | // Thermal monitor and software controled clock - (1 << 23) | // MMX - (1 << 24) | // FXSAVE/FXRSTOR - (1 << 25) | // SSE - (1 << 26) | // SSE2 - (0 << 27) | // Self Snoop - (1 << 28) | // Max APIC IDs reserved field is valid - (1 << 29) | // Thermal monitor - (0 << 30) | // Reserved - (0 << 31); // Pending break enable + Res.ebx = 0 | // Brand index + (8 << 8) | // Cache line size in bytes + (Cores << 16) | // Number of addressable IDs for the logical cores in the physical CPU + (0 << 24); // Local APIC ID + + Res.ecx = (1 << 0) | // SSE3 + (CTX->HostFeatures.SupportsPMULL_128Bit << 1) | // PCLMULQDQ + (1 << 2) | // DS area supports 64bit layout + (1 << 3) | // MWait + (0 << 4) | // DS-CPL + (0 << 5) | // VMX + (0 << 6) | // SMX + (0 << 7) | // Intel SpeedStep + (1 << 8) | // Thermal Monitor 2 + (1 << 9) | // SSSE3 + (0 << 10) | // L1 context ID + (0 << 11) | // Silicon debug + (0 << 12) | // FMA3 + (1 << 13) | // CMPXCHG16B + (0 << 14) | // xTPR update control + (0 << 15) | // Perfmon and debug capability + (0 << 16) | // Reserved + (0 << 17) | // Process-context identifiers + (0 << 18) | // Prefetching from memory mapped device + (1 << 19) | // SSE4.1 + (CTX->HostFeatures.SupportsCRC << 20) | // SSE4.2 + (0 << 21) | // X2APIC + (1 << 22) | // MOVBE + (1 << 23) | // POPCNT + (0 << 24) | // APIC TSC-Deadline + (CTX->HostFeatures.SupportsAES << 25) | // AES + (SupportsAVX() << 26) | // XSAVE + (SupportsAVX() << 27) | // OSXSAVE + (SupportsAVX() << 28) | // AVX + (0 << 29) | // F16C + (CTX->HostFeatures.SupportsRAND << 30) | // RDRAND + (Hypervisor << 31); + + Res.edx = (1 << 0) | // FPU + (1 << 1) | // Virtual 8086 mode enhancements + (0 << 2) | // Debugging extensions + (0 << 3) | // Page size extension + (1 << 4) | // RDTSC supported + (1 << 5) | // MSR supported + (1 << 6) | // PAE + (1 << 7) | // Machine Check exception + (1 << 8) | // CMPXCHG8B + (1 << 9) | // APIC on-chip + (0 << 10) 
| // Reserved + (1 << 11) | // SYSENTER/SYSEXIT + (1 << 12) | // Memory Type Range registers, MTRRs are supported + (1 << 13) | // Page Global bit + (1 << 14) | // Machine Check architecture + (1 << 15) | // CMOV + (1 << 16) | // Page Attribute Table + (1 << 17) | // 36bit page size extension + (0 << 18) | // Processor serial number + (1 << 19) | // CLFLUSH + (0 << 20) | // Reserved + (0 << 21) | // Debug store + (0 << 22) | // Thermal monitor and software controled clock + (1 << 23) | // MMX + (1 << 24) | // FXSAVE/FXRSTOR + (1 << 25) | // SSE + (1 << 26) | // SSE2 + (0 << 27) | // Self Snoop + (1 << 28) | // Max APIC IDs reserved field is valid + (1 << 29) | // Thermal monitor + (0 << 30) | // Reserved + (0 << 31); // Pending break enable return Res; } // 2: Cache and TLB information FEXCore::CPUID::FunctionResults CPUIDEmu::Function_02h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; // returns default values from i7 model 1Ah Res.eax = 0x1 | // Number of iterations needed for all descriptors - (0x5A << 8) | - (0x03 << 16) | - (0x55 << 24); + (0x5A << 8) | (0x03 << 16) | (0x55 << 24); - Res.ebx = 0xE4 | - (0xB2 << 8) | - (0xF0 << 16) | - (0 << 24); + Res.ebx = 0xE4 | (0xB2 << 8) | (0xF0 << 16) | (0 << 24); Res.ecx = 0; // null descriptors - Res.edx = 0x2C | - (0x21 << 8) | - (0xCA << 16) | - (0x09 << 24); + Res.edx = 0x2C | (0x21 << 8) | (0xCA << 16) | (0x09 << 24); return Res; } // 4: Deterministic cache parameters for each level FEXCore::CPUID::FunctionResults CPUIDEmu::Function_04h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; constexpr uint32_t CacheType_Data = 1; constexpr uint32_t CacheType_Instruction = 2; constexpr uint32_t CacheType_Unified = 3; @@ -524,111 +497,100 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_04h(uint32_t Leaf) const { // Report L1D uint32_t CoreCount = Cores - 1; - Res.eax = CacheType_Data | // Cache type - 
(0b001 << 5) | // Cache level - (1 << 8) | // Self initializing cache level - (0 << 9) | // Fully associative - (0 << 14) | // Maximum number of addressable IDs for logical processors sharing this cache (With SMT this would be 1) - (CoreCount << 26); // Maximum number of addressable IDs for processor cores in the physical package + Res.eax = CacheType_Data | // Cache type + (0b001 << 5) | // Cache level + (1 << 8) | // Self initializing cache level + (0 << 9) | // Fully associative + (0 << 14) | // Maximum number of addressable IDs for logical processors sharing this cache (With SMT this would be 1) + (CoreCount << 26); // Maximum number of addressable IDs for processor cores in the physical package - Res.ebx = - (63 << 0) | // Line Size - 1 : Claiming 64 byte - (0 << 12) | // Physical Line partitions - (7 << 22); // Associativity - 1 : Claiming 8 way + Res.ebx = (63 << 0) | // Line Size - 1 : Claiming 64 byte + (0 << 12) | // Physical Line partitions + (7 << 22); // Associativity - 1 : Claiming 8 way // 32KB Res.ecx = 63; // Number of sets - 1 : Claiming 64 sets - Res.edx = - (0 << 0) | // Write-back invalidate - (0 << 1) | // Cache inclusiveness - Includes lower caches - (0 << 2); // Complex cache indexing - 0: Direct, 1: Complex - } - else if (Leaf == 1) { + Res.edx = (0 << 0) | // Write-back invalidate + (0 << 1) | // Cache inclusiveness - Includes lower caches + (0 << 2); // Complex cache indexing - 0: Direct, 1: Complex + } else if (Leaf == 1) { // Report L1I uint32_t CoreCount = Cores - 1; Res.eax = CacheType_Instruction | // Cache type - (0b001 << 5) | // Cache level - (1 << 8) | // Self initializing cache level - (0 << 9) | // Fully associative - (0 << 14) | // Maximum number of addressable IDs for logical processors sharing this cache (With SMT this would be 1) - (CoreCount << 26); // Maximum number of addressable IDs for processor cores in the physical package + (0b001 << 5) | // Cache level + (1 << 8) | // Self initializing cache level + (0 << 9) | // 
Fully associative + (0 << 14) | // Maximum number of addressable IDs for logical processors sharing this cache (With SMT this would be 1) + (CoreCount << 26); // Maximum number of addressable IDs for processor cores in the physical package - Res.ebx = - (63 << 0) | // Line Size - 1 : Claiming 64 byte - (0 << 12) | // Physical Line partitions - (7 << 22); // Associativity - 1 : Claiming 8 way + Res.ebx = (63 << 0) | // Line Size - 1 : Claiming 64 byte + (0 << 12) | // Physical Line partitions + (7 << 22); // Associativity - 1 : Claiming 8 way // 32KB Res.ecx = 63; // Number of sets - 1 : Claiming 64 sets - Res.edx = - (0 << 0) | // Write-back invalidate - (0 << 1) | // Cache inclusiveness - Includes lower caches - (0 << 2); // Complex cache indexing - 0: Direct, 1: Complex - } - else if (Leaf == 2) { + Res.edx = (0 << 0) | // Write-back invalidate + (0 << 1) | // Cache inclusiveness - Includes lower caches + (0 << 2); // Complex cache indexing - 0: Direct, 1: Complex + } else if (Leaf == 2) { // Report L2 uint32_t CoreCount = Cores - 1; Res.eax = CacheType_Unified | // Cache type - (0b010 << 5) | // Cache level - (1 << 8) | // Self initializing cache level - (0 << 9) | // Fully associative - (0 << 14) | // Maximum number of addressable IDs for logical processors sharing this cache - (CoreCount << 26); // Maximum number of addressable IDs for processor cores in the physical package + (0b010 << 5) | // Cache level + (1 << 8) | // Self initializing cache level + (0 << 9) | // Fully associative + (0 << 14) | // Maximum number of addressable IDs for logical processors sharing this cache + (CoreCount << 26); // Maximum number of addressable IDs for processor cores in the physical package - Res.ebx = - (63 << 0) | // Line Size - 1 : Claiming 64 byte - (0 << 12) | // Physical Line partitions - (7 << 22); // Associativity - 1 : Claiming 8 way + Res.ebx = (63 << 0) | // Line Size - 1 : Claiming 64 byte + (0 << 12) | // Physical Line partitions + (7 << 22); // Associativity - 
1 : Claiming 8 way // 512KB Res.ecx = 0x3FF; // Number of sets - 1 : Claiming 1024 sets - Res.edx = - (0 << 0) | // Write-back invalidate - (0 << 1) | // Cache inclusiveness - Includes lower caches - (0 << 2); // Complex cache indexing - 0: Direct, 1: Complex - } - else if (Leaf == 3) { + Res.edx = (0 << 0) | // Write-back invalidate + (0 << 1) | // Cache inclusiveness - Includes lower caches + (0 << 2); // Complex cache indexing - 0: Direct, 1: Complex + } else if (Leaf == 3) { // Report L3 uint32_t CoreCount = Cores - 1; Res.eax = CacheType_Unified | // Cache type - (0b011 << 5) | // Cache level - (1 << 8) | // Self initializing cache level - (0 << 9) | // Fully associative - (CoreCount << 14) | // Maximum number of addressable IDs for logical processors sharing this cache - (CoreCount << 26); // Maximum number of addressable IDs for processor cores in the physical package + (0b011 << 5) | // Cache level + (1 << 8) | // Self initializing cache level + (0 << 9) | // Fully associative + (CoreCount << 14) | // Maximum number of addressable IDs for logical processors sharing this cache + (CoreCount << 26); // Maximum number of addressable IDs for processor cores in the physical package - Res.ebx = - (63 << 0) | // Line Size - 1 : Claiming 64 byte - (0 << 12) | // Physical Line partitions - (7 << 22); // Associativity - 1 : Claiming 8 way + Res.ebx = (63 << 0) | // Line Size - 1 : Claiming 64 byte + (0 << 12) | // Physical Line partitions + (7 << 22); // Associativity - 1 : Claiming 8 way // 8MB Res.ecx = 0x4000; // Number of sets - 1 : Claiming 16384 sets - Res.edx = - (0 << 0) | // Write-back invalidate - (0 << 1) | // Cache inclusiveness - Includes lower caches - (1 << 2); // Complex cache indexing - 0: Direct, 1: Complex + Res.edx = (0 << 0) | // Write-back invalidate + (0 << 1) | // Cache inclusiveness - Includes lower caches + (1 << 2); // Complex cache indexing - 0: Direct, 1: Complex } return Res; } FEXCore::CPUID::FunctionResults 
CPUIDEmu::Function_06h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; Res.eax = (1 << 2); // Always running APIC Res.ecx = (0 << 3); // Intel performance energy bias preference (EPB) return Res; } FEXCore::CPUID::FunctionResults CPUIDEmu::Function_07h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; if (Leaf == 0) { // Disable Enhanced REP MOVS when TSO is enabled. // vcruntime140 memmove will use `rep movsb` in this case which completely destroys perf in Hades(appId 1145360) @@ -637,107 +599,104 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_07h(uint32_t Leaf) const { // Number of subfunctions Res.eax = 0x0; - Res.ebx = - (1 << 0) | // FS/GS support - (0 << 1) | // TSC adjust MSR - (0 << 2) | // SGX - (SupportsAVX() << 3) | // BMI1 - (0 << 4) | // Intel Hardware Lock Elison - (0 << 5) | // AVX2 support - (1 << 6) | // FPU data pointer updated only on exception - (1 << 7) | // SMEP support - (SupportsAVX() << 8) | // BMI2 - (SupportsEnhancedREPMOVS << 9) | // Enhanced REP MOVSB/STOSB - (1 << 10) | // INVPCID for system software control of process-context - (0 << 11) | // Restricted transactional memory - (0 << 12) | // Intel resource directory technology Monitoring - (1 << 13) | // Deprecates FPU CS and DS - (0 << 14) | // Intel MPX - (0 << 15) | // Intel Resource Directory Technology Allocation - (0 << 16) | // Reserved - (0 << 17) | // Reserved - (CTX->HostFeatures.SupportsRAND << 18) | // RDSEED - (1 << 19) | // ADCX and ADOX instructions - (0 << 20) | // SMAP Supervisor mode access prevention and CLAC/STAC instructions - (0 << 21) | // Reserved - (0 << 22) | // Reserved - (1 << 23) | // CLFLUSHOPT instruction - (CTX->HostFeatures.SupportsCLWB << 24) | // CLWB instruction - (0 << 25) | // Intel processor trace - (0 << 26) | // Reserved - (0 << 27) | // Reserved - (0 << 28) | // Reserved - (Features.SHA << 29) | // SHA instructions - (0 << 
30) | // Reserved - (0 << 31); // Reserved - - Res.ecx = - (1 << 0) | // PREFETCHWT1 - (0 << 1) | // AVX512VBMI - (0 << 2) | // Usermode instruction prevention - (0 << 3) | // Protection keys for user mode pages - (0 << 4) | // OS protection keys - (0 << 5) | // waitpkg - (0 << 6) | // AVX512_VBMI2 - (0 << 7) | // CET shadow stack - (0 << 8) | // GFNI - (0 << 9) | // VAES - (0 << 10) | // VPCLMULQDQ - (0 << 11) | // AVX512_VNNI - (0 << 12) | // AVX512_BITALG - (0 << 13) | // Intel Total Memory Encryption - (0 << 14) | // AVX512_VPOPCNTDQ - (0 << 15) | // Reserved - (0 << 16) | // 5 Level page tables - (0 << 17) | // MPX MAWAU - (0 << 18) | // MPX MAWAU - (0 << 19) | // MPX MAWAU - (0 << 20) | // MPX MAWAU - (0 << 21) | // MPX MAWAU - (0 << 22) | // RDPID Read Processor ID - (0 << 23) | // Reserved - (0 << 24) | // Reserved - (0 << 25) | // CLDEMOTE - (0 << 26) | // Reserved - (0 << 27) | // MOVDIRI - (0 << 28) | // MOVDIR64B - (0 << 29) | // Reserved - (0 << 30) | // SGX Launch configuration - (0 << 31); // Reserved - - Res.edx = - (0 << 0) | // Reserved - (0 << 1) | // Reserved - (0 << 2) | // AVX512_4VNNIW - (0 << 3) | // AVX512_4FMAPS - (1 << 4) | // Fast Short Rep Mov - (0 << 5) | // Reserved - (0 << 6) | // Reserved - (0 << 7) | // Reserved - (0 << 8) | // AVX512_VP2INTERSECT - (0 << 9) | // SRBDS_CTRL (Special Register Buffer Data Sampling Mitigations) - (0 << 10) | // VERW clears CPU buffers - (0 << 11) | // Reserved - (0 << 12) | // Reserved - (0 << 13) | // TSX Force Abort (TSX will force abort if attempted) - (0 << 14) | // SERIALIZE instruction - ((Hybrid ? 
1U : 0U) << 15) | // Hybrid - (0 << 16) | // TSXLDTRK (TSX Suspend load address tracking) - Allows untracked memory loads inside TSX region - (0 << 17) | // Reserved - (0 << 18) | // Intel PCONFIG - (0 << 19) | // Intel Architectural LBR - (0 << 20) | // Intel CET - (0 << 21) | // Reserved - (0 << 22) | // AMX-BF16 - Tile computation on bfloat16 - (0 << 23) | // AVX512_FP16 - FP16 AVX512 instructions - (0 << 24) | // AMX-tile - If AMX is implemented - (0 << 25) | // AMX-int8 - AMX on 8-bit integers - (0 << 26) | // IBRS_IBPB - Speculation control - (0 << 27) | // STIBP - Single Thread Indirect Branch Predictor, Part of IBC - (0 << 28) | // L1D Flush - (0 << 29) | // Arch capabilities - Speculative side channel mitigations - (0 << 30) | // Arch capabilities - MSR module specific - (0 << 31); // SSBD - Speculative Store Bypass Disable + Res.ebx = (1 << 0) | // FS/GS support + (0 << 1) | // TSC adjust MSR + (0 << 2) | // SGX + (SupportsAVX() << 3) | // BMI1 + (0 << 4) | // Intel Hardware Lock Elison + (0 << 5) | // AVX2 support + (1 << 6) | // FPU data pointer updated only on exception + (1 << 7) | // SMEP support + (SupportsAVX() << 8) | // BMI2 + (SupportsEnhancedREPMOVS << 9) | // Enhanced REP MOVSB/STOSB + (1 << 10) | // INVPCID for system software control of process-context + (0 << 11) | // Restricted transactional memory + (0 << 12) | // Intel resource directory technology Monitoring + (1 << 13) | // Deprecates FPU CS and DS + (0 << 14) | // Intel MPX + (0 << 15) | // Intel Resource Directory Technology Allocation + (0 << 16) | // Reserved + (0 << 17) | // Reserved + (CTX->HostFeatures.SupportsRAND << 18) | // RDSEED + (1 << 19) | // ADCX and ADOX instructions + (0 << 20) | // SMAP Supervisor mode access prevention and CLAC/STAC instructions + (0 << 21) | // Reserved + (0 << 22) | // Reserved + (1 << 23) | // CLFLUSHOPT instruction + (CTX->HostFeatures.SupportsCLWB << 24) | // CLWB instruction + (0 << 25) | // Intel processor trace + (0 << 26) | // Reserved + (0 
<< 27) | // Reserved + (0 << 28) | // Reserved + (Features.SHA << 29) | // SHA instructions + (0 << 30) | // Reserved + (0 << 31); // Reserved + + Res.ecx = (1 << 0) | // PREFETCHWT1 + (0 << 1) | // AVX512VBMI + (0 << 2) | // Usermode instruction prevention + (0 << 3) | // Protection keys for user mode pages + (0 << 4) | // OS protection keys + (0 << 5) | // waitpkg + (0 << 6) | // AVX512_VBMI2 + (0 << 7) | // CET shadow stack + (0 << 8) | // GFNI + (0 << 9) | // VAES + (0 << 10) | // VPCLMULQDQ + (0 << 11) | // AVX512_VNNI + (0 << 12) | // AVX512_BITALG + (0 << 13) | // Intel Total Memory Encryption + (0 << 14) | // AVX512_VPOPCNTDQ + (0 << 15) | // Reserved + (0 << 16) | // 5 Level page tables + (0 << 17) | // MPX MAWAU + (0 << 18) | // MPX MAWAU + (0 << 19) | // MPX MAWAU + (0 << 20) | // MPX MAWAU + (0 << 21) | // MPX MAWAU + (0 << 22) | // RDPID Read Processor ID + (0 << 23) | // Reserved + (0 << 24) | // Reserved + (0 << 25) | // CLDEMOTE + (0 << 26) | // Reserved + (0 << 27) | // MOVDIRI + (0 << 28) | // MOVDIR64B + (0 << 29) | // Reserved + (0 << 30) | // SGX Launch configuration + (0 << 31); // Reserved + + Res.edx = (0 << 0) | // Reserved + (0 << 1) | // Reserved + (0 << 2) | // AVX512_4VNNIW + (0 << 3) | // AVX512_4FMAPS + (1 << 4) | // Fast Short Rep Mov + (0 << 5) | // Reserved + (0 << 6) | // Reserved + (0 << 7) | // Reserved + (0 << 8) | // AVX512_VP2INTERSECT + (0 << 9) | // SRBDS_CTRL (Special Register Buffer Data Sampling Mitigations) + (0 << 10) | // VERW clears CPU buffers + (0 << 11) | // Reserved + (0 << 12) | // Reserved + (0 << 13) | // TSX Force Abort (TSX will force abort if attempted) + (0 << 14) | // SERIALIZE instruction + ((Hybrid ? 
1U : 0U) << 15) | // Hybrid + (0 << 16) | // TSXLDTRK (TSX Suspend load address tracking) - Allows untracked memory loads inside TSX region + (0 << 17) | // Reserved + (0 << 18) | // Intel PCONFIG + (0 << 19) | // Intel Architectural LBR + (0 << 20) | // Intel CET + (0 << 21) | // Reserved + (0 << 22) | // AMX-BF16 - Tile computation on bfloat16 + (0 << 23) | // AVX512_FP16 - FP16 AVX512 instructions + (0 << 24) | // AMX-tile - If AMX is implemented + (0 << 25) | // AMX-int8 - AMX on 8-bit integers + (0 << 26) | // IBRS_IBPB - Speculation control + (0 << 27) | // STIBP - Single Thread Indirect Branch Predictor, Part of IBC + (0 << 28) | // L1D Flush + (0 << 29) | // Arch capabilities - Speculative side channel mitigations + (0 << 30) | // Arch capabilities - MSR module specific + (0 << 31); // SSBD - Speculative Store Bypass Disable } return Res; @@ -745,19 +704,18 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_07h(uint32_t Leaf) const { FEXCore::CPUID::FunctionResults CPUIDEmu::Function_0Dh(uint32_t Leaf) const { // Leaf 0 - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; uint32_t XFeatureSupportedSizeMax = SupportsAVX() ? 0x0000'0340 : 0x0000'0240; // XFeatureEnabledSizeMax: Legacy Header + FPU/SSE + AVX if (Leaf == 0) { // XFeatureSupportedMask[31:0] - Res.eax = - (1 << 0) | // X87 support - (1 << 1) | // 128-bit SSE support - (SupportsAVX() << 2) | // 256-bit AVX support - (0b00 << 3) | // MPX State - (0b000 << 5) | // AVX-512 state - (0 << 8) | // "Used for IA32_XSS" ... Used for what? - (0 << 9); // PKRU state + Res.eax = (1 << 0) | // X87 support + (1 << 1) | // 128-bit SSE support + (SupportsAVX() << 2) | // 256-bit AVX support + (0b00 << 3) | // MPX State + (0b000 << 5) | // AVX-512 state + (0 << 8) | // "Used for IA32_XSS" ... Used for what? 
+ (0 << 9); // PKRU state // EBX and ECX doesn't need to match if a feature is supported but not enabled Res.ebx = XFeatureSupportedSizeMax; @@ -765,28 +723,24 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_0Dh(uint32_t Leaf) const { // XFeatureSupportedMask[63:32] Res.edx = 0; // Upper 32-bits of XFeatureSupportedMask - } - else if (Leaf == 1) { - Res.eax = - (0 << 0) | // XSAVEOPT - (0 << 1) | // XSAVEC (and XRSTOR) - (0 << 2) | // XGETBV - XGETBV with ECX=1 supported - (0 << 3); // XSAVES - XSAVES, XRSTORS, and IA32_XSS supported + } else if (Leaf == 1) { + Res.eax = (0 << 0) | // XSAVEOPT + (0 << 1) | // XSAVEC (and XRSTOR) + (0 << 2) | // XGETBV - XGETBV with ECX=1 supported + (0 << 3); // XSAVES - XSAVES, XRSTORS, and IA32_XSS supported // Same information as Leaf 0 for ebx Res.ebx = XFeatureSupportedSizeMax; // Lower supported 32bits of IA32_XSS MSR. IA32_XSS[n] can only be set to 1 if ECX[n] is 1 - Res.ecx = - (0b0000'0000 << 0) | // Used for XCR0 - (0 << 8) | // PT state - (0 << 9); // Used for XCR0 + Res.ecx = (0b0000'0000 << 0) | // Used for XCR0 + (0 << 8) | // PT state + (0 << 9); // Used for XCR0 // Upper supported 32bits of IA32_XSS MSR. IA32_XSS[n+32] can only be set to 1 if EDX[n] is 1 // Entirely reserved atm Res.edx = 0; - } - else if (Leaf == 2) { + } else if (Leaf == 2) { Res.eax = SupportsAVX() ? 0x0000'0100 : 0; // YmmSaveStateSize Res.ebx = SupportsAVX() ? 
0x0000'0240 : 0; // YmmSaveStateOffset @@ -798,7 +752,7 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_0Dh(uint32_t Leaf) const { } FEXCore::CPUID::FunctionResults CPUIDEmu::Function_15h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; // TSC frequency = ECX * EBX / EAX uint32_t FrequencyHz = GetCycleCounterFrequency(); if (FrequencyHz) { @@ -810,10 +764,10 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_15h(uint32_t Leaf) const { } FEXCore::CPUID::FunctionResults CPUIDEmu::Function_1Ah(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; if (Hybrid) { uint32_t CPU = GetCPUID(); - auto &Data = PerCPUData[CPU]; + auto& Data = PerCPUData[CPU]; // 0x40 is a big CPU // 0x20 is a little CPU Res.eax |= (Data.IsBig ? 0x40 : 0x20) << 24; @@ -823,7 +777,7 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_1Ah(uint32_t Leaf) const { // Hypervisor CPUID information leaf FEXCore::CPUID::FunctionResults CPUIDEmu::Function_4000_0000h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; // Maximum supported hypervisor leafs // We only expose the information leaf // @@ -845,7 +799,7 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_4000_0000h(uint32_t Leaf) con // Hypervisor CPUID information leaf FEXCore::CPUID::FunctionResults CPUIDEmu::Function_4000_0001h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; if (Leaf == 0) { // EAX[3:0] Is the host architecture that FEX is running under #ifdef _M_X86_64 @@ -864,7 +818,7 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_4000_0001h(uint32_t Leaf) con // Highest extended function implemented FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0000h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; Res.eax = 0x8000001F; // EBX, EDX, ECX 
become the manufacturer id string @@ -890,88 +844,84 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0001h(uint32_t Leaf) con #else constexpr uint32_t SUPPORTS_RDTSCP = 1; #endif - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; Res.eax = FAMILY_IDENTIFIER; - Res.ecx = - (1 << 0) | // LAHF/SAHF - (1 << 1) | // 0 = Single core product, 1 = multi core product - (0 << 2) | // SVM - (1 << 3) | // Extended APIC register space - (0 << 4) | // LOCK MOV CR0 means MOV CR8 - (1 << 5) | // ABM instructions - (0 << 6) | // SSE4a - (0 << 7) | // Misaligned SSE mode - (1 << 8) | // PREFETCHW - (0 << 9) | // OS visible workaround support - (0 << 10) | // Instruction based sampling support - (0 << 11) | // XOP - (0 << 12) | // SKINIT - (0 << 13) | // Watchdog timer support - (0 << 14) | // Reserved - (0 << 15) | // Lightweight profiling support - (0 << 16) | // FMA4 - (1 << 17) | // Translation cache extension - (0 << 18) | // Reserved - (0 << 19) | // Reserved - (0 << 20) | // Reserved - (0 << 21) | // Reserved - (0 << 22) | // Topology extensions support - (0 << 23) | // Core performance counter extensions - (0 << 24) | // NB performance counter extensions - (0 << 25) | // Reserved - (0 << 26) | // Data breakpoints extensions - (0 << 27) | // Performance TSC - (0 << 28) | // L2 perf counter extensions - (0 << 29) | // Reserved - (0 << 30) | // Reserved - (0 << 31); // Reserved - - Res.edx = - (1 << 0) | // FPU - (1 << 1) | // Virtual mode extensions - (1 << 2) | // Debugging extensions - (1 << 3) | // Page size extensions - (1 << 4) | // TSC - (1 << 5) | // MSR support - (1 << 6) | // PAE - (1 << 7) | // Machine Check Exception - (1 << 8) | // CMPXCHG8B - (1 << 9) | // APIC - (0 << 10) | // Reserved - (1 << 11) | // SYSCALL/SYSRET - (1 << 12) | // MTRR - (1 << 13) | // Page global extension - (1 << 14) | // Machine Check architecture - (1 << 15) | // CMOV - (1 << 16) | // Page attribute table - (1 << 17) | // Page-size extensions - (0 
<< 18) | // Reserved - (0 << 19) | // Reserved - (1 << 20) | // NX - (0 << 21) | // Reserved - (1 << 22) | // MMXExt - (1 << 23) | // MMX - (1 << 24) | // FXSAVE/FXRSTOR - (1 << 25) | // FXSAVE/FXRSTOR Optimizations - (0 << 26) | // 1 gigabit pages - (SUPPORTS_RDTSCP << 27) | // RDTSCP - (0 << 28) | // Reserved - (1 << 29) | // Long Mode - (1 << 30) | // 3DNow! Extensions - (1 << 31); // 3DNow! + Res.ecx = (1 << 0) | // LAHF/SAHF + (1 << 1) | // 0 = Single core product, 1 = multi core product + (0 << 2) | // SVM + (1 << 3) | // Extended APIC register space + (0 << 4) | // LOCK MOV CR0 means MOV CR8 + (1 << 5) | // ABM instructions + (0 << 6) | // SSE4a + (0 << 7) | // Misaligned SSE mode + (1 << 8) | // PREFETCHW + (0 << 9) | // OS visible workaround support + (0 << 10) | // Instruction based sampling support + (0 << 11) | // XOP + (0 << 12) | // SKINIT + (0 << 13) | // Watchdog timer support + (0 << 14) | // Reserved + (0 << 15) | // Lightweight profiling support + (0 << 16) | // FMA4 + (1 << 17) | // Translation cache extension + (0 << 18) | // Reserved + (0 << 19) | // Reserved + (0 << 20) | // Reserved + (0 << 21) | // Reserved + (0 << 22) | // Topology extensions support + (0 << 23) | // Core performance counter extensions + (0 << 24) | // NB performance counter extensions + (0 << 25) | // Reserved + (0 << 26) | // Data breakpoints extensions + (0 << 27) | // Performance TSC + (0 << 28) | // L2 perf counter extensions + (0 << 29) | // Reserved + (0 << 30) | // Reserved + (0 << 31); // Reserved + + Res.edx = (1 << 0) | // FPU + (1 << 1) | // Virtual mode extensions + (1 << 2) | // Debugging extensions + (1 << 3) | // Page size extensions + (1 << 4) | // TSC + (1 << 5) | // MSR support + (1 << 6) | // PAE + (1 << 7) | // Machine Check Exception + (1 << 8) | // CMPXCHG8B + (1 << 9) | // APIC + (0 << 10) | // Reserved + (1 << 11) | // SYSCALL/SYSRET + (1 << 12) | // MTRR + (1 << 13) | // Page global extension + (1 << 14) | // Machine Check architecture + (1 << 15) 
| // CMOV + (1 << 16) | // Page attribute table + (1 << 17) | // Page-size extensions + (0 << 18) | // Reserved + (0 << 19) | // Reserved + (1 << 20) | // NX + (0 << 21) | // Reserved + (1 << 22) | // MMXExt + (1 << 23) | // MMX + (1 << 24) | // FXSAVE/FXRSTOR + (1 << 25) | // FXSAVE/FXRSTOR Optimizations + (0 << 26) | // 1 gigabit pages + (SUPPORTS_RDTSCP << 27) | // RDTSCP + (0 << 28) | // Reserved + (1 << 29) | // Long Mode + (1 << 30) | // 3DNow! Extensions + (1 << 31); // 3DNow! return Res; } -constexpr char ProcessorBrand[32] = { - GIT_DESCRIBE_STRING -}; +constexpr char ProcessorBrand[32] = {GIT_DESCRIBE_STRING}; constexpr ssize_t DESCRIBE_STR_SIZE = std::char_traits::length(GIT_DESCRIBE_STRING); static_assert(DESCRIBE_STR_SIZE < 32); -//Processor brand string +// Processor brand string FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0002h(uint32_t Leaf) const { return Function_8000_0002h(Leaf, GetCPUID()); } @@ -985,140 +935,126 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0004h(uint32_t Leaf) con } FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0002h(uint32_t Leaf, uint32_t CPU) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; memset(&Res, ' ', sizeof(FEXCore::CPUID::FunctionResults)); - memcpy(&Res, &ProcessorBrand[0], std::min(ssize_t{16L}, DESCRIBE_STR_SIZE)); + memcpy(&Res, &ProcessorBrand[0], std::min(ssize_t {16L}, DESCRIBE_STR_SIZE)); return Res; } FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0003h(uint32_t Leaf, uint32_t CPU) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; memset(&Res, ' ', sizeof(FEXCore::CPUID::FunctionResults)); - memcpy(&Res, &ProcessorBrand[16], std::max(ssize_t{0L}, DESCRIBE_STR_SIZE - 16)); + memcpy(&Res, &ProcessorBrand[16], std::max(ssize_t {0L}, DESCRIBE_STR_SIZE - 16)); return Res; } FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0004h(uint32_t Leaf, uint32_t CPU) const { - 
FEXCore::CPUID::FunctionResults Res{}; - auto &Data = PerCPUData[CPU]; + FEXCore::CPUID::FunctionResults Res {}; + auto& Data = PerCPUData[CPU]; memcpy(&Res, Data.ProductName, std::min(strlen(Data.ProductName), sizeof(FEXCore::CPUID::FunctionResults))); return Res; } // L1 Cache and TLB identifiers FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0005h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; // L1 TLB Information for 2MB and 4MB pages - Res.eax = - (64 << 0) | // Number of TLB instruction entries - (255 << 8) | // instruction TLB associativity type (full) - (64 << 16) | // Number of TLB data entries - (255 << 24); // data TLB associativity type (full) + Res.eax = (64 << 0) | // Number of TLB instruction entries + (255 << 8) | // instruction TLB associativity type (full) + (64 << 16) | // Number of TLB data entries + (255 << 24); // data TLB associativity type (full) // L1 TLB Information for 4KB pages - Res.ebx = - (64 << 0) | // Number of TLB instruction entries - (255 << 8) | // instruction TLB associativity type (full) - (64 << 16) | // Number of TLB data entries - (255 << 24); // data TLB associativity type (full) + Res.ebx = (64 << 0) | // Number of TLB instruction entries + (255 << 8) | // instruction TLB associativity type (full) + (64 << 16) | // Number of TLB data entries + (255 << 24); // data TLB associativity type (full) // L1 data cache identifiers - Res.ecx = - (64 << 0) | // L1 data cache size line in bytes - (1 << 8) | // L1 data cachelines per tag - (8 << 16) | // L1 data cache associativity - (32 << 24); // L1 data cache size in KB + Res.ecx = (64 << 0) | // L1 data cache size line in bytes + (1 << 8) | // L1 data cachelines per tag + (8 << 16) | // L1 data cache associativity + (32 << 24); // L1 data cache size in KB // L1 instruction cache identifiers - Res.edx = - (64 << 0) | // L1 instruction cache line size in bytes - (1 << 8) | // L1 instruction cachelines per tag - (4 
<< 16) | // L1 instruction cache associativity - (64 << 24); // L1 instruction cache size in KB + Res.edx = (64 << 0) | // L1 instruction cache line size in bytes + (1 << 8) | // L1 instruction cachelines per tag + (4 << 16) | // L1 instruction cache associativity + (64 << 24); // L1 instruction cache size in KB return Res; } // L2 Cache identifiers FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0006h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; // L2 TLB Information for 2MB and 4MB pages - Res.eax = - (1024 << 0) | // Number of TLB instruction entries - (6 << 12) | // instruction TLB associativity type - (1536 << 16) | // Number of TLB data entries - (3 << 28); // data TLB associativity type + Res.eax = (1024 << 0) | // Number of TLB instruction entries + (6 << 12) | // instruction TLB associativity type + (1536 << 16) | // Number of TLB data entries + (3 << 28); // data TLB associativity type // L2 TLB Information for 4KB pages - Res.ebx = - (1024 << 0) | // Number of TLB instruction entries - (6 << 12) | // instruction TLB associativity type - (1536 << 16) | // Number of TLB data entries - (5 << 28); // data TLB associativity type + Res.ebx = (1024 << 0) | // Number of TLB instruction entries + (6 << 12) | // instruction TLB associativity type + (1536 << 16) | // Number of TLB data entries + (5 << 28); // data TLB associativity type // L2 cache identifiers - Res.ecx = - (64 << 0) | // cacheline size - (1 << 8) | // cachelines per tag - (6 << 12) | // cache associativity - (512 << 16); // L2 cache size in KB + Res.ecx = (64 << 0) | // cacheline size + (1 << 8) | // cachelines per tag + (6 << 12) | // cache associativity + (512 << 16); // L2 cache size in KB // L3 cache identifiers - Res.edx = - (64 << 0) | // cacheline size - (1 << 8) | // cachelines per tag - (6 << 12) | // cache associativity - (16 << 18); // L2 cache size in KB + Res.edx = (64 << 0) | // cacheline size + (1 << 8) | // 
cachelines per tag + (6 << 12) | // cache associativity + (16 << 18); // L2 cache size in KB return Res; } // Advanced power management FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0007h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; Res.eax = (1 << 2); // APIC timer not affected by p-state - Res.edx = - (1 << 8); // Invariant TSC + Res.edx = (1 << 8); // Invariant TSC return Res; } // Virtual and physical address sizes FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0008h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; - Res.eax = - (48 << 0) | // PhysAddrSize = 48-bit - (48 << 8) | // LinAddrSize = 48-bit - (0 << 16); // GuestPhysAddrSize == PhysAddrSize + FEXCore::CPUID::FunctionResults Res {}; + Res.eax = (48 << 0) | // PhysAddrSize = 48-bit + (48 << 8) | // LinAddrSize = 48-bit + (0 << 16); // GuestPhysAddrSize == PhysAddrSize - Res.ebx = - (0 << 2) | // XSaveErPtr: Saving and restoring error pointers - (0 << 1) | // IRPerf: Instructions retired count support - (CTX->HostFeatures.SupportsCLZERO << 0); // CLZERO support + Res.ebx = (0 << 2) | // XSaveErPtr: Saving and restoring error pointers + (0 << 1) | // IRPerf: Instructions retired count support + (CTX->HostFeatures.SupportsCLZERO << 0); // CLZERO support uint32_t CoreCount = Cores - 1; - Res.ecx = - (0 << 16) | // PerfTscSize: Performance timestamp count size - ((uint32_t)std::log2(CoreCount + 1) << 12) | // ApicIdSize: Number of bits in ApicID - (CoreCount << 0); // Count count subtract one + Res.ecx = (0 << 16) | // PerfTscSize: Performance timestamp count size + ((uint32_t)std::log2(CoreCount + 1) << 12) | // ApicIdSize: Number of bits in ApicID + (CoreCount << 0); // Count count subtract one return Res; } // TLB 1GB page identifiers FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0019h(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; - Res.eax = - (0xF << 28) | // L1 DTLB associativity 
for 1GB pages - (64 << 16) | // L1 DTLB entry count for 1GB pages - (0xF << 12) | // L1 ITLB associativity for 1GB pages - (64 << 0); // L1 ITLB entry count for 1GB pages - - Res.ebx = - (0 << 28) | // L2 DTLB associativity for 1GB pages - (0 << 16) | // L2 DTLB entry count for 1GB pages - (0 << 12) | // L2 ITLB associativity for 1GB pages - (0 << 0); // L2 ITLB entry count for 1GB pages + FEXCore::CPUID::FunctionResults Res {}; + Res.eax = (0xF << 28) | // L1 DTLB associativity for 1GB pages + (64 << 16) | // L1 DTLB entry count for 1GB pages + (0xF << 12) | // L1 ITLB associativity for 1GB pages + (64 << 0); // L1 ITLB entry count for 1GB pages + + Res.ebx = (0 << 28) | // L2 DTLB associativity for 1GB pages + (0 << 16) | // L2 DTLB entry count for 1GB pages + (0 << 12) | // L2 ITLB associativity for 1GB pages + (0 << 0); // L2 ITLB entry count for 1GB pages return Res; } @@ -1127,7 +1063,7 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_001Dh(uint32_t Leaf) con // This is nearly a copy of CPUID function 4h // There are some minor changes though - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; constexpr uint32_t CacheType_Data = 1; constexpr uint32_t CacheType_Instruction = 2; constexpr uint32_t CacheType_Unified = 3; @@ -1135,97 +1071,86 @@ FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_001Dh(uint32_t Leaf) con if (Leaf == 0) { // Report L1D Res.eax = CacheType_Data | // Cache type - (0b001 << 5) | // Cache level - (1 << 8) | // Self initializing cache level - (0 << 9) | // Fully associative - (0 << 14); // Maximum number of addressable IDs for logical processors sharing this cache (With SMT this would be 1) + (0b001 << 5) | // Cache level + (1 << 8) | // Self initializing cache level + (0 << 9) | // Fully associative + (0 << 14); // Maximum number of addressable IDs for logical processors sharing this cache (With SMT this would be 1) - Res.ebx = - (63 << 0) | // Line Size - 1 : Claiming 64 byte - (0 << 12) | 
// Physical Line partitions - (7 << 22); // Associativity - 1 : Claiming 8 way + Res.ebx = (63 << 0) | // Line Size - 1 : Claiming 64 byte + (0 << 12) | // Physical Line partitions + (7 << 22); // Associativity - 1 : Claiming 8 way // 32KB Res.ecx = 63; // Number of sets - 1 : Claiming 64 sets - Res.edx = - (0 << 0) | // Write-back invalidate - (0 << 1); // Cache inclusiveness - Includes lower caches - } - else if (Leaf == 1) { + Res.edx = (0 << 0) | // Write-back invalidate + (0 << 1); // Cache inclusiveness - Includes lower caches + } else if (Leaf == 1) { // Report L1I Res.eax = CacheType_Instruction | // Cache type - (0b001 << 5) | // Cache level - (1 << 8) | // Self initializing cache level - (0 << 9) | // Fully associative - (0 << 14); // Maximum number of addressable IDs for logical processors sharing this cache (With SMT this would be 1) + (0b001 << 5) | // Cache level + (1 << 8) | // Self initializing cache level + (0 << 9) | // Fully associative + (0 << 14); // Maximum number of addressable IDs for logical processors sharing this cache (With SMT this would be 1) - Res.ebx = - (63 << 0) | // Line Size - 1 : Claiming 64 byte - (0 << 12) | // Physical Line partitions - (7 << 22); // Associativity - 1 : Claiming 8 way + Res.ebx = (63 << 0) | // Line Size - 1 : Claiming 64 byte + (0 << 12) | // Physical Line partitions + (7 << 22); // Associativity - 1 : Claiming 8 way // 32KB Res.ecx = 63; // Number of sets - 1 : Claiming 64 sets - Res.edx = - (0 << 0) | // Write-back invalidate - (0 << 1); // Cache inclusiveness - Includes lower caches - } - else if (Leaf == 2) { + Res.edx = (0 << 0) | // Write-back invalidate + (0 << 1); // Cache inclusiveness - Includes lower caches + } else if (Leaf == 2) { // Report L2 Res.eax = CacheType_Unified | // Cache type - (0b010 << 5) | // Cache level - (1 << 8) | // Self initializing cache level - (0 << 9) | // Fully associative - (0 << 14); // Maximum number of addressable IDs for logical processors sharing this cache + (0b010 
<< 5) | // Cache level + (1 << 8) | // Self initializing cache level + (0 << 9) | // Fully associative + (0 << 14); // Maximum number of addressable IDs for logical processors sharing this cache - Res.ebx = - (63 << 0) | // Line Size - 1 : Claiming 64 byte - (0 << 12) | // Physical Line partitions - (7 << 22); // Associativity - 1 : Claiming 8 way + Res.ebx = (63 << 0) | // Line Size - 1 : Claiming 64 byte + (0 << 12) | // Physical Line partitions + (7 << 22); // Associativity - 1 : Claiming 8 way // 512KB Res.ecx = 0x3FF; // Number of sets - 1 : Claiming 1024 sets - Res.edx = - (0 << 0) | // Write-back invalidate - (0 << 1); // Cache inclusiveness - Includes lower caches - } - else if (Leaf == 3) { + Res.edx = (0 << 0) | // Write-back invalidate + (0 << 1); // Cache inclusiveness - Includes lower caches + } else if (Leaf == 3) { // Report L3 uint32_t CoreCount = Cores - 1; Res.eax = CacheType_Unified | // Cache type - (0b011 << 5) | // Cache level - (1 << 8) | // Self initializing cache level - (0 << 9) | // Fully associative - (CoreCount << 14); // Maximum number of addressable IDs for logical processors sharing this cache + (0b011 << 5) | // Cache level + (1 << 8) | // Self initializing cache level + (0 << 9) | // Fully associative + (CoreCount << 14); // Maximum number of addressable IDs for logical processors sharing this cache - Res.ebx = - (63 << 0) | // Line Size - 1 : Claiming 64 byte - (0 << 12) | // Physical Line partitions - (7 << 22); // Associativity - 1 : Claiming 8 way + Res.ebx = (63 << 0) | // Line Size - 1 : Claiming 64 byte + (0 << 12) | // Physical Line partitions + (7 << 22); // Associativity - 1 : Claiming 8 way // 8MB Res.ecx = 0x4000; // Number of sets - 1 : Claiming 16384 sets - Res.edx = - (0 << 0) | // Write-back invalidate - (0 << 1); // Cache inclusiveness - Includes lower caches + Res.edx = (0 << 0) | // Write-back invalidate + (0 << 1); // Cache inclusiveness - Includes lower caches } return Res; } FEXCore::CPUID::FunctionResults 
CPUIDEmu::Function_Reserved(uint32_t Leaf) const { - FEXCore::CPUID::FunctionResults Res{}; + FEXCore::CPUID::FunctionResults Res {}; return Res; } FEXCore::CPUID::XCRResults CPUIDEmu::XCRFunction_0h() const { // This just returns XCR0 - FEXCore::CPUID::XCRResults Res{ + FEXCore::CPUID::XCRResults Res { .eax = static_cast(XCR0), .edx = static_cast(XCR0 >> 32), }; @@ -1233,7 +1158,7 @@ FEXCore::CPUID::XCRResults CPUIDEmu::XCRFunction_0h() const { return Res; } -CPUIDEmu::CPUIDEmu(FEXCore::Context::ContextImpl const *ctx) +CPUIDEmu::CPUIDEmu(const FEXCore::Context::ContextImpl* ctx) : CTX {ctx} { Cores = FEXCore::CPUInfo::CalculateNumberOfCPUs(); @@ -1242,5 +1167,4 @@ CPUIDEmu::CPUIDEmu(FEXCore::Context::ContextImpl const *ctx) SetupFeatures(); } -} - +} // namespace FEXCore diff --git a/FEXCore/Source/Interface/Core/Core.cpp b/FEXCore/Source/Interface/Core/Core.cpp index 680ba3b843..37419ed7f4 100644 --- a/FEXCore/Source/Interface/Core/Core.cpp +++ b/FEXCore/Source/Interface/Core/Core.cpp @@ -78,946 +78,923 @@ desc: Glues Frontend, OpDispatcher and IR Opts & Compilation, LookupCache, Dispa #include namespace FEXCore::Context { - ContextImpl::ContextImpl() +ContextImpl::ContextImpl() : CPUID {this} , IRCaptureCache {this} { #ifdef BLOCKSTATS - BlockData = std::make_unique(); + BlockData = std::make_unique(); #endif - if (Config.CacheObjectCodeCompilation() != FEXCore::Config::ConfigObjectCodeHandler::CONFIG_NONE) { - CodeObjectCacheService = fextl::make_unique(this); - } - if (!Config.Is64BitMode()) { - // When operating in 32-bit mode, the virtual memory we care about is only the lower 32-bits. - Config.VirtualMemSize = 1ULL << 32; - } + if (Config.CacheObjectCodeCompilation() != FEXCore::Config::ConfigObjectCodeHandler::CONFIG_NONE) { + CodeObjectCacheService = fextl::make_unique(this); + } + if (!Config.Is64BitMode()) { + // When operating in 32-bit mode, the virtual memory we care about is only the lower 32-bits. 
+ Config.VirtualMemSize = 1ULL << 32; + } - if (Config.BlockJITNaming() || - Config.GlobalJITNaming() || - Config.LibraryJITNaming()) { - // Only initialize symbols file if enabled. Ensures we don't pollute /tmp with empty files. - Symbols.InitFile(); - } + if (Config.BlockJITNaming() || Config.GlobalJITNaming() || Config.LibraryJITNaming()) { + // Only initialize symbols file if enabled. Ensures we don't pollute /tmp with empty files. + Symbols.InitFile(); + } - if (FEXCore::GetCycleCounterFrequency() >= FEXCore::Context::TSC_SCALE_MAXIMUM) { - Config.SmallTSCScale = false; - } + if (FEXCore::GetCycleCounterFrequency() >= FEXCore::Context::TSC_SCALE_MAXIMUM) { + Config.SmallTSCScale = false; + } - // Track atomic TSO emulation configuration. - UpdateAtomicTSOEmulationConfig(); + // Track atomic TSO emulation configuration. + UpdateAtomicTSOEmulationConfig(); +} + +ContextImpl::~ContextImpl() { + { + if (CodeObjectCacheService) { + CodeObjectCacheService->Shutdown(); + } } +} - ContextImpl::~ContextImpl() { - { - if (CodeObjectCacheService) { - CodeObjectCacheService->Shutdown(); +uint64_t ContextImpl::RestoreRIPFromHostPC(FEXCore::Core::InternalThreadState* Thread, uint64_t HostPC) { + const auto Frame = Thread->CurrentFrame; + const uint64_t BlockBegin = Frame->State.InlineJITBlockHeader; + auto InlineHeader = reinterpret_cast(BlockBegin); + + if (InlineHeader) { + auto InlineTail = reinterpret_cast(Frame->State.InlineJITBlockHeader + InlineHeader->OffsetToBlockTail); + auto RIPEntries = reinterpret_cast( + Frame->State.InlineJITBlockHeader + InlineHeader->OffsetToBlockTail + InlineTail->OffsetToRIPEntries); + + // Check if the host PC is currently within a code block. + // If it is then RIP can be reconstructed from the beginning of the code block. + // This is currently as close as FEX can get RIP reconstructions. 
+ if (HostPC >= reinterpret_cast(BlockBegin) && HostPC < reinterpret_cast(BlockBegin + InlineTail->Size)) { + + // Reconstruct RIP from JIT entries for this block. + uint64_t StartingHostPC = BlockBegin; + uint64_t StartingGuestRIP = InlineTail->RIP; + + for (uint32_t i = 0; i < InlineTail->NumberOfRIPEntries; ++i) { + const auto& RIPEntry = RIPEntries[i]; + if (HostPC >= (StartingHostPC + RIPEntry.HostPCOffset)) { + // We are beyond this entry, keep going forward. + StartingHostPC += RIPEntry.HostPCOffset; + StartingGuestRIP += RIPEntry.GuestRIPOffset; + } else { + // Passed where the Host PC is at. Break now. + break; + } } + return StartingGuestRIP; } } - uint64_t ContextImpl::RestoreRIPFromHostPC(FEXCore::Core::InternalThreadState *Thread, uint64_t HostPC) { - const auto Frame = Thread->CurrentFrame; - const uint64_t BlockBegin = Frame->State.InlineJITBlockHeader; - auto InlineHeader = reinterpret_cast(BlockBegin); + // Fallback to what is stored in the RIP currently. + return Frame->State.rip; +} - if (InlineHeader) { - auto InlineTail = reinterpret_cast(Frame->State.InlineJITBlockHeader + InlineHeader->OffsetToBlockTail); - auto RIPEntries = reinterpret_cast(Frame->State.InlineJITBlockHeader + InlineHeader->OffsetToBlockTail + InlineTail->OffsetToRIPEntries); +uint32_t ContextImpl::ReconstructCompactedEFLAGS(FEXCore::Core::InternalThreadState* Thread, bool WasInJIT, uint64_t* HostGPRs, uint64_t PSTATE) { + const auto Frame = Thread->CurrentFrame; + uint32_t EFLAGS {}; + + // Currently these flags just map 1:1 inside of the resulting value. + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_EFLAG_BITS; ++i) { + switch (i) { + case X86State::RFLAG_CF_RAW_LOC: + case X86State::RFLAG_PF_RAW_LOC: + case X86State::RFLAG_AF_RAW_LOC: + case X86State::RFLAG_ZF_RAW_LOC: + case X86State::RFLAG_SF_RAW_LOC: + case X86State::RFLAG_OF_RAW_LOC: + // Intentionally do nothing. + // These contain multiple bits which can corrupt other members when compacted. 
+ break; + default: EFLAGS |= uint32_t {Frame->State.flags[i]} << i; break; + } + } - // Check if the host PC is currently within a code block. - // If it is then RIP can be reconstructed from the beginning of the code block. - // This is currently as close as FEX can get RIP reconstructions. - if (HostPC >= reinterpret_cast(BlockBegin) && - HostPC < reinterpret_cast(BlockBegin + InlineTail->Size)) { + uint32_t Packed_NZCV {}; + if (WasInJIT) { + // If we were in the JIT then NZCV is in the CPU's PSTATE object. + // Packed in to the same bit locations as RFLAG_NZCV_LOC. + Packed_NZCV = PSTATE; - // Reconstruct RIP from JIT entries for this block. - uint64_t StartingHostPC = BlockBegin; - uint64_t StartingGuestRIP = InlineTail->RIP; + // If we were in the JIT then PF and AF are in registers. + // Move them to the CPUState frame now. + Frame->State.pf_raw = HostGPRs[CPU::REG_PF.Idx()]; + Frame->State.af_raw = HostGPRs[CPU::REG_AF.Idx()]; + } else { + // If we were not in the JIT then the NZCV state is stored in the CPUState RFLAG_NZCV_LOC. + // SF/ZF/CF/OF are packed in a 32-bit value in RFLAG_NZCV_LOC. + memcpy(&Packed_NZCV, &Frame->State.flags[X86State::RFLAG_NZCV_LOC], sizeof(Packed_NZCV)); + } - for (uint32_t i = 0; i < InlineTail->NumberOfRIPEntries; ++i) { - const auto &RIPEntry = RIPEntries[i]; - if (HostPC >= (StartingHostPC + RIPEntry.HostPCOffset)) { - // We are beyond this entry, keep going forward. - StartingHostPC += RIPEntry.HostPCOffset; - StartingGuestRIP += RIPEntry.GuestRIPOffset; - } - else { - // Passed where the Host PC is at. Break now. 
- break; - } - } - return StartingGuestRIP; - } - } + uint32_t OF = (Packed_NZCV >> IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_OF_RAW_LOC)) & 1; + uint32_t CF = (Packed_NZCV >> IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_CF_RAW_LOC)) & 1; + uint32_t ZF = (Packed_NZCV >> IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_ZF_RAW_LOC)) & 1; + uint32_t SF = (Packed_NZCV >> IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_SF_RAW_LOC)) & 1; - // Fallback to what is stored in the RIP currently. - return Frame->State.rip; - } - - uint32_t ContextImpl::ReconstructCompactedEFLAGS(FEXCore::Core::InternalThreadState *Thread, bool WasInJIT, uint64_t *HostGPRs, uint64_t PSTATE) { - const auto Frame = Thread->CurrentFrame; - uint32_t EFLAGS{}; - - // Currently these flags just map 1:1 inside of the resulting value. - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_EFLAG_BITS; ++i) { - switch (i) { - case X86State::RFLAG_CF_RAW_LOC: - case X86State::RFLAG_PF_RAW_LOC: - case X86State::RFLAG_AF_RAW_LOC: - case X86State::RFLAG_ZF_RAW_LOC: - case X86State::RFLAG_SF_RAW_LOC: - case X86State::RFLAG_OF_RAW_LOC: - // Intentionally do nothing. - // These contain multiple bits which can corrupt other members when compacted. - break; - default: - EFLAGS |= uint32_t{Frame->State.flags[i]} << i; - break; - } - } + // Pack in to EFLAGS + EFLAGS |= OF << X86State::RFLAG_OF_RAW_LOC; + EFLAGS |= CF << X86State::RFLAG_CF_RAW_LOC; + EFLAGS |= ZF << X86State::RFLAG_ZF_RAW_LOC; + EFLAGS |= SF << X86State::RFLAG_SF_RAW_LOC; - uint32_t Packed_NZCV{}; - if (WasInJIT) { - // If we were in the JIT then NZCV is in the CPU's PSTATE object. - // Packed in to the same bit locations as RFLAG_NZCV_LOC. - Packed_NZCV = PSTATE; + // PF calculation is deferred, calculate it now. + // Popcount the 8-bit flag and then extract the lower bit. 
+ uint32_t PFByte = Frame->State.pf_raw & 0xff; + uint32_t PF = std::popcount(PFByte ^ 1) & 1; + EFLAGS |= PF << X86State::RFLAG_PF_RAW_LOC; - // If we were in the JIT then PF and AF are in registers. - // Move them to the CPUState frame now. - Frame->State.pf_raw = HostGPRs[CPU::REG_PF.Idx()]; - Frame->State.af_raw = HostGPRs[CPU::REG_AF.Idx()]; - } - else { - // If we were not in the JIT then the NZCV state is stored in the CPUState RFLAG_NZCV_LOC. - // SF/ZF/CF/OF are packed in a 32-bit value in RFLAG_NZCV_LOC. - memcpy(&Packed_NZCV, &Frame->State.flags[X86State::RFLAG_NZCV_LOC], sizeof(Packed_NZCV)); - } + // AF calculation is deferred, calculate it now. + // XOR with PF byte and extract bit 4. + uint32_t AF = ((Frame->State.af_raw ^ PFByte) & (1 << 4)) ? 1 : 0; + EFLAGS |= AF << X86State::RFLAG_AF_RAW_LOC; - uint32_t OF = (Packed_NZCV >> IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_OF_RAW_LOC)) & 1; - uint32_t CF = (Packed_NZCV >> IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_CF_RAW_LOC)) & 1; - uint32_t ZF = (Packed_NZCV >> IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_ZF_RAW_LOC)) & 1; - uint32_t SF = (Packed_NZCV >> IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_SF_RAW_LOC)) & 1; - - // Pack in to EFLAGS - EFLAGS |= OF << X86State::RFLAG_OF_RAW_LOC; - EFLAGS |= CF << X86State::RFLAG_CF_RAW_LOC; - EFLAGS |= ZF << X86State::RFLAG_ZF_RAW_LOC; - EFLAGS |= SF << X86State::RFLAG_SF_RAW_LOC; - - // PF calculation is deferred, calculate it now. - // Popcount the 8-bit flag and then extract the lower bit. - uint32_t PFByte = Frame->State.pf_raw & 0xff; - uint32_t PF = std::popcount(PFByte ^ 1) & 1; - EFLAGS |= PF << X86State::RFLAG_PF_RAW_LOC; - - // AF calculation is deferred, calculate it now. - // XOR with PF byte and extract bit 4. - uint32_t AF = ((Frame->State.af_raw ^ PFByte) & (1 << 4)) ? 
1 : 0; - EFLAGS |= AF << X86State::RFLAG_AF_RAW_LOC; - - return EFLAGS; - } - - void ContextImpl::SetFlagsFromCompactedEFLAGS(FEXCore::Core::InternalThreadState *Thread, uint32_t EFLAGS) { - const auto Frame = Thread->CurrentFrame; - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_EFLAG_BITS; ++i) { - switch (i) { - case X86State::RFLAG_OF_RAW_LOC: - case X86State::RFLAG_CF_RAW_LOC: - case X86State::RFLAG_ZF_RAW_LOC: - case X86State::RFLAG_SF_RAW_LOC: - // Intentionally do nothing. - break; - case X86State::RFLAG_AF_RAW_LOC: - // AF stored in bit 4 in our internal representation. It is also - // XORed with byte 4 of the PF byte, but we write that as zero here so - // we don't need any special handling for that. - Frame->State.af_raw = (EFLAGS & (1U << i)) ? (1 << 4) : 0; - break; - case X86State::RFLAG_PF_RAW_LOC: - // PF is inverted in our internal representation. - Frame->State.pf_raw = (EFLAGS & (1U << i)) ? 0 : 1; - break; - default: - Frame->State.flags[i] = (EFLAGS & (1U << i)) ? 1 : 0; - break; - } - } + return EFLAGS; +} - // Calculate packed NZCV - uint32_t Packed_NZCV{}; - Packed_NZCV |= (EFLAGS & (1U << X86State::RFLAG_OF_RAW_LOC)) ? 1U << IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_OF_RAW_LOC) : 0; - Packed_NZCV |= (EFLAGS & (1U << X86State::RFLAG_CF_RAW_LOC)) ? 1U << IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_CF_RAW_LOC) : 0; - Packed_NZCV |= (EFLAGS & (1U << X86State::RFLAG_ZF_RAW_LOC)) ? 1U << IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_ZF_RAW_LOC) : 0; - Packed_NZCV |= (EFLAGS & (1U << X86State::RFLAG_SF_RAW_LOC)) ? 1U << IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_SF_RAW_LOC) : 0; - memcpy(&Frame->State.flags[X86State::RFLAG_NZCV_LOC], &Packed_NZCV, sizeof(Packed_NZCV)); - - // Reserved, Read-As-1, Write-as-1 - Frame->State.flags[X86State::RFLAG_RESERVED_LOC] = 1; - // Interrupt Flag. Can't be written by CPL-3 userland. 
- Frame->State.flags[X86State::RFLAG_IF_LOC] = 1; - } - - bool ContextImpl::InitCore() { - // Initialize the CPU core signal handlers & DispatcherConfig - switch (Config.Core) { - case FEXCore::Config::CONFIG_IRJIT: - BackendFeatures = FEXCore::CPU::GetArm64JITBackendFeatures(); +void ContextImpl::SetFlagsFromCompactedEFLAGS(FEXCore::Core::InternalThreadState* Thread, uint32_t EFLAGS) { + const auto Frame = Thread->CurrentFrame; + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_EFLAG_BITS; ++i) { + switch (i) { + case X86State::RFLAG_OF_RAW_LOC: + case X86State::RFLAG_CF_RAW_LOC: + case X86State::RFLAG_ZF_RAW_LOC: + case X86State::RFLAG_SF_RAW_LOC: + // Intentionally do nothing. + break; + case X86State::RFLAG_AF_RAW_LOC: + // AF stored in bit 4 in our internal representation. It is also + // XORed with byte 4 of the PF byte, but we write that as zero here so + // we don't need any special handling for that. + Frame->State.af_raw = (EFLAGS & (1U << i)) ? (1 << 4) : 0; break; - case FEXCore::Config::CONFIG_CUSTOM: - // Do nothing + case X86State::RFLAG_PF_RAW_LOC: + // PF is inverted in our internal representation. + Frame->State.pf_raw = (EFLAGS & (1U << i)) ? 0 : 1; break; - default: - LogMan::Msg::EFmt("Unknown core configuration"); - return false; + default: Frame->State.flags[i] = (EFLAGS & (1U << i)) ? 1 : 0; break; } + } + + // Calculate packed NZCV + uint32_t Packed_NZCV {}; + Packed_NZCV |= (EFLAGS & (1U << X86State::RFLAG_OF_RAW_LOC)) ? 1U << IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_OF_RAW_LOC) : 0; + Packed_NZCV |= (EFLAGS & (1U << X86State::RFLAG_CF_RAW_LOC)) ? 1U << IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_CF_RAW_LOC) : 0; + Packed_NZCV |= (EFLAGS & (1U << X86State::RFLAG_ZF_RAW_LOC)) ? 1U << IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_ZF_RAW_LOC) : 0; + Packed_NZCV |= (EFLAGS & (1U << X86State::RFLAG_SF_RAW_LOC)) ? 
1U << IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_SF_RAW_LOC) : 0; + memcpy(&Frame->State.flags[X86State::RFLAG_NZCV_LOC], &Packed_NZCV, sizeof(Packed_NZCV)); - Dispatcher = FEXCore::CPU::Dispatcher::Create(this); + // Reserved, Read-As-1, Write-as-1 + Frame->State.flags[X86State::RFLAG_RESERVED_LOC] = 1; + // Interrupt Flag. Can't be written by CPL-3 userland. + Frame->State.flags[X86State::RFLAG_IF_LOC] = 1; +} - // Set up the SignalDelegator config since core is initialized. - FEXCore::SignalDelegator::SignalDelegatorConfig SignalConfig { - .SupportsAVX = HostFeatures.SupportsAVX, +bool ContextImpl::InitCore() { + // Initialize the CPU core signal handlers & DispatcherConfig + switch (Config.Core) { + case FEXCore::Config::CONFIG_IRJIT: BackendFeatures = FEXCore::CPU::GetArm64JITBackendFeatures(); break; + case FEXCore::Config::CONFIG_CUSTOM: + // Do nothing + break; + default: LogMan::Msg::EFmt("Unknown core configuration"); return false; + } - .DispatcherBegin = Dispatcher->Start, - .DispatcherEnd = Dispatcher->End, + Dispatcher = FEXCore::CPU::Dispatcher::Create(this); - .AbsoluteLoopTopAddress = Dispatcher->AbsoluteLoopTopAddress, - .AbsoluteLoopTopAddressFillSRA = Dispatcher->AbsoluteLoopTopAddressFillSRA, - .SignalHandlerReturnAddress = Dispatcher->SignalHandlerReturnAddress, - .SignalHandlerReturnAddressRT = Dispatcher->SignalHandlerReturnAddressRT, + // Set up the SignalDelegator config since core is initialized. + FEXCore::SignalDelegator::SignalDelegatorConfig SignalConfig { + .SupportsAVX = HostFeatures.SupportsAVX, - .PauseReturnInstruction = Dispatcher->PauseReturnInstruction, - .ThreadPauseHandlerAddressSpillSRA = Dispatcher->ThreadPauseHandlerAddressSpillSRA, - .ThreadPauseHandlerAddress = Dispatcher->ThreadPauseHandlerAddress, + .DispatcherBegin = Dispatcher->Start, + .DispatcherEnd = Dispatcher->End, - // Stop handlers. 
- .ThreadStopHandlerAddressSpillSRA = Dispatcher->ThreadStopHandlerAddressSpillSRA, - .ThreadStopHandlerAddress = Dispatcher->ThreadStopHandlerAddress, + .AbsoluteLoopTopAddress = Dispatcher->AbsoluteLoopTopAddress, + .AbsoluteLoopTopAddressFillSRA = Dispatcher->AbsoluteLoopTopAddressFillSRA, + .SignalHandlerReturnAddress = Dispatcher->SignalHandlerReturnAddress, + .SignalHandlerReturnAddressRT = Dispatcher->SignalHandlerReturnAddressRT, - // SRA information. - .SRAGPRCount = Dispatcher->GetSRAGPRCount(), - .SRAFPRCount = Dispatcher->GetSRAFPRCount(), - }; + .PauseReturnInstruction = Dispatcher->PauseReturnInstruction, + .ThreadPauseHandlerAddressSpillSRA = Dispatcher->ThreadPauseHandlerAddressSpillSRA, + .ThreadPauseHandlerAddress = Dispatcher->ThreadPauseHandlerAddress, + + // Stop handlers. + .ThreadStopHandlerAddressSpillSRA = Dispatcher->ThreadStopHandlerAddressSpillSRA, + .ThreadStopHandlerAddress = Dispatcher->ThreadStopHandlerAddress, + + // SRA information. + .SRAGPRCount = Dispatcher->GetSRAGPRCount(), + .SRAFPRCount = Dispatcher->GetSRAFPRCount(), + }; - Dispatcher->GetSRAGPRMapping(SignalConfig.SRAGPRMapping); - Dispatcher->GetSRAFPRMapping(SignalConfig.SRAFPRMapping); + Dispatcher->GetSRAGPRMapping(SignalConfig.SRAGPRMapping); + Dispatcher->GetSRAFPRMapping(SignalConfig.SRAFPRMapping); - // Give this configuration to the SignalDelegator. - SignalDelegation->SetConfig(SignalConfig); + // Give this configuration to the SignalDelegator. + SignalDelegation->SetConfig(SignalConfig); #ifndef _WIN32 - ThunkHandler = FEXCore::ThunkHandler::Create(); + ThunkHandler = FEXCore::ThunkHandler::Create(); #else - // WIN32 always needs the interrupt fault check to be enabled. - Config.NeedsPendingInterruptFaultCheck = true; + // WIN32 always needs the interrupt fault check to be enabled. + Config.NeedsPendingInterruptFaultCheck = true; #endif - if (Config.GdbServer) { - // If gdbserver is enabled then this needs to be enabled. 
- Config.NeedsPendingInterruptFaultCheck = true; - // FEX needs to start paused when gdb is enabled. - StartPaused = true; - } - - return true; + if (Config.GdbServer) { + // If gdbserver is enabled then this needs to be enabled. + Config.NeedsPendingInterruptFaultCheck = true; + // FEX needs to start paused when gdb is enabled. + StartPaused = true; } - void ContextImpl::HandleCallback(FEXCore::Core::InternalThreadState *Thread, uint64_t RIP) { - static_cast(Thread->CTX)->Dispatcher->ExecuteJITCallback(Thread->CurrentFrame, RIP); - } + return true; +} - FEXCore::Context::ExitReason ContextImpl::RunUntilExit(FEXCore::Core::InternalThreadState *Thread) { - ExecutionThread(Thread); - while(true) { - auto reason = Thread->ExitReason; +void ContextImpl::HandleCallback(FEXCore::Core::InternalThreadState* Thread, uint64_t RIP) { + static_cast(Thread->CTX)->Dispatcher->ExecuteJITCallback(Thread->CurrentFrame, RIP); +} - // Don't return if a custom exit handling the exit - if (!CustomExitHandler || reason == ExitReason::EXIT_SHUTDOWN) { - return reason; - } +FEXCore::Context::ExitReason ContextImpl::RunUntilExit(FEXCore::Core::InternalThreadState* Thread) { + ExecutionThread(Thread); + while (true) { + auto reason = Thread->ExitReason; + + // Don't return if a custom exit handling the exit + if (!CustomExitHandler || reason == ExitReason::EXIT_SHUTDOWN) { + return reason; } } +} - void ContextImpl::ExecuteThread(FEXCore::Core::InternalThreadState *Thread) { - Dispatcher->ExecuteDispatch(Thread->CurrentFrame); - } +void ContextImpl::ExecuteThread(FEXCore::Core::InternalThreadState* Thread) { + Dispatcher->ExecuteDispatch(Thread->CurrentFrame); +} - void ContextImpl::InitializeThreadTLSData(FEXCore::Core::InternalThreadState *Thread) { - // Let's do some initial bookkeeping here - Thread->ThreadManager.TID = FHU::Syscalls::gettid(); - Thread->ThreadManager.PID = ::getpid(); +void ContextImpl::InitializeThreadTLSData(FEXCore::Core::InternalThreadState* Thread) { + // Let's do 
some initial bookkeeping here + Thread->ThreadManager.TID = FHU::Syscalls::gettid(); + Thread->ThreadManager.PID = ::getpid(); - if (Config.BlockJITNaming() || - Config.GlobalJITNaming() || - Config.LibraryJITNaming()) { - // Allocate a TLS JIT symbol buffer only if enabled. - Thread->SymbolBuffer = JITSymbols::AllocateBuffer(); - } + if (Config.BlockJITNaming() || Config.GlobalJITNaming() || Config.LibraryJITNaming()) { + // Allocate a TLS JIT symbol buffer only if enabled. + Thread->SymbolBuffer = JITSymbols::AllocateBuffer(); + } - SignalDelegation->RegisterTLSState(Thread); - if (ThunkHandler) { - ThunkHandler->RegisterTLSState(Thread); - } + SignalDelegation->RegisterTLSState(Thread); + if (ThunkHandler) { + ThunkHandler->RegisterTLSState(Thread); + } #ifndef _WIN32 - Alloc::OSAllocator::RegisterTLSData(Thread); + Alloc::OSAllocator::RegisterTLSData(Thread); #endif - } - - void ContextImpl::InitializeCompiler(FEXCore::Core::InternalThreadState* Thread) { - Thread->OpDispatcher = fextl::make_unique(this); - Thread->OpDispatcher->SetMultiblock(Config.Multiblock); - Thread->LookupCache = fextl::make_unique(this); - Thread->FrontendDecoder = fextl::make_unique(this); - Thread->PassManager = fextl::make_unique(); +} - Thread->CurrentFrame->Pointers.Common.L1Pointer = Thread->LookupCache->GetL1Pointer(); - Thread->CurrentFrame->Pointers.Common.L2Pointer = Thread->LookupCache->GetPagePointer(); +void ContextImpl::InitializeCompiler(FEXCore::Core::InternalThreadState* Thread) { + Thread->OpDispatcher = fextl::make_unique(this); + Thread->OpDispatcher->SetMultiblock(Config.Multiblock); + Thread->LookupCache = fextl::make_unique(this); + Thread->FrontendDecoder = fextl::make_unique(this); + Thread->PassManager = fextl::make_unique(); - Dispatcher->InitThreadPointers(Thread); + Thread->CurrentFrame->Pointers.Common.L1Pointer = Thread->LookupCache->GetL1Pointer(); + Thread->CurrentFrame->Pointers.Common.L2Pointer = Thread->LookupCache->GetPagePointer(); - Thread->CTX = 
this; + Dispatcher->InitThreadPointers(Thread); - Thread->PassManager->AddDefaultPasses(this, Config.Core == FEXCore::Config::CONFIG_IRJIT); - Thread->PassManager->AddDefaultValidationPasses(); + Thread->CTX = this; - Thread->PassManager->RegisterSyscallHandler(SyscallHandler); + Thread->PassManager->AddDefaultPasses(this, Config.Core == FEXCore::Config::CONFIG_IRJIT); + Thread->PassManager->AddDefaultValidationPasses(); - // Create CPU backend - switch (Config.Core) { - case FEXCore::Config::CONFIG_IRJIT: - Thread->PassManager->InsertRegisterAllocationPass(HostFeatures.SupportsAVX); - Thread->CPUBackend = FEXCore::CPU::CreateArm64JITCore(this, Thread); - break; - case FEXCore::Config::CONFIG_CUSTOM: - Thread->CPUBackend = CustomCPUFactory(this, Thread); - break; - default: - ERROR_AND_DIE_FMT("Unknown core configuration"); - break; - } + Thread->PassManager->RegisterSyscallHandler(SyscallHandler); - Thread->PassManager->Finalize(); + // Create CPU backend + switch (Config.Core) { + case FEXCore::Config::CONFIG_IRJIT: + Thread->PassManager->InsertRegisterAllocationPass(HostFeatures.SupportsAVX); + Thread->CPUBackend = FEXCore::CPU::CreateArm64JITCore(this, Thread); + break; + case FEXCore::Config::CONFIG_CUSTOM: Thread->CPUBackend = CustomCPUFactory(this, Thread); break; + default: ERROR_AND_DIE_FMT("Unknown core configuration"); break; } - FEXCore::Core::InternalThreadState* ContextImpl::CreateThread(uint64_t InitialRIP, uint64_t StackPointer, FEXCore::Core::CPUState *NewThreadState, uint64_t ParentTID) { - FEXCore::Core::InternalThreadState *Thread = new FEXCore::Core::InternalThreadState{}; + Thread->PassManager->Finalize(); +} - Thread->CurrentFrame->State.gregs[X86State::REG_RSP] = StackPointer; - Thread->CurrentFrame->State.rip = InitialRIP; +FEXCore::Core::InternalThreadState* +ContextImpl::CreateThread(uint64_t InitialRIP, uint64_t StackPointer, FEXCore::Core::CPUState* NewThreadState, uint64_t ParentTID) { + FEXCore::Core::InternalThreadState* Thread = new 
FEXCore::Core::InternalThreadState {}; - // Copy over the new thread state to the new object - if (NewThreadState) { - memcpy(&Thread->CurrentFrame->State, NewThreadState, sizeof(FEXCore::Core::CPUState)); - } + Thread->CurrentFrame->State.gregs[X86State::REG_RSP] = StackPointer; + Thread->CurrentFrame->State.rip = InitialRIP; - // Set up the thread manager state - Thread->ThreadManager.parent_tid = ParentTID; - Thread->CurrentFrame->Thread = Thread; + // Copy over the new thread state to the new object + if (NewThreadState) { + memcpy(&Thread->CurrentFrame->State, NewThreadState, sizeof(FEXCore::Core::CPUState)); + } - InitializeCompiler(Thread); + // Set up the thread manager state + Thread->ThreadManager.parent_tid = ParentTID; + Thread->CurrentFrame->Thread = Thread; - Thread->CurrentFrame->State.DeferredSignalRefCount.Store(0); - Thread->CurrentFrame->State.DeferredSignalFaultAddress = reinterpret_cast*>(FEXCore::Allocator::VirtualAlloc(4096)); + InitializeCompiler(Thread); - return Thread; - } + Thread->CurrentFrame->State.DeferredSignalRefCount.Store(0); + Thread->CurrentFrame->State.DeferredSignalFaultAddress = + reinterpret_cast*>(FEXCore::Allocator::VirtualAlloc(4096)); + + return Thread; +} - void ContextImpl::DestroyThread(FEXCore::Core::InternalThreadState *Thread, bool NeedsTLSUninstall) { - if (NeedsTLSUninstall) { +void ContextImpl::DestroyThread(FEXCore::Core::InternalThreadState* Thread, bool NeedsTLSUninstall) { + if (NeedsTLSUninstall) { #ifndef _WIN32 - Alloc::OSAllocator::UninstallTLSData(Thread); + Alloc::OSAllocator::UninstallTLSData(Thread); #endif - SignalDelegation->UninstallTLSState(Thread); - } - - FEXCore::Allocator::VirtualFree(reinterpret_cast(Thread->CurrentFrame->State.DeferredSignalFaultAddress), 4096); - delete Thread; + SignalDelegation->UninstallTLSState(Thread); } + FEXCore::Allocator::VirtualFree(reinterpret_cast(Thread->CurrentFrame->State.DeferredSignalFaultAddress), 4096); + delete Thread; +} + #ifndef _WIN32 - void 
ContextImpl::UnlockAfterFork(FEXCore::Core::InternalThreadState *LiveThread, bool Child) { - Allocator::UnlockAfterFork(LiveThread, Child); +void ContextImpl::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThread, bool Child) { + Allocator::UnlockAfterFork(LiveThread, Child); - if (Child) { - CodeInvalidationMutex.StealAndDropActiveLocks(); - } - else { - CodeInvalidationMutex.unlock(); - return; - } + if (Child) { + CodeInvalidationMutex.StealAndDropActiveLocks(); + } else { + CodeInvalidationMutex.unlock(); + return; } +} - void ContextImpl::LockBeforeFork(FEXCore::Core::InternalThreadState *Thread) { - CodeInvalidationMutex.lock(); - Allocator::LockBeforeFork(Thread); - } +void ContextImpl::LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) { + CodeInvalidationMutex.lock(); + Allocator::LockBeforeFork(Thread); +} #endif - void ContextImpl::AddBlockMapping(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, void *Ptr) { - Thread->LookupCache->AddBlockMapping(Address, Ptr); - } - - void ContextImpl::ClearCodeCache(FEXCore::Core::InternalThreadState *Thread) { - FEXCORE_PROFILE_INSTANT("ClearCodeCache"); +void ContextImpl::AddBlockMapping(FEXCore::Core::InternalThreadState* Thread, uint64_t Address, void* Ptr) { + Thread->LookupCache->AddBlockMapping(Address, Ptr); +} - { - // Ensure the Code Object Serialization service has fully serialized this thread's data before clearing the cache - // Use the thread's object cache ref counter for this - CodeSerialize::CodeObjectSerializeService::WaitForEmptyJobQueue(&Thread->ObjectCacheRefCounter); - } - std::lock_guard lk(Thread->LookupCache->WriteLock); +void ContextImpl::ClearCodeCache(FEXCore::Core::InternalThreadState* Thread) { + FEXCORE_PROFILE_INSTANT("ClearCodeCache"); - Thread->LookupCache->ClearCache(); - Thread->CPUBackend->ClearCache(); - Thread->DebugStore.clear(); + { + // Ensure the Code Object Serialization service has fully serialized this thread's data before clearing the cache + 
// Use the thread's object cache ref counter for this + CodeSerialize::CodeObjectSerializeService::WaitForEmptyJobQueue(&Thread->ObjectCacheRefCounter); } + std::lock_guard lk(Thread->LookupCache->WriteLock); - static void IRDumper(FEXCore::Core::InternalThreadState *Thread, IR::IREmitter *IREmitter, uint64_t GuestRIP, IR::RegisterAllocationData* RA) { - FEXCore::File::File FD = FEXCore::File::File::GetStdERR(); - fextl::stringstream out; - auto NewIR = IREmitter->ViewIR(); - FEXCore::IR::Dump(&out, &NewIR, RA); - fextl::fmt::print(FD, "IR-ShouldDump-{} 0x{:x}:\n{}\n@@@@@\n", RA ? "post" : "pre", GuestRIP, out.str()); - }; + Thread->LookupCache->ClearCache(); + Thread->CPUBackend->ClearCache(); + Thread->DebugStore.clear(); +} - static void ValidateIR(ContextImpl *ctx, IR::IREmitter *IREmitter) { - // Convert to text, Parse, Convert to text again and make sure the texts match - fextl::stringstream out; - static auto compaction = IR::CreateIRCompaction(ctx->OpDispatcherAllocator); - compaction->Run(IREmitter); - auto NewIR = IREmitter->ViewIR(); - Dump(&out, &NewIR, nullptr); - out.seekg(0); - FEXCore::Utils::PooledAllocatorMalloc Allocator; - auto reparsed = IR::Parse(Allocator, out); - if (reparsed == nullptr) { - LOGMAN_MSG_A_FMT("Failed to parse IR\n"); - } else { - fextl::stringstream out2; - auto NewIR2 = reparsed->ViewIR(); - Dump(&out2, &NewIR2, nullptr); - if (out.str() != out2.str()) { - LogMan::Msg::IFmt("one:\n {}", out.str()); - LogMan::Msg::IFmt("two:\n {}", out2.str()); - LOGMAN_MSG_A_FMT("Parsed IR doesn't match\n"); - } +static void IRDumper(FEXCore::Core::InternalThreadState* Thread, IR::IREmitter* IREmitter, uint64_t GuestRIP, IR::RegisterAllocationData* RA) { + FEXCore::File::File FD = FEXCore::File::File::GetStdERR(); + fextl::stringstream out; + auto NewIR = IREmitter->ViewIR(); + FEXCore::IR::Dump(&out, &NewIR, RA); + fextl::fmt::print(FD, "IR-ShouldDump-{} 0x{:x}:\n{}\n@@@@@\n", RA ? 
"post" : "pre", GuestRIP, out.str()); +}; + +static void ValidateIR(ContextImpl* ctx, IR::IREmitter* IREmitter) { + // Convert to text, Parse, Convert to text again and make sure the texts match + fextl::stringstream out; + static auto compaction = IR::CreateIRCompaction(ctx->OpDispatcherAllocator); + compaction->Run(IREmitter); + auto NewIR = IREmitter->ViewIR(); + Dump(&out, &NewIR, nullptr); + out.seekg(0); + FEXCore::Utils::PooledAllocatorMalloc Allocator; + auto reparsed = IR::Parse(Allocator, out); + if (reparsed == nullptr) { + LOGMAN_MSG_A_FMT("Failed to parse IR\n"); + } else { + fextl::stringstream out2; + auto NewIR2 = reparsed->ViewIR(); + Dump(&out2, &NewIR2, nullptr); + if (out.str() != out2.str()) { + LogMan::Msg::IFmt("one:\n {}", out.str()); + LogMan::Msg::IFmt("two:\n {}", out2.str()); + LOGMAN_MSG_A_FMT("Parsed IR doesn't match\n"); } } +} - ContextImpl::GenerateIRResult ContextImpl::GenerateIR(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP, bool ExtendedDebugInfo, uint64_t MaxInst) { - FEXCORE_PROFILE_SCOPED("GenerateIR"); +ContextImpl::GenerateIRResult +ContextImpl::GenerateIR(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP, bool ExtendedDebugInfo, uint64_t MaxInst) { + FEXCORE_PROFILE_SCOPED("GenerateIR"); - Thread->OpDispatcher->ReownOrClaimBuffer(); - Thread->OpDispatcher->ResetWorkingList(); + Thread->OpDispatcher->ReownOrClaimBuffer(); + Thread->OpDispatcher->ResetWorkingList(); - uint64_t TotalInstructions {0}; - uint64_t TotalInstructionsLength {0}; + uint64_t TotalInstructions {0}; + uint64_t TotalInstructionsLength {0}; - bool HasCustomIR{}; + bool HasCustomIR {}; - if (HasCustomIRHandlers.load(std::memory_order_relaxed)) { - std::shared_lock lk(CustomIRMutex); - auto Handler = CustomIRHandlers.find(GuestRIP); - if (Handler != CustomIRHandlers.end()) { - TotalInstructions = 1; - TotalInstructionsLength = 1; - std::get<0>(Handler->second)(GuestRIP, Thread->OpDispatcher.get()); - HasCustomIR = true; - } + if 
(HasCustomIRHandlers.load(std::memory_order_relaxed)) { + std::shared_lock lk(CustomIRMutex); + auto Handler = CustomIRHandlers.find(GuestRIP); + if (Handler != CustomIRHandlers.end()) { + TotalInstructions = 1; + TotalInstructionsLength = 1; + std::get<0>(Handler->second)(GuestRIP, Thread->OpDispatcher.get()); + HasCustomIR = true; } + } - if (!HasCustomIR) { - uint8_t const *GuestCode{}; - GuestCode = reinterpret_cast(GuestRIP); + if (!HasCustomIR) { + const uint8_t* GuestCode {}; + GuestCode = reinterpret_cast< const uint8_t*>(GuestRIP); - bool HadDispatchError {false}; + bool HadDispatchError {false}; - Thread->FrontendDecoder->DecodeInstructionsAtEntry(GuestCode, GuestRIP, MaxInst, [Thread](uint64_t BlockEntry, uint64_t Start, uint64_t Length) { - if (Thread->LookupCache->AddBlockExecutableRange(BlockEntry, Start, Length)) { - static_cast(Thread->CTX)->SyscallHandler->MarkGuestExecutableRange(Thread, Start, Length); - } - }); + Thread->FrontendDecoder->DecodeInstructionsAtEntry(GuestCode, GuestRIP, MaxInst, [Thread](uint64_t BlockEntry, uint64_t Start, uint64_t Length) { + if (Thread->LookupCache->AddBlockExecutableRange(BlockEntry, Start, Length)) { + static_cast(Thread->CTX)->SyscallHandler->MarkGuestExecutableRange(Thread, Start, Length); + } + }); - auto BlockInfo = Thread->FrontendDecoder->GetDecodedBlockInfo(); - auto CodeBlocks = &BlockInfo->Blocks; + auto BlockInfo = Thread->FrontendDecoder->GetDecodedBlockInfo(); + auto CodeBlocks = &BlockInfo->Blocks; - Thread->OpDispatcher->BeginFunction(GuestRIP, CodeBlocks, BlockInfo->TotalInstructionCount); + Thread->OpDispatcher->BeginFunction(GuestRIP, CodeBlocks, BlockInfo->TotalInstructionCount); - const uint8_t GPRSize = GetGPRSize(); + const uint8_t GPRSize = GetGPRSize(); - for (size_t j = 0; j < CodeBlocks->size(); ++j) { - FEXCore::Frontend::Decoder::DecodedBlocks const &Block = CodeBlocks->at(j); - // Set the block entry point - Thread->OpDispatcher->SetNewBlockIfChanged(Block.Entry); + for (size_t j = 
0; j < CodeBlocks->size(); ++j) { + const FEXCore::Frontend::Decoder::DecodedBlocks& Block = CodeBlocks->at(j); + // Set the block entry point + Thread->OpDispatcher->SetNewBlockIfChanged(Block.Entry); - uint64_t BlockInstructionsLength {}; + uint64_t BlockInstructionsLength {}; - // Reset any block-specific state - Thread->OpDispatcher->StartNewBlock(); + // Reset any block-specific state + Thread->OpDispatcher->StartNewBlock(); - uint64_t InstsInBlock = Block.NumInstructions; + uint64_t InstsInBlock = Block.NumInstructions; - for (size_t i = 0; i < InstsInBlock; ++i) { - FEXCore::X86Tables::X86InstInfo const* TableInfo {nullptr}; - FEXCore::X86Tables::DecodedInst const* DecodedInfo {nullptr}; + for (size_t i = 0; i < InstsInBlock; ++i) { + const FEXCore::X86Tables::X86InstInfo* TableInfo {nullptr}; + const FEXCore::X86Tables::DecodedInst* DecodedInfo {nullptr}; - TableInfo = Block.DecodedInstructions[i].TableInfo; - DecodedInfo = &Block.DecodedInstructions[i]; - bool IsLocked = DecodedInfo->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK; + TableInfo = Block.DecodedInstructions[i].TableInfo; + DecodedInfo = &Block.DecodedInstructions[i]; + bool IsLocked = DecodedInfo->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK; - if (ExtendedDebugInfo || Thread->OpDispatcher->CanHaveSideEffects(TableInfo, DecodedInfo)) { - Thread->OpDispatcher->_GuestOpcode(Block.Entry + BlockInstructionsLength - GuestRIP); - } + if (ExtendedDebugInfo || Thread->OpDispatcher->CanHaveSideEffects(TableInfo, DecodedInfo)) { + Thread->OpDispatcher->_GuestOpcode(Block.Entry + BlockInstructionsLength - GuestRIP); + } - if (Config.SMCChecks == FEXCore::Config::CONFIG_SMC_FULL) { - auto ExistingCodePtr = reinterpret_cast(Block.Entry + BlockInstructionsLength); + if (Config.SMCChecks == FEXCore::Config::CONFIG_SMC_FULL) { + auto ExistingCodePtr = reinterpret_cast(Block.Entry + BlockInstructionsLength); - auto CodeChanged = Thread->OpDispatcher->_ValidateCode(ExistingCodePtr[0], 
ExistingCodePtr[1], (uintptr_t)ExistingCodePtr - GuestRIP, DecodedInfo->InstSize); + auto CodeChanged = Thread->OpDispatcher->_ValidateCode(ExistingCodePtr[0], ExistingCodePtr[1], + (uintptr_t)ExistingCodePtr - GuestRIP, DecodedInfo->InstSize); - auto InvalidateCodeCond = Thread->OpDispatcher->_CondJump(CodeChanged); + auto InvalidateCodeCond = Thread->OpDispatcher->_CondJump(CodeChanged); - auto CurrentBlock = Thread->OpDispatcher->GetCurrentBlock(); - auto CodeWasChangedBlock = Thread->OpDispatcher->CreateNewCodeBlockAtEnd(); - Thread->OpDispatcher->SetTrueJumpTarget(InvalidateCodeCond, CodeWasChangedBlock); + auto CurrentBlock = Thread->OpDispatcher->GetCurrentBlock(); + auto CodeWasChangedBlock = Thread->OpDispatcher->CreateNewCodeBlockAtEnd(); + Thread->OpDispatcher->SetTrueJumpTarget(InvalidateCodeCond, CodeWasChangedBlock); - Thread->OpDispatcher->SetCurrentCodeBlock(CodeWasChangedBlock); - Thread->OpDispatcher->_ThreadRemoveCodeEntry(); - Thread->OpDispatcher->_ExitFunction(Thread->OpDispatcher->_EntrypointOffset(IR::SizeToOpSize(GPRSize), Block.Entry + BlockInstructionsLength - GuestRIP)); + Thread->OpDispatcher->SetCurrentCodeBlock(CodeWasChangedBlock); + Thread->OpDispatcher->_ThreadRemoveCodeEntry(); + Thread->OpDispatcher->_ExitFunction( + Thread->OpDispatcher->_EntrypointOffset(IR::SizeToOpSize(GPRSize), Block.Entry + BlockInstructionsLength - GuestRIP)); - auto NextOpBlock = Thread->OpDispatcher->CreateNewCodeBlockAfter(CurrentBlock); + auto NextOpBlock = Thread->OpDispatcher->CreateNewCodeBlockAfter(CurrentBlock); - Thread->OpDispatcher->SetFalseJumpTarget(InvalidateCodeCond, NextOpBlock); - Thread->OpDispatcher->SetCurrentCodeBlock(NextOpBlock); - } + Thread->OpDispatcher->SetFalseJumpTarget(InvalidateCodeCond, NextOpBlock); + Thread->OpDispatcher->SetCurrentCodeBlock(NextOpBlock); + } - if (TableInfo && TableInfo->OpcodeDispatcher) { - auto Fn = TableInfo->OpcodeDispatcher; - Thread->OpDispatcher->ResetHandledLock(); - 
Thread->OpDispatcher->ResetDecodeFailure(); - std::invoke(Fn, Thread->OpDispatcher, DecodedInfo); - if (Thread->OpDispatcher->HadDecodeFailure()) { + if (TableInfo && TableInfo->OpcodeDispatcher) { + auto Fn = TableInfo->OpcodeDispatcher; + Thread->OpDispatcher->ResetHandledLock(); + Thread->OpDispatcher->ResetDecodeFailure(); + std::invoke(Fn, Thread->OpDispatcher, DecodedInfo); + if (Thread->OpDispatcher->HadDecodeFailure()) { + HadDispatchError = true; + } else { + if (Thread->OpDispatcher->HasHandledLock() != IsLocked) { HadDispatchError = true; + LogMan::Msg::EFmt("Missing LOCK HANDLER at 0x{:x}{{'{}'}}", Block.Entry + BlockInstructionsLength, TableInfo->Name ?: "UND"); } - else { - if (Thread->OpDispatcher->HasHandledLock() != IsLocked) { - HadDispatchError = true; - LogMan::Msg::EFmt("Missing LOCK HANDLER at 0x{:x}{{'{}'}}", Block.Entry + BlockInstructionsLength, TableInfo->Name ?: "UND"); - } - BlockInstructionsLength += DecodedInfo->InstSize; - TotalInstructionsLength += DecodedInfo->InstSize; - ++TotalInstructions; - } + BlockInstructionsLength += DecodedInfo->InstSize; + TotalInstructionsLength += DecodedInfo->InstSize; + ++TotalInstructions; } - else { - if (TableInfo) { - LogMan::Msg::EFmt("Invalid or Unknown instruction: {} 0x{:x}", TableInfo->Name ?: "UND", Block.Entry - GuestRIP); - } - // Invalid instruction - Thread->OpDispatcher->InvalidOp(DecodedInfo); - Thread->OpDispatcher->_ExitFunction(Thread->OpDispatcher->_EntrypointOffset(IR::SizeToOpSize(GPRSize), Block.Entry - GuestRIP)); + } else { + if (TableInfo) { + LogMan::Msg::EFmt("Invalid or Unknown instruction: {} 0x{:x}", TableInfo->Name ?: "UND", Block.Entry - GuestRIP); } + // Invalid instruction + Thread->OpDispatcher->InvalidOp(DecodedInfo); + Thread->OpDispatcher->_ExitFunction(Thread->OpDispatcher->_EntrypointOffset(IR::SizeToOpSize(GPRSize), Block.Entry - GuestRIP)); + } - const bool NeedsBlockEnd = (HadDispatchError && TotalInstructions > 0) || - 
(Thread->OpDispatcher->NeedsBlockEnder() && i + 1 == InstsInBlock); + const bool NeedsBlockEnd = + (HadDispatchError && TotalInstructions > 0) || (Thread->OpDispatcher->NeedsBlockEnder() && i + 1 == InstsInBlock); - // If we had a dispatch error then leave early - if (HadDispatchError && TotalInstructions == 0) { - // Couldn't handle any instruction in op dispatcher - Thread->OpDispatcher->ResetWorkingList(); - return { nullptr, nullptr, 0, 0, 0, 0 }; - } + // If we had a dispatch error then leave early + if (HadDispatchError && TotalInstructions == 0) { + // Couldn't handle any instruction in op dispatcher + Thread->OpDispatcher->ResetWorkingList(); + return {nullptr, nullptr, 0, 0, 0, 0}; + } - if (NeedsBlockEnd) { - const uint8_t GPRSize = GetGPRSize(); + if (NeedsBlockEnd) { + const uint8_t GPRSize = GetGPRSize(); - // We had some instructions. Early exit - Thread->OpDispatcher->_ExitFunction(Thread->OpDispatcher->_EntrypointOffset(IR::SizeToOpSize(GPRSize), Block.Entry + BlockInstructionsLength - GuestRIP)); - break; - } + // We had some instructions. 
Early exit + Thread->OpDispatcher->_ExitFunction( + Thread->OpDispatcher->_EntrypointOffset(IR::SizeToOpSize(GPRSize), Block.Entry + BlockInstructionsLength - GuestRIP)); + break; + } - if (Thread->OpDispatcher->FinishOp(DecodedInfo->PC + DecodedInfo->InstSize, i + 1 == InstsInBlock)) { - break; - } + if (Thread->OpDispatcher->FinishOp(DecodedInfo->PC + DecodedInfo->InstSize, i + 1 == InstsInBlock)) { + break; } } - - Thread->OpDispatcher->Finalize(); - - Thread->FrontendDecoder->DelayedDisownBuffer(); } - IR::IREmitter *IREmitter = Thread->OpDispatcher.get(); + Thread->OpDispatcher->Finalize(); - auto ShouldDump = Thread->OpDispatcher->ShouldDumpIR(); - // Debug - { - if (ShouldDump) { - IRDumper(Thread, IREmitter, GuestRIP, nullptr); - } - - if (static_cast(Thread->CTX)->Config.ValidateIRarser) { - ValidateIR(this, IREmitter); - } - } + Thread->FrontendDecoder->DelayedDisownBuffer(); + } - // Run the passmanager over the IR from the dispatcher - Thread->PassManager->Run(IREmitter); + IR::IREmitter* IREmitter = Thread->OpDispatcher.get(); - // Debug - { - if (ShouldDump) { - IRDumper(Thread, IREmitter, GuestRIP, Thread->PassManager->HasPass("RA") ? Thread->PassManager->GetPass("RA")->GetAllocationData() : nullptr); - } + auto ShouldDump = Thread->OpDispatcher->ShouldDumpIR(); + // Debug + { + if (ShouldDump) { + IRDumper(Thread, IREmitter, GuestRIP, nullptr); } - auto RAData = Thread->PassManager->HasPass("RA") ? 
Thread->PassManager->GetPass("RA")->PullAllocationData() : nullptr; - auto IRList = IREmitter->CreateIRCopy(); + if (static_cast(Thread->CTX)->Config.ValidateIRarser) { + ValidateIR(this, IREmitter); + } + } - IREmitter->DelayedDisownBuffer(); + // Run the passmanager over the IR from the dispatcher + Thread->PassManager->Run(IREmitter); - return { - .IRList = IRList, - .RAData = std::move(RAData), - .TotalInstructions = TotalInstructions, - .TotalInstructionsLength = TotalInstructionsLength, - .StartAddr = Thread->FrontendDecoder->DecodedMinAddress, - .Length = Thread->FrontendDecoder->DecodedMaxAddress - Thread->FrontendDecoder->DecodedMinAddress, - }; + // Debug + { + if (ShouldDump) { + IRDumper(Thread, IREmitter, GuestRIP, + Thread->PassManager->HasPass("RA") ? Thread->PassManager->GetPass("RA")->GetAllocationData() : nullptr); + } } - ContextImpl::CompileCodeResult ContextImpl::CompileCode(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP, uint64_t MaxInst) { - FEXCore::IR::IRListView *IRList {}; - FEXCore::Core::DebugData *DebugData {}; - FEXCore::IR::RegisterAllocationData::UniquePtr RAData {}; - bool GeneratedIR {}; - uint64_t StartAddr {}; - uint64_t Length {}; + auto RAData = Thread->PassManager->HasPass("RA") ? 
Thread->PassManager->GetPass("RA")->PullAllocationData() : nullptr; + auto IRList = IREmitter->CreateIRCopy(); - // JIT Code object cache lookup - if (CodeObjectCacheService) { - auto CodeCacheEntry = CodeObjectCacheService->FetchCodeObjectFromCache(GuestRIP); - if (CodeCacheEntry) { - auto CompiledCode = Thread->CPUBackend->RelocateJITObjectCode(GuestRIP, CodeCacheEntry); - if (CompiledCode) { - return { - .CompiledCode = CompiledCode, - .IRData = nullptr, // No IR data generated - .DebugData = nullptr, // nullptr here ensures that code serialization doesn't occur on from cache read - .RAData = nullptr, // No RA data generated - .GeneratedIR = false, // nullptr here ensures IR cache mechanisms won't run - .StartAddr = 0, // Unused - .Length = 0, // Unused - }; - } - } - } + IREmitter->DelayedDisownBuffer(); - if (SourcecodeResolver && Config.GDBSymbols()) { - auto AOTIRCacheEntry = SyscallHandler->LookupAOTIRCacheEntry(Thread, GuestRIP); - if (AOTIRCacheEntry.Entry && !AOTIRCacheEntry.Entry->ContainsCode) { - AOTIRCacheEntry.Entry->SourcecodeMap = - SourcecodeResolver->GenerateMap(AOTIRCacheEntry.Entry->Filename, AOTIRCacheEntry.Entry->FileId); - } - } + return { + .IRList = IRList, + .RAData = std::move(RAData), + .TotalInstructions = TotalInstructions, + .TotalInstructionsLength = TotalInstructionsLength, + .StartAddr = Thread->FrontendDecoder->DecodedMinAddress, + .Length = Thread->FrontendDecoder->DecodedMaxAddress - Thread->FrontendDecoder->DecodedMinAddress, + }; +} - // AOT IR bookkeeping and cache - { - auto [IRCopy, RACopy, DebugDataCopy, _StartAddr, _Length, _GeneratedIR] = IRCaptureCache.PreGenerateIRFetch(Thread, GuestRIP, IRList); - if (_GeneratedIR) { - // Setup pointers to internal structures - IRList = IRCopy; - RAData = std::move(RACopy); - DebugData = DebugDataCopy; - StartAddr = _StartAddr; - Length = _Length; - GeneratedIR = _GeneratedIR; +ContextImpl::CompileCodeResult ContextImpl::CompileCode(FEXCore::Core::InternalThreadState* Thread, 
uint64_t GuestRIP, uint64_t MaxInst) { + FEXCore::IR::IRListView* IRList {}; + FEXCore::Core::DebugData* DebugData {}; + FEXCore::IR::RegisterAllocationData::UniquePtr RAData {}; + bool GeneratedIR {}; + uint64_t StartAddr {}; + uint64_t Length {}; + + // JIT Code object cache lookup + if (CodeObjectCacheService) { + auto CodeCacheEntry = CodeObjectCacheService->FetchCodeObjectFromCache(GuestRIP); + if (CodeCacheEntry) { + auto CompiledCode = Thread->CPUBackend->RelocateJITObjectCode(GuestRIP, CodeCacheEntry); + if (CompiledCode) { + return { + .CompiledCode = CompiledCode, + .IRData = nullptr, // No IR data generated + .DebugData = nullptr, // nullptr here ensures that code serialization doesn't occur on from cache read + .RAData = nullptr, // No RA data generated + .GeneratedIR = false, // nullptr here ensures IR cache mechanisms won't run + .StartAddr = 0, // Unused + .Length = 0, // Unused + }; } } + } - if (IRList == nullptr) { - // Generate IR + Meta Info - auto [IRCopy, RACopy, TotalInstructions, TotalInstructionsLength, _StartAddr, _Length] = GenerateIR(Thread, GuestRIP, Config.GDBSymbols(), MaxInst); + if (SourcecodeResolver && Config.GDBSymbols()) { + auto AOTIRCacheEntry = SyscallHandler->LookupAOTIRCacheEntry(Thread, GuestRIP); + if (AOTIRCacheEntry.Entry && !AOTIRCacheEntry.Entry->ContainsCode) { + AOTIRCacheEntry.Entry->SourcecodeMap = SourcecodeResolver->GenerateMap(AOTIRCacheEntry.Entry->Filename, AOTIRCacheEntry.Entry->FileId); + } + } + // AOT IR bookkeeping and cache + { + auto [IRCopy, RACopy, DebugDataCopy, _StartAddr, _Length, _GeneratedIR] = IRCaptureCache.PreGenerateIRFetch(Thread, GuestRIP, IRList); + if (_GeneratedIR) { // Setup pointers to internal structures IRList = IRCopy; RAData = std::move(RACopy); - DebugData = new FEXCore::Core::DebugData(); + DebugData = DebugDataCopy; StartAddr = _StartAddr; Length = _Length; - - // These blocks aren't already in the cache - GeneratedIR = true; - } - - if (IRList == nullptr) { - return {}; + 
GeneratedIR = _GeneratedIR; } - // Attempt to get the CPU backend to compile this code - return { - // FEX currently throws away the CPUBackend::CompiledCode object other than the entrypoint - // In the future with code caching getting wired up, we will pass the rest of the data forward. - // TODO: Pass the data forward when code caching is wired up to this. - .CompiledCode = Thread->CPUBackend->CompileCode(GuestRIP, IRList, DebugData, RAData.get()).BlockEntry, - .IRData = IRList, - .DebugData = DebugData, - .RAData = std::move(RAData), - .GeneratedIR = GeneratedIR, - .StartAddr = StartAddr, - .Length = Length, - }; - } - - uintptr_t ContextImpl::CompileBlock(FEXCore::Core::CpuStateFrame *Frame, uint64_t GuestRIP, uint64_t MaxInst) { - FEXCORE_PROFILE_SCOPED("CompileBlock"); - auto Thread = Frame->Thread; - - // Invalidate might take a unique lock on this, to guarantee that during invalidation no code gets compiled - auto lk = GuardSignalDeferringSection(CodeInvalidationMutex, Thread); - - // Is the code in the cache? 
- // The backends only check L1 and L2, not L3 - if (auto HostCode = Thread->LookupCache->FindBlock(GuestRIP)) { - return HostCode; - } - - void *CodePtr {}; - FEXCore::IR::IRListView *IRList {}; - FEXCore::Core::DebugData *DebugData {}; + } - bool GeneratedIR {}; - uint64_t StartAddr {}, Length {}; + if (IRList == nullptr) { + // Generate IR + Meta Info + auto [IRCopy, RACopy, TotalInstructions, TotalInstructionsLength, _StartAddr, _Length] = + GenerateIR(Thread, GuestRIP, Config.GDBSymbols(), MaxInst); - auto [Code, IR, Data, RAData, Generated, _StartAddr, _Length] = CompileCode(Thread, GuestRIP, MaxInst); - CodePtr = Code; - IRList = IR; - DebugData = Data; - GeneratedIR = Generated; + // Setup pointers to internal structures + IRList = IRCopy; + RAData = std::move(RACopy); + DebugData = new FEXCore::Core::DebugData(); StartAddr = _StartAddr; Length = _Length; - if (CodePtr == nullptr) { - return 0; - } + // These blocks aren't already in the cache + GeneratedIR = true; + } + + if (IRList == nullptr) { + return {}; + } + // Attempt to get the CPU backend to compile this code + return { + // FEX currently throws away the CPUBackend::CompiledCode object other than the entrypoint + // In the future with code caching getting wired up, we will pass the rest of the data forward. + // TODO: Pass the data forward when code caching is wired up to this. + .CompiledCode = Thread->CPUBackend->CompileCode(GuestRIP, IRList, DebugData, RAData.get()).BlockEntry, + .IRData = IRList, + .DebugData = DebugData, + .RAData = std::move(RAData), + .GeneratedIR = GeneratedIR, + .StartAddr = StartAddr, + .Length = Length, + }; +} - // The core managed to compile the code. 
- if (Config.BlockJITNaming()) { - auto FragmentBasePtr = reinterpret_cast(CodePtr); +uintptr_t ContextImpl::CompileBlock(FEXCore::Core::CpuStateFrame* Frame, uint64_t GuestRIP, uint64_t MaxInst) { + FEXCORE_PROFILE_SCOPED("CompileBlock"); + auto Thread = Frame->Thread; - if (DebugData) { - auto GuestRIPLookup = SyscallHandler->LookupAOTIRCacheEntry(Thread, GuestRIP); + // Invalidate might take a unique lock on this, to guarantee that during invalidation no code gets compiled + auto lk = GuardSignalDeferringSection(CodeInvalidationMutex, Thread); - if (DebugData->Subblocks.size()) { - for (auto& Subblock: DebugData->Subblocks) { - auto BlockBasePtr = FragmentBasePtr + Subblock.HostCodeOffset; - if (GuestRIPLookup.Entry) { - Symbols.Register(Thread->SymbolBuffer.get(), BlockBasePtr, DebugData->HostCodeSize, GuestRIPLookup.Entry->Filename, GuestRIP - GuestRIPLookup.VAFileStart); - } else { - Symbols.Register(Thread->SymbolBuffer.get(), BlockBasePtr, GuestRIP, Subblock.HostCodeSize); - } - } - } else { + // Is the code in the cache? + // The backends only check L1 and L2, not L3 + if (auto HostCode = Thread->LookupCache->FindBlock(GuestRIP)) { + return HostCode; + } + + void* CodePtr {}; + FEXCore::IR::IRListView* IRList {}; + FEXCore::Core::DebugData* DebugData {}; + + bool GeneratedIR {}; + uint64_t StartAddr {}, Length {}; + + auto [Code, IR, Data, RAData, Generated, _StartAddr, _Length] = CompileCode(Thread, GuestRIP, MaxInst); + CodePtr = Code; + IRList = IR; + DebugData = Data; + GeneratedIR = Generated; + StartAddr = _StartAddr; + Length = _Length; + + if (CodePtr == nullptr) { + return 0; + } + + // The core managed to compile the code. 
+ if (Config.BlockJITNaming()) { + auto FragmentBasePtr = reinterpret_cast(CodePtr); + + if (DebugData) { + auto GuestRIPLookup = SyscallHandler->LookupAOTIRCacheEntry(Thread, GuestRIP); + + if (DebugData->Subblocks.size()) { + for (auto& Subblock : DebugData->Subblocks) { + auto BlockBasePtr = FragmentBasePtr + Subblock.HostCodeOffset; if (GuestRIPLookup.Entry) { - Symbols.Register(Thread->SymbolBuffer.get(), FragmentBasePtr, DebugData->HostCodeSize, GuestRIPLookup.Entry->Filename, GuestRIP - GuestRIPLookup.VAFileStart); + Symbols.Register(Thread->SymbolBuffer.get(), BlockBasePtr, DebugData->HostCodeSize, GuestRIPLookup.Entry->Filename, + GuestRIP - GuestRIPLookup.VAFileStart); + } else { + Symbols.Register(Thread->SymbolBuffer.get(), BlockBasePtr, GuestRIP, Subblock.HostCodeSize); + } + } + } else { + if (GuestRIPLookup.Entry) { + Symbols.Register(Thread->SymbolBuffer.get(), FragmentBasePtr, DebugData->HostCodeSize, GuestRIPLookup.Entry->Filename, + GuestRIP - GuestRIPLookup.VAFileStart); } else { Symbols.Register(Thread->SymbolBuffer.get(), FragmentBasePtr, GuestRIP, DebugData->HostCodeSize); - } } } } + } - // Tell the object cache service to serialize the code if enabled - if (CodeObjectCacheService && - Config.CacheObjectCodeCompilation == FEXCore::Config::ConfigObjectCodeHandler::CONFIG_READWRITE && - DebugData) { - CodeObjectCacheService->AsyncAddSerializationJob(fextl::make_unique( - CodeSerialize::AsyncJobHandler::SerializationJobData { - .GuestRIP = GuestRIP, - .GuestCodeLength = Length, - .GuestCodeHash = 0, - .HostCodeBegin = CodePtr, - .HostCodeLength = DebugData->HostCodeSize, - .HostCodeHash = 0, - .ThreadJobRefCount = &Thread->ObjectCacheRefCounter, - .Relocations = std::move(*DebugData->Relocations), - } - )); - } - - // Clear any relocations that might have been generated - Thread->CPUBackend->ClearRelocations(); - - if (IRCaptureCache.PostCompileCode( - Thread, - CodePtr, - GuestRIP, - StartAddr, - Length, - std::move(RAData), - IRList, - 
DebugData, - GeneratedIR)) { - // Early exit - return (uintptr_t)CodePtr; - } + // Tell the object cache service to serialize the code if enabled + if (CodeObjectCacheService && Config.CacheObjectCodeCompilation == FEXCore::Config::ConfigObjectCodeHandler::CONFIG_READWRITE && DebugData) { + CodeObjectCacheService->AsyncAddSerializationJob( + fextl::make_unique(CodeSerialize::AsyncJobHandler::SerializationJobData { + .GuestRIP = GuestRIP, + .GuestCodeLength = Length, + .GuestCodeHash = 0, + .HostCodeBegin = CodePtr, + .HostCodeLength = DebugData->HostCodeSize, + .HostCodeHash = 0, + .ThreadJobRefCount = &Thread->ObjectCacheRefCounter, + .Relocations = std::move(*DebugData->Relocations), + })); + } - // Insert to lookup cache - // Pages containing this block are added via AddBlockExecutableRange before each page gets accessed in the frontend - AddBlockMapping(Thread, GuestRIP, CodePtr); + // Clear any relocations that might have been generated + Thread->CPUBackend->ClearRelocations(); + if (IRCaptureCache.PostCompileCode(Thread, CodePtr, GuestRIP, StartAddr, Length, std::move(RAData), IRList, DebugData, GeneratedIR)) { + // Early exit return (uintptr_t)CodePtr; } - void ContextImpl::ExecutionThread(FEXCore::Core::InternalThreadState *Thread) { - Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_WAITING; + // Insert to lookup cache + // Pages containing this block are added via AddBlockExecutableRange before each page gets accessed in the frontend + AddBlockMapping(Thread, GuestRIP, CodePtr); - InitializeThreadTLSData(Thread); + return (uintptr_t)CodePtr; +} - // Now notify the thread that we are initialized - Thread->ThreadWaiting.NotifyAll(); +void ContextImpl::ExecutionThread(FEXCore::Core::InternalThreadState* Thread) { + Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_WAITING; - if (StartPaused || Thread->StartPaused) { - // Parent thread doesn't need to wait to run - Thread->StartRunning.Wait(); - } + InitializeThreadTLSData(Thread); - if 
(!Thread->RunningEvents.EarlyExit.load()) { - Thread->RunningEvents.WaitingToStart = false; + // Now notify the thread that we are initialized + Thread->ThreadWaiting.NotifyAll(); - Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_NONE; + if (StartPaused || Thread->StartPaused) { + // Parent thread doesn't need to wait to run + Thread->StartRunning.Wait(); + } - Thread->RunningEvents.Running = true; + if (!Thread->RunningEvents.EarlyExit.load()) { + Thread->RunningEvents.WaitingToStart = false; - static_cast(Thread->CTX)->Dispatcher->ExecuteDispatch(Thread->CurrentFrame); + Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_NONE; - Thread->RunningEvents.Running = false; - } + Thread->RunningEvents.Running = true; - { - // Ensure the Code Object Serialization service has fully serialized this thread's data before clearing the cache - // Use the thread's object cache ref counter for this - CodeSerialize::CodeObjectSerializeService::WaitForEmptyJobQueue(&Thread->ObjectCacheRefCounter); - } + static_cast(Thread->CTX)->Dispatcher->ExecuteDispatch(Thread->CurrentFrame); - // If it is the parent thread that died then just leave - FEX_TODO("This doesn't make sense when the parent thread doesn't outlive its children"); + Thread->RunningEvents.Running = false; + } - if (Thread->ThreadManager.parent_tid == 0) { - CoreShuttingDown.store(true); - Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_SHUTDOWN; + { + // Ensure the Code Object Serialization service has fully serialized this thread's data before clearing the cache + // Use the thread's object cache ref counter for this + CodeSerialize::CodeObjectSerializeService::WaitForEmptyJobQueue(&Thread->ObjectCacheRefCounter); + } - if (CustomExitHandler) { - CustomExitHandler(Thread->ThreadManager.TID, Thread->ExitReason); - } + // If it is the parent thread that died then just leave + FEX_TODO("This doesn't make sense when the parent thread doesn't outlive its children"); + + if 
(Thread->ThreadManager.parent_tid == 0) { + CoreShuttingDown.store(true); + Thread->ExitReason = FEXCore::Context::ExitReason::EXIT_SHUTDOWN; + + if (CustomExitHandler) { + CustomExitHandler(Thread->ThreadManager.TID, Thread->ExitReason); } + } #ifndef _WIN32 - Alloc::OSAllocator::UninstallTLSData(Thread); + Alloc::OSAllocator::UninstallTLSData(Thread); #endif - SignalDelegation->UninstallTLSState(Thread); - } + SignalDelegation->UninstallTLSState(Thread); +} - static void InvalidateGuestThreadCodeRange(FEXCore::Core::InternalThreadState *Thread, uint64_t Start, uint64_t Length) { - std::lock_guard lk(Thread->LookupCache->WriteLock); +static void InvalidateGuestThreadCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) { + std::lock_guard lk(Thread->LookupCache->WriteLock); - auto lower = Thread->LookupCache->CodePages.lower_bound(Start >> 12); - auto upper = Thread->LookupCache->CodePages.upper_bound((Start + Length - 1) >> 12); + auto lower = Thread->LookupCache->CodePages.lower_bound(Start >> 12); + auto upper = Thread->LookupCache->CodePages.upper_bound((Start + Length - 1) >> 12); - for (auto it = lower; it != upper; it++) { - for (auto Address: it->second) { - ContextImpl::ThreadRemoveCodeEntry(Thread, Address); - } - it->second.clear(); + for (auto it = lower; it != upper; it++) { + for (auto Address : it->second) { + ContextImpl::ThreadRemoveCodeEntry(Thread, Address); } + it->second.clear(); } +} - void ContextImpl::InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState *Thread, uint64_t Start, uint64_t Length) { - InvalidateGuestThreadCodeRange(Thread, Start, Length); - } +void ContextImpl::InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) { + InvalidateGuestThreadCodeRange(Thread, Start, Length); +} - void ContextImpl::InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState *Thread, uint64_t Start, uint64_t Length, CodeRangeInvalidationFn CallAfter) { - 
InvalidateGuestThreadCodeRange(Thread, Start, Length); - CallAfter(Start, Length); - } +void ContextImpl::InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length, + CodeRangeInvalidationFn CallAfter) { + InvalidateGuestThreadCodeRange(Thread, Start, Length); + CallAfter(Start, Length); +} - void ContextImpl::MarkMemoryShared(FEXCore::Core::InternalThreadState *Thread) { - if (!IsMemoryShared) { - IsMemoryShared = true; - UpdateAtomicTSOEmulationConfig(); +void ContextImpl::MarkMemoryShared(FEXCore::Core::InternalThreadState* Thread) { + if (!IsMemoryShared) { + IsMemoryShared = true; + UpdateAtomicTSOEmulationConfig(); - if (Config.TSOAutoMigration) { - // Only the lookup cache is cleared here, so that old code can keep running until next compilation - std::lock_guard lkLookupCache(Thread->LookupCache->WriteLock); - Thread->LookupCache->ClearCache(); + if (Config.TSOAutoMigration) { + // Only the lookup cache is cleared here, so that old code can keep running until next compilation + std::lock_guard lkLookupCache(Thread->LookupCache->WriteLock); + Thread->LookupCache->ClearCache(); - // DebugStore also needs to be cleared - Thread->DebugStore.clear(); - } + // DebugStore also needs to be cleared + Thread->DebugStore.clear(); } } +} - void ContextImpl::ThreadAddBlockLink(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestDestination, FEXCore::Context::ExitFunctionLinkData *HostLink, const FEXCore::Context::BlockDelinkerFunc &delinker) { - auto lk = GuardSignalDeferringSection(static_cast(Thread->CTX)->CodeInvalidationMutex, Thread); +void ContextImpl::ThreadAddBlockLink(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestDestination, + FEXCore::Context::ExitFunctionLinkData* HostLink, const FEXCore::Context::BlockDelinkerFunc& delinker) { + auto lk = GuardSignalDeferringSection(static_cast(Thread->CTX)->CodeInvalidationMutex, Thread); - Thread->LookupCache->AddBlockLink(GuestDestination, HostLink, delinker); 
- } + Thread->LookupCache->AddBlockLink(GuestDestination, HostLink, delinker); +} - void ContextImpl::ThreadRemoveCodeEntry(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP) { - LogMan::Throw::AFmt(static_cast(Thread->CTX)->CodeInvalidationMutex.try_lock() == false, "CodeInvalidationMutex needs to be unique_locked here"); +void ContextImpl::ThreadRemoveCodeEntry(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP) { + LogMan::Throw::AFmt(static_cast(Thread->CTX)->CodeInvalidationMutex.try_lock() == false, "CodeInvalidationMutex needs to " + "be unique_locked here"); - std::lock_guard lk(Thread->LookupCache->WriteLock); + std::lock_guard lk(Thread->LookupCache->WriteLock); - Thread->DebugStore.erase(GuestRIP); - Thread->LookupCache->Erase(Thread->CurrentFrame, GuestRIP); - } + Thread->DebugStore.erase(GuestRIP); + Thread->LookupCache->Erase(Thread->CurrentFrame, GuestRIP); +} - CustomIRResult ContextImpl::AddCustomIREntrypoint(uintptr_t Entrypoint, CustomIREntrypointHandler Handler, void *Creator, void *Data) { - LOGMAN_THROW_A_FMT(Config.Is64BitMode || !(Entrypoint >> 32), "64-bit Entrypoint in 32-bit mode {:x}", Entrypoint); +CustomIRResult ContextImpl::AddCustomIREntrypoint(uintptr_t Entrypoint, CustomIREntrypointHandler Handler, void* Creator, void* Data) { + LOGMAN_THROW_A_FMT(Config.Is64BitMode || !(Entrypoint >> 32), "64-bit Entrypoint in 32-bit mode {:x}", Entrypoint); - std::unique_lock lk(CustomIRMutex); + std::unique_lock lk(CustomIRMutex); - auto InsertedIterator = CustomIRHandlers.emplace(Entrypoint, std::tuple(Handler, Creator, Data)); - HasCustomIRHandlers = true; + auto InsertedIterator = CustomIRHandlers.emplace(Entrypoint, std::tuple(Handler, Creator, Data)); + HasCustomIRHandlers = true; - if (!InsertedIterator.second) { - const auto &[fn, Creator, Data] = InsertedIterator.first->second; - return CustomIRResult(std::move(lk), Creator, Data); - } else { - lk.unlock(); - return CustomIRResult(std::move(lk), 0, 0); - } + if 
(!InsertedIterator.second) { + const auto& [fn, Creator, Data] = InsertedIterator.first->second; + return CustomIRResult(std::move(lk), Creator, Data); + } else { + lk.unlock(); + return CustomIRResult(std::move(lk), 0, 0); } +} - void ContextImpl::RemoveCustomIREntrypoint(uintptr_t Entrypoint) { - LOGMAN_THROW_A_FMT(Config.Is64BitMode || !(Entrypoint >> 32), "64-bit Entrypoint in 32-bit mode {:x}", Entrypoint); +void ContextImpl::RemoveCustomIREntrypoint(uintptr_t Entrypoint) { + LOGMAN_THROW_A_FMT(Config.Is64BitMode || !(Entrypoint >> 32), "64-bit Entrypoint in 32-bit mode {:x}", Entrypoint); - std::scoped_lock lk(CustomIRMutex); + std::scoped_lock lk(CustomIRMutex); - InvalidateGuestCodeRange(nullptr, Entrypoint, 1, [this](uint64_t Entrypoint, uint64_t) { - CustomIRHandlers.erase(Entrypoint); - }); + InvalidateGuestCodeRange(nullptr, Entrypoint, 1, [this](uint64_t Entrypoint, uint64_t) { CustomIRHandlers.erase(Entrypoint); }); - HasCustomIRHandlers = !CustomIRHandlers.empty(); - } + HasCustomIRHandlers = !CustomIRHandlers.empty(); +} - IR::AOTIRCacheEntry *ContextImpl::LoadAOTIRCacheEntry(const fextl::string &filename) { - auto rv = IRCaptureCache.LoadAOTIRCacheEntry(filename); - return rv; - } +IR::AOTIRCacheEntry* ContextImpl::LoadAOTIRCacheEntry(const fextl::string& filename) { + auto rv = IRCaptureCache.LoadAOTIRCacheEntry(filename); + return rv; +} - void ContextImpl::UnloadAOTIRCacheEntry(IR::AOTIRCacheEntry *Entry) { - IRCaptureCache.UnloadAOTIRCacheEntry(Entry); - } +void ContextImpl::UnloadAOTIRCacheEntry(IR::AOTIRCacheEntry* Entry) { + IRCaptureCache.UnloadAOTIRCacheEntry(Entry); +} - void ContextImpl::AppendThunkDefinitions(fextl::vector const& Definitions) { - if (ThunkHandler) { - ThunkHandler->AppendThunkDefinitions(Definitions); - } +void ContextImpl::AppendThunkDefinitions(const fextl::vector& Definitions) { + if (ThunkHandler) { + ThunkHandler->AppendThunkDefinitions(Definitions); } +} - void 
ContextImpl::ConfigureAOTGen(FEXCore::Core::InternalThreadState *Thread, fextl::set *ExternalBranches, uint64_t SectionMaxAddress) { - Thread->FrontendDecoder->SetExternalBranches(ExternalBranches); - Thread->FrontendDecoder->SetSectionMaxAddress(SectionMaxAddress); - } +void ContextImpl::ConfigureAOTGen(FEXCore::Core::InternalThreadState* Thread, fextl::set* ExternalBranches, uint64_t SectionMaxAddress) { + Thread->FrontendDecoder->SetExternalBranches(ExternalBranches); + Thread->FrontendDecoder->SetSectionMaxAddress(SectionMaxAddress); } +} // namespace FEXCore::Context diff --git a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp index 943755bd0c..70fa258c62 100644 --- a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp +++ b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp @@ -25,13 +25,13 @@ namespace FEXCore::CPU { -static void SleepThread(FEXCore::Context::ContextImpl *CTX, FEXCore::Core::CpuStateFrame *Frame) { +static void SleepThread(FEXCore::Context::ContextImpl* CTX, FEXCore::Core::CpuStateFrame* Frame) { CTX->SyscallHandler->SleepThread(CTX, Frame); } constexpr size_t MAX_DISPATCHER_CODE_SIZE = 4096 * 2; -Dispatcher::Dispatcher(FEXCore::Context::ContextImpl *ctx) +Dispatcher::Dispatcher(FEXCore::Context::ContextImpl* ctx) : Arm64Emitter(ctx, FEXCore::Allocator::VirtualAlloc(MAX_DISPATCHER_CODE_SIZE, true), MAX_DISPATCHER_CODE_SIZE) , CTX {ctx} { EmitDispatcher(); @@ -82,9 +82,9 @@ void Dispatcher::EmitDispatcher() { // We want to ensure that we are 16 byte aligned at the top of this loop Align16B(); - ARMEmitter::BiDirectionalLabel FullLookup{}; - ARMEmitter::BiDirectionalLabel CallBlock{}; - ARMEmitter::BackwardLabel LoopTop{}; + ARMEmitter::BiDirectionalLabel FullLookup {}; + ARMEmitter::BiDirectionalLabel CallBlock {}; + ARMEmitter::BackwardLabel LoopTop {}; Bind(&LoopTop); AbsoluteLoopTopAddress = GetCursorAddress(); @@ -99,7 +99,7 @@ void Dispatcher::EmitDispatcher() { 
ldr(TMP1, STATE_PTR(CpuStateFrame, Pointers.Common.L1Pointer)); and_(ARMEmitter::Size::i64Bit, TMP4, RipReg.R(), LookupCache::L1_ENTRIES_MASK); - add(ARMEmitter::Size::i64Bit, TMP1, TMP1, TMP4, ARMEmitter::ShiftType::LSL , 4); + add(ARMEmitter::Size::i64Bit, TMP1, TMP1, TMP4, ARMEmitter::ShiftType::LSL, 4); ldp(TMP4, TMP1, TMP1, 0); sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, RipReg); cbnz(ARMEmitter::Size::i64Bit, TMP1, &FullLookup); @@ -117,8 +117,7 @@ void Dispatcher::EmitDispatcher() { uint64_t VirtualMemorySize = CTX->Config.VirtualMemSize; if (std::popcount(VirtualMemorySize) == 1) { and_(ARMEmitter::Size::i64Bit, TMP4, RipReg.R(), VirtualMemorySize - 1); - } - else { + } else { LoadConstant(ARMEmitter::Size::i64Bit, TMP4, VirtualMemorySize); and_(ARMEmitter::Size::i64Bit, TMP4, RipReg.R(), TMP4); } @@ -193,9 +192,8 @@ void Dispatcher::EmitDispatcher() { ldr(ARMEmitter::XReg::x2, STATE_PTR(CpuStateFrame, Pointers.Common.ExitFunctionLink)); if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r2); - } - else { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r2); + } else { blr(ARMEmitter::Reg::r2); } @@ -237,9 +235,8 @@ void Dispatcher::EmitDispatcher() { ldr(ARMEmitter::XReg::x4, &l_CompileBlock); if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r4); - } - else { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r4); + } else { blr(ARMEmitter::Reg::r4); // { CTX, Frame, RIP, MaxInst } } @@ -285,7 +282,7 @@ void Dispatcher::EmitDispatcher() { { // Guest SIGTRAP handler // Needs to be distinct from the SignalHandlerReturnAddress - GuestSignal_SIGTRAP = GetCursorAddress(); + GuestSignal_SIGTRAP = GetCursorAddress(); SpillStaticRegs(TMP1); @@ -308,8 +305,7 @@ void Dispatcher::EmitDispatcher() { add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::r0, 0); PopCalleeSavedRegisters(); ret(); - } - else { + } else { 
LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, 0); ldr(ARMEmitter::XReg::x1, ARMEmitter::Reg::r1); } @@ -328,9 +324,8 @@ void Dispatcher::EmitDispatcher() { mov(ARMEmitter::XReg::x1, STATE); ldr(ARMEmitter::XReg::x2, &l_Sleep); if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r2); - } - else { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r2); + } else { blr(ARMEmitter::Reg::r2); } @@ -405,8 +400,7 @@ void Dispatcher::EmitDispatcher() { ldr(ARMEmitter::XReg::x3, R, Offset); if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); - } - else { + } else { blr(ARMEmitter::Reg::r3); } // Result is now in x0 @@ -463,23 +457,23 @@ void Dispatcher::EmitDispatcher() { } #ifdef VIXL_SIMULATOR -void Dispatcher::ExecuteDispatch(FEXCore::Core::CpuStateFrame *Frame) { +void Dispatcher::ExecuteDispatch(FEXCore::Core::CpuStateFrame* Frame) { Simulator.WriteXRegister(0, reinterpret_cast(Frame)); - Simulator.RunFrom(reinterpret_cast(DispatchPtr)); + Simulator.RunFrom(reinterpret_cast< const vixl::aarch64::Instruction*>(DispatchPtr)); } -void Dispatcher::ExecuteJITCallback(FEXCore::Core::CpuStateFrame *Frame, uint64_t RIP) { +void Dispatcher::ExecuteJITCallback(FEXCore::Core::CpuStateFrame* Frame, uint64_t RIP) { Simulator.WriteXRegister(0, reinterpret_cast(Frame)); Simulator.WriteXRegister(1, RIP); - Simulator.RunFrom(reinterpret_cast(CallbackPtr)); + Simulator.RunFrom(reinterpret_cast< const vixl::aarch64::Instruction*>(CallbackPtr)); } #endif -void Dispatcher::InitThreadPointers(FEXCore::Core::InternalThreadState *Thread) { +void Dispatcher::InitThreadPointers(FEXCore::Core::InternalThreadState* Thread) { // Setup dispatcher specific pointers that need to be accessed from JIT code { - auto &Common = Thread->CurrentFrame->Pointers.Common; + auto& Common = Thread->CurrentFrame->Pointers.Common; Common.DispatcherLoopTop = AbsoluteLoopTopAddress; 
Common.DispatcherLoopTopFillSRA = AbsoluteLoopTopAddressFillSRA; @@ -492,7 +486,7 @@ void Dispatcher::InitThreadPointers(FEXCore::Core::InternalThreadState *Thread) Common.SignalReturnHandler = SignalHandlerReturnAddress; Common.SignalReturnHandlerRT = SignalHandlerReturnAddressRT; - auto &AArch64 = Thread->CurrentFrame->Pointers.AArch64; + auto& AArch64 = Thread->CurrentFrame->Pointers.AArch64; AArch64.LUDIVHandler = LUDIVHandlerAddress; AArch64.LDIVHandler = LDIVHandlerAddress; AArch64.LUREMHandler = LUREMHandlerAddress; @@ -500,8 +494,8 @@ void Dispatcher::InitThreadPointers(FEXCore::Core::InternalThreadState *Thread) } } -fextl::unique_ptr Dispatcher::Create(FEXCore::Context::ContextImpl *CTX) { +fextl::unique_ptr Dispatcher::Create(FEXCore::Context::ContextImpl* CTX) { return fextl::make_unique(CTX); } -} +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/Frontend.cpp b/FEXCore/Source/Interface/Core/Frontend.cpp index f7b5c8c198..42de61b5f6 100644 --- a/FEXCore/Source/Interface/Core/Frontend.cpp +++ b/FEXCore/Source/Interface/Core/Frontend.cpp @@ -33,22 +33,10 @@ static uint32_t MapModRMToReg(uint8_t REX, uint8_t bits, bool HighBits, bool Has static constexpr GPRArray GPR8BitHighIndexes = { // Classical ordering? 
- FEXCore::X86State::REG_RAX, - FEXCore::X86State::REG_RCX, - FEXCore::X86State::REG_RDX, - FEXCore::X86State::REG_RBX, - FEXCore::X86State::REG_RAX, - FEXCore::X86State::REG_RCX, - FEXCore::X86State::REG_RDX, - FEXCore::X86State::REG_RBX, - FEXCore::X86State::REG_R8, - FEXCore::X86State::REG_R9, - FEXCore::X86State::REG_R10, - FEXCore::X86State::REG_R11, - FEXCore::X86State::REG_R12, - FEXCore::X86State::REG_R13, - FEXCore::X86State::REG_R14, - FEXCore::X86State::REG_R15, + FEXCore::X86State::REG_RAX, FEXCore::X86State::REG_RCX, FEXCore::X86State::REG_RDX, FEXCore::X86State::REG_RBX, + FEXCore::X86State::REG_RAX, FEXCore::X86State::REG_RCX, FEXCore::X86State::REG_RDX, FEXCore::X86State::REG_RBX, + FEXCore::X86State::REG_R8, FEXCore::X86State::REG_R9, FEXCore::X86State::REG_R10, FEXCore::X86State::REG_R11, + FEXCore::X86State::REG_R12, FEXCore::X86State::REG_R13, FEXCore::X86State::REG_R14, FEXCore::X86State::REG_R15, }; uint8_t Offset = (REX << 3) | bits; @@ -59,11 +47,9 @@ static uint32_t MapModRMToReg(uint8_t REX, uint8_t bits, bool HighBits, bool Has if (HasXMM) { return FEXCore::X86State::REG_XMM_0 + Offset; - } - else if (HasMM) { + } else if (HasMM) { return FEXCore::X86State::REG_MM_0 + Offset; - } - else if (!(HighBits && !HasREX)) { + } else if (!(HighBits && !HasREX)) { return FEXCore::X86State::REG_RAX + Offset; } @@ -78,11 +64,10 @@ static uint32_t MapVEXToReg(uint8_t vvvv, bool HasXMM) { } } -Decoder::Decoder(FEXCore::Context::ContextImpl *ctx) +Decoder::Decoder(FEXCore::Context::ContextImpl* ctx) : CTX {ctx} - , OSABI { ctx->SyscallHandler ? ctx->SyscallHandler->GetOSABI() : FEXCore::HLE::SyscallOSABI::OS_UNKNOWN } - , PoolObject {ctx->FrontendAllocator, sizeof(FEXCore::X86Tables::DecodedInst) * DefaultDecodedBufferSize} { -} + , OSABI {ctx->SyscallHandler ? 
ctx->SyscallHandler->GetOSABI() : FEXCore::HLE::SyscallOSABI::OS_UNKNOWN} + , PoolObject {ctx->FrontendAllocator, sizeof(FEXCore::X86Tables::DecodedInst) * DefaultDecodedBufferSize} {} Decoder::~Decoder() { PoolObject.UnclaimBuffer(); @@ -108,7 +93,7 @@ uint64_t Decoder::ReadData(uint8_t Size) { std::memcpy(&Res, &InstStream[InstructionSize], Size); #if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED - for(size_t i = 0; i < Size; ++i) { + for (size_t i = 0; i < Size; ++i) { ReadByte(); } #else @@ -118,7 +103,7 @@ uint64_t Decoder::ReadData(uint8_t Size) { return Res; } -void Decoder::DecodeModRM_16(X86Tables::DecodedOperand *Operand, X86Tables::ModRMDecoded ModRM) { +void Decoder::DecodeModRM_16(X86Tables::DecodedOperand* Operand, X86Tables::ModRMDecoded ModRM) { // 16bit modrm behaves similar to SIB but encoded directly in modrm // mod != 0b11 case // RM | Result @@ -139,13 +124,11 @@ void Decoder::DecodeModRM_16(X86Tables::DecodedOperand *Operand, X86Tables::ModR // if mod = 0b10 // All encodings gain 16bit displacement // 0b110 = [BP] + disp16 - uint32_t Literal{}; - uint8_t DisplacementSize{}; - if ((ModRM.mod == 0 && ModRM.rm == 0b110) || - ModRM.mod == 0b10) { + uint32_t Literal {}; + uint8_t DisplacementSize {}; + if ((ModRM.mod == 0 && ModRM.rm == 0b110) || ModRM.mod == 0b10) { DisplacementSize = 2; - } - else if (ModRM.mod == 0b01) { + } else if (ModRM.mod == 0b01) { DisplacementSize = 1; } if (DisplacementSize) { @@ -200,29 +183,25 @@ void Decoder::DecodeModRM_16(X86Tables::DecodedOperand *Operand, X86Tables::ModR Operand->Data.SIB.Index = it.Index; } -void Decoder::DecodeModRM_64(X86Tables::DecodedOperand *Operand, X86Tables::ModRMDecoded ModRM) { - uint8_t Displacement{}; +void Decoder::DecodeModRM_64(X86Tables::DecodedOperand* Operand, X86Tables::ModRMDecoded ModRM) { + uint8_t Displacement {}; // Do we have an offset? 
if (ModRM.mod == 0b01) { Displacement = 1; - } - else if (ModRM.mod == 0b10) { + } else if (ModRM.mod == 0b10) { Displacement = 4; - } - else if (ModRM.mod == 0 && ModRM.rm == 0b101) { + } else if (ModRM.mod == 0 && ModRM.rm == 0b101) { Displacement = 4; } // Calculate SIB - bool HasSIB = ((ModRM.mod != 0b11) && - (ModRM.rm == 0b100)); + bool HasSIB = ((ModRM.mod != 0b11) && (ModRM.rm == 0b100)); if (HasSIB) { FEXCore::X86Tables::SIBDecoded SIB; if (DecodeInst->DecodedSIB) { SIB.Hex = DecodeInst->SIB; - } - else { + } else { // Haven't yet grabbed SIB, pull it now DecodeInst->SIB = ReadByte(); SIB.Hex = DecodeInst->SIB; @@ -250,7 +229,7 @@ void Decoder::DecodeModRM_64(X86Tables::DecodedOperand *Operand, X86Tables::ModR const uint8_t BaseREX = (DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_B) != 0 ? 1 : 0; Operand->Data.SIB.Index = MapModRMToReg(IndexREX, SIB.index, false, false, IsIndexVector, false, 0b100); - Operand->Data.SIB.Base = MapModRMToReg(BaseREX, SIB.base, false, false, false, false, ModRM.mod == 0 ? 0b101 : 16); + Operand->Data.SIB.Base = MapModRMToReg(BaseREX, SIB.base, false, false, false, false, ModRM.mod == 0 ? 0b101 : 16); } LOGMAN_THROW_AA_FMT(Displacement <= 4, "Number of bytes should be <= 4 for literal src"); @@ -262,8 +241,7 @@ void Decoder::DecodeModRM_64(X86Tables::DecodedOperand *Operand, X86Tables::ModR } Operand->Data.SIB.Offset = Literal; } - } - else if (ModRM.mod == 0) { + } else if (ModRM.mod == 0) { // Explained in Table 1-14. "Operand Addressing Using ModRM and SIB Bytes" if (ModRM.rm == 0b101) { // 32bit Displacement @@ -271,14 +249,12 @@ void Decoder::DecodeModRM_64(X86Tables::DecodedOperand *Operand, X86Tables::ModR Operand->Type = DecodedOperand::OpType::RIPRelative; Operand->Data.RIPLiteral.Value.u = Literal; - } - else { + } else { // Register-direct addressing Operand->Type = DecodedOperand::OpType::GPRDirect; Operand->Data.GPR.GPR = MapModRMToReg(DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_B ? 
1 : 0, ModRM.rm, false, false, false, false); } - } - else { + } else { uint8_t DisplacementSize = ModRM.mod == 1 ? 1 : 4; uint32_t Literal = ReadData(DisplacementSize); if (DisplacementSize == 1) { @@ -291,7 +267,7 @@ void Decoder::DecodeModRM_64(X86Tables::DecodedOperand *Operand, X86Tables::ModR } } -bool Decoder::NormalOp(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op, DecodedHeader Options) { +bool Decoder::NormalOp(const FEXCore::X86Tables::X86InstInfo* Info, uint16_t Op, DecodedHeader Options) { DecodeInst->OP = Op; DecodeInst->TableInfo = Info; @@ -305,42 +281,41 @@ bool Decoder::NormalOp(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op, return false; } - LOGMAN_THROW_AA_FMT(!(Info->Type >= FEXCore::X86Tables::TYPE_GROUP_1 && Info->Type <= FEXCore::X86Tables::TYPE_GROUP_P), - "Group Ops should have been decoded before this!"); + LOGMAN_THROW_AA_FMT(!(Info->Type >= FEXCore::X86Tables::TYPE_GROUP_1 && Info->Type <= FEXCore::X86Tables::TYPE_GROUP_P), "Group Ops " + "should have " + "been decoded " + "before this!"); - uint8_t DestSize{}; - const bool HasWideningDisplacement = (FEXCore::X86Tables::DecodeFlags::GetOpAddr(DecodeInst->Flags, 0) & FEXCore::X86Tables::DecodeFlags::FLAG_WIDENING_SIZE_LAST) != 0 || - (Options.w && CTX->Config.Is64BitMode); - const bool HasNarrowingDisplacement = (FEXCore::X86Tables::DecodeFlags::GetOpAddr(DecodeInst->Flags, 0) & FEXCore::X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST) != 0; + uint8_t DestSize {}; + const bool HasWideningDisplacement = + (FEXCore::X86Tables::DecodeFlags::GetOpAddr(DecodeInst->Flags, 0) & FEXCore::X86Tables::DecodeFlags::FLAG_WIDENING_SIZE_LAST) != 0 || + (Options.w && CTX->Config.Is64BitMode); + const bool HasNarrowingDisplacement = + (FEXCore::X86Tables::DecodeFlags::GetOpAddr(DecodeInst->Flags, 0) & FEXCore::X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST) != 0; const bool HasXMMFlags = (Info->Flags & InstFlags::FLAGS_XMM_FLAGS) != 0; - bool HasXMMSrc = HasXMMFlags && - 
!HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_SRC_GPR) && - !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_MMX_SRC); - bool HasXMMDst = HasXMMFlags && - !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_DST_GPR) && - !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_MMX_DST); - bool HasMMSrc = HasXMMFlags && - !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_SRC_GPR) && - HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_MMX_SRC); - bool HasMMDst = HasXMMFlags && - !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_DST_GPR) && - HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_MMX_DST); + bool HasXMMSrc = + HasXMMFlags && !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_SRC_GPR) && !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_MMX_SRC); + bool HasXMMDst = + HasXMMFlags && !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_DST_GPR) && !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_MMX_DST); + bool HasMMSrc = + HasXMMFlags && !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_SRC_GPR) && HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_MMX_SRC); + bool HasMMDst = + HasXMMFlags && !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_DST_GPR) && HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_MMX_DST); // Is ModRM present via explicit instruction encoded or REX? 
const bool HasMODRM = !!(Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MODRM); const bool HasREX = !!(DecodeInst->Flags & DecodeFlags::FLAG_REX_PREFIX); - const bool Has16BitAddressing = !CTX->Config.Is64BitMode && - DecodeInst->Flags & DecodeFlags::FLAG_ADDRESS_SIZE; + const bool Has16BitAddressing = !CTX->Config.Is64BitMode && DecodeInst->Flags & DecodeFlags::FLAG_ADDRESS_SIZE; // This is used for ModRM register modification // For both modrm.reg and modrm.rm(when mod == 0b11) when value is >= 0b100 // then it changes from expected registers to the high 8bits of the lower registers // Bit annoying to support // In the case of no modrm (REX in byte situation) then it is unaffected - bool Is8BitSrc{}; - bool Is8BitDest{}; + bool Is8BitSrc {}; + bool Is8BitDest {}; // If we require ModRM and haven't decoded it yet, do it now // Some instructions have to read modrm upfront, others do it later @@ -359,12 +334,10 @@ bool Decoder::NormalOp(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op, DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_8BIT); DestSize = 1; Is8BitDest = true; - } - else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_16BIT) { + } else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_16BIT) { DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_16BIT); DestSize = 2; - } - else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_128BIT) { + } else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_128BIT) { if (Options.L) { DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_256BIT); DestSize = 32; @@ -372,28 +345,21 @@ bool Decoder::NormalOp(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op, DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_128BIT); DestSize = 16; } - } - else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_256BIT) { + } else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_256BIT) { DecodeInst->Flags |= 
DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_256BIT); DestSize = 32; - } - else if (HasNarrowingDisplacement && - (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_DEF || - DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BITDEF)) { + } else if (HasNarrowingDisplacement && + (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_DEF || DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BITDEF)) { // See table 1-2. Operand-Size Overrides for this decoding // If the default operating mode is 32bit and we have the operand size flag then the operating size drops to 16bit DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_16BIT); DestSize = 2; - } - else if ( - (HasXMMDst || HasMMDst || CTX->Config.Is64BitMode) && - (HasWideningDisplacement || - DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BIT || - DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BITDEF)) { + } else if ((HasXMMDst || HasMMDst || CTX->Config.Is64BitMode) && + (HasWideningDisplacement || DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BIT || + DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BITDEF)) { DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_64BIT); DestSize = 8; - } - else { + } else { DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_32BIT); DestSize = 4; } @@ -402,50 +368,41 @@ bool Decoder::NormalOp(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op, if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_8BIT) { DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_8BIT); Is8BitSrc = true; - } - else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_16BIT) { + } else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_16BIT) { DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_16BIT); - } - else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_128BIT) { + } else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_128BIT) { if (Options.L) { DecodeInst->Flags |= 
DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_256BIT); } else { DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_128BIT); } - } - else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_256BIT) { + } else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_256BIT) { DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_256BIT); - } - else if (HasNarrowingDisplacement && - (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_DEF || - SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BITDEF)) { + } else if (HasNarrowingDisplacement && + (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_DEF || SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BITDEF)) { // See table 1-2. Operand-Size Overrides for this decoding // If the default operating mode is 32bit and we have the operand size flag then the operating size drops to 16bit DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_16BIT); - } - else if ( - (HasXMMSrc || HasMMSrc || CTX->Config.Is64BitMode) && - (HasWideningDisplacement || - SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BIT || - SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BITDEF)) { + } else if ((HasXMMSrc || HasMMSrc || CTX->Config.Is64BitMode) && + (HasWideningDisplacement || SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BIT || + SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BITDEF)) { DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_64BIT); - } - else { + } else { DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_32BIT); } } - auto *CurrentDest = &DecodeInst->Dest; + auto* CurrentDest = &DecodeInst->Dest; if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RAX) || HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RDX)) { // Some instructions hardcode their destination as RAX CurrentDest->Type = DecodedOperand::OpType::GPR; CurrentDest->Data.GPR.HighBits = false; - 
CurrentDest->Data.GPR.GPR = HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RAX) ? FEXCore::X86State::REG_RAX : FEXCore::X86State::REG_RDX; + CurrentDest->Data.GPR.GPR = + HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RAX) ? FEXCore::X86State::REG_RAX : FEXCore::X86State::REG_RDX; CurrentDest = &DecodeInst->Src[0]; - } - else if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_REX_IN_BYTE)) { + } else if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_REX_IN_BYTE)) { LOGMAN_THROW_AA_FMT(!HasMODRM, "This instruction shouldn't have ModRM!"); // If the REX is in the byte that means the lower nibble of the OP contains the destination GPR @@ -454,10 +411,12 @@ bool Decoder::NormalOp(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op, // If there is a REX prefix then that allows extended GPR usage CurrentDest->Type = DecodedOperand::OpType::GPR; DecodeInst->Dest.Data.GPR.HighBits = (Is8BitDest && !HasREX && (Op & 0b111) >= 0b100); - CurrentDest->Data.GPR.GPR = MapModRMToReg(DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, Op & 0b111, Is8BitDest, HasREX, false, false); + CurrentDest->Data.GPR.GPR = + MapModRMToReg(DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_B ? 
1 : 0, Op & 0b111, Is8BitDest, HasREX, false, false); - if (CurrentDest->Data.GPR.GPR == FEXCore::X86State::REG_INVALID) + if (CurrentDest->Data.GPR.GPR == FEXCore::X86State::REG_INVALID) { return false; + } } uint8_t Bytes = Info->MoreBytes; @@ -469,13 +428,13 @@ bool Decoder::NormalOp(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op, Bytes >>= 1; } - if ((Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MEM_OFFSET) && - (DecodeInst->Flags & DecodeFlags::FLAG_ADDRESS_SIZE)) { + if ((Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MEM_OFFSET) && (DecodeInst->Flags & DecodeFlags::FLAG_ADDRESS_SIZE)) { // If we have a memory offset and have the address size override then divide it just like narrowing displacement Bytes >>= 1; } - auto ModRMOperand = [&](FEXCore::X86Tables::DecodedOperand &GPR, FEXCore::X86Tables::DecodedOperand &NonGPR, bool HasXMMGPR, bool HasXMMNonGPR, bool HasMMGPR, bool HasMMNonGPR, bool GPR8Bit, bool NonGPR8Bit) { + auto ModRMOperand = [&](FEXCore::X86Tables::DecodedOperand& GPR, FEXCore::X86Tables::DecodedOperand& NonGPR, bool HasXMMGPR, + bool HasXMMNonGPR, bool HasMMGPR, bool HasMMNonGPR, bool GPR8Bit, bool NonGPR8Bit) { FEXCore::X86Tables::ModRMDecoded ModRM; ModRM.Hex = DecodeInst->ModRM; @@ -484,19 +443,21 @@ bool Decoder::NormalOp(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op, GPR.Data.GPR.HighBits = (GPR8Bit && ModRM.reg >= 0b100 && !HasREX); GPR.Data.GPR.GPR = MapModRMToReg(DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_R ? 
1 : 0, ModRM.reg, GPR8Bit, HasREX, HasXMMGPR, HasMMGPR); - if (GPR.Data.GPR.GPR == FEXCore::X86State::REG_INVALID) + if (GPR.Data.GPR.GPR == FEXCore::X86State::REG_INVALID) { return false; + } // ModRM.mod == 0b11 == Register // ModRM.Mod != 0b11 == Register-direct addressing if (ModRM.mod == 0b11) { NonGPR.Type = DecodedOperand::OpType::GPR; NonGPR.Data.GPR.HighBits = (NonGPR8Bit && ModRM.rm >= 0b100 && !HasREX); - NonGPR.Data.GPR.GPR = MapModRMToReg(DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, ModRM.rm, NonGPR8Bit, HasREX, HasXMMNonGPR, HasMMNonGPR); - if (NonGPR.Data.GPR.GPR == FEXCore::X86State::REG_INVALID) + NonGPR.Data.GPR.GPR = + MapModRMToReg(DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, ModRM.rm, NonGPR8Bit, HasREX, HasXMMNonGPR, HasMMNonGPR); + if (NonGPR.Data.GPR.GPR == FEXCore::X86State::REG_INVALID) { return false; - } - else { + } + } else { // Only decode if we haven't pre-decoded if (NonGPR.IsNone()) { auto Disp = DecodeModRMs_Disp[Has16BitAddressing]; @@ -523,12 +484,13 @@ bool Decoder::NormalOp(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op, if (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MODRM) { if (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SF_MOD_DST) { - if (!ModRMOperand(DecodeInst->Src[CurrentSrc], DecodeInst->Dest, HasXMMSrc, HasXMMDst, HasMMSrc, HasMMDst, Is8BitSrc, Is8BitDest)) + if (!ModRMOperand(DecodeInst->Src[CurrentSrc], DecodeInst->Dest, HasXMMSrc, HasXMMDst, HasMMSrc, HasMMDst, Is8BitSrc, Is8BitDest)) { return false; - } - else { - if (!ModRMOperand(DecodeInst->Dest, DecodeInst->Src[CurrentSrc], HasXMMDst, HasXMMSrc, HasMMDst, HasMMSrc, Is8BitDest, Is8BitSrc)) + } + } else { + if (!ModRMOperand(DecodeInst->Dest, DecodeInst->Src[CurrentSrc], HasXMMDst, HasXMMSrc, HasMMDst, HasMMSrc, Is8BitDest, Is8BitSrc)) { return false; + } } ++CurrentSrc; } @@ -545,8 +507,7 @@ bool Decoder::NormalOp(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op, 
DecodeInst->Src[CurrentSrc].Data.GPR.HighBits = false; DecodeInst->Src[CurrentSrc].Data.GPR.GPR = FEXCore::X86State::REG_RAX; ++CurrentSrc; - } - else if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_SRC_RCX)) { + } else if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_SRC_RCX)) { DecodeInst->Src[CurrentSrc].Type = DecodedOperand::OpType::GPR; DecodeInst->Src[CurrentSrc].Data.GPR.HighBits = false; DecodeInst->Src[CurrentSrc].Data.GPR.GPR = FEXCore::X86State::REG_RCX; @@ -566,15 +527,13 @@ bool Decoder::NormalOp(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op, uint64_t Literal = ReadData(Bytes); - if ((Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SRC_SEXT) || - (DecodeFlags::GetSizeDstFlags(DecodeInst->Flags) == DecodeFlags::SIZE_64BIT && Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SRC_SEXT64BIT)) { + if ((Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SRC_SEXT) || (DecodeFlags::GetSizeDstFlags(DecodeInst->Flags) == DecodeFlags::SIZE_64BIT && + Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SRC_SEXT64BIT)) { if (Bytes == 1) { Literal = static_cast(Literal); - } - else if (Bytes == 2) { + } else if (Bytes == 2) { Literal = static_cast(Literal); - } - else { + } else { Literal = static_cast(Literal); } DecodeInst->Src[CurrentSrc].Data.Literal.Size = DestSize; @@ -585,13 +544,13 @@ bool Decoder::NormalOp(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op, DecodeInst->Src[CurrentSrc].Data.Literal.Value = Literal; } - LOGMAN_THROW_AA_FMT(Bytes == 0, "Inst at 0x{:x}: 0x{:04x} '{}' Had an instruction of size {} with {} remaining", - DecodeInst->PC, DecodeInst->OP, DecodeInst->TableInfo->Name ?: "UND", InstructionSize, Bytes); + LOGMAN_THROW_AA_FMT(Bytes == 0, "Inst at 0x{:x}: 0x{:04x} '{}' Had an instruction of size {} with {} remaining", DecodeInst->PC, + DecodeInst->OP, DecodeInst->TableInfo->Name ?: "UND", InstructionSize, Bytes); DecodeInst->InstSize = InstructionSize; return 
true; } -bool Decoder::NormalOpHeader(FEXCore::X86Tables::X86InstInfo const *Info, uint16_t Op) { +bool Decoder::NormalOpHeader(const FEXCore::X86Tables::X86InstInfo* Info, uint16_t Op) { DecodeInst->OP = Op; DecodeInst->TableInfo = Info; @@ -605,15 +564,12 @@ bool Decoder::NormalOpHeader(FEXCore::X86Tables::X86InstInfo const *Info, uint16 return false; } - LOGMAN_THROW_AA_FMT(Info->Type != FEXCore::X86Tables::TYPE_REX_PREFIX, - "REX PREFIX should have been decoded before this!"); + LOGMAN_THROW_AA_FMT(Info->Type != FEXCore::X86Tables::TYPE_REX_PREFIX, "REX PREFIX should have been decoded before this!"); // A normal instruction is the most likely. if (Info->Type == FEXCore::X86Tables::TYPE_INST) [[likely]] { return NormalOp(Info, Op); - } - else if (Info->Type >= FEXCore::X86Tables::TYPE_GROUP_1 && - Info->Type <= FEXCore::X86Tables::TYPE_GROUP_11) { + } else if (Info->Type >= FEXCore::X86Tables::TYPE_GROUP_1 && Info->Type <= FEXCore::X86Tables::TYPE_GROUP_11) { uint8_t ModRMByte = ReadByte(); DecodeInst->ModRM = ModRMByte; DecodeInst->DecodedModRM = true; @@ -625,9 +581,7 @@ bool Decoder::NormalOpHeader(FEXCore::X86Tables::X86InstInfo const *Info, uint16 Op = OPD(Info->Type, Info->MoreBytes, ModRM.reg); return NormalOp(&PrimaryInstGroupOps[Op], Op); #undef OPD - } - else if (Info->Type >= FEXCore::X86Tables::TYPE_GROUP_6 && - Info->Type <= FEXCore::X86Tables::TYPE_GROUP_P) { + } else if (Info->Type >= FEXCore::X86Tables::TYPE_GROUP_6 && Info->Type <= FEXCore::X86Tables::TYPE_GROUP_P) { #define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_6) << 5) | (prefix) << 3 | (Reg)) constexpr uint16_t PF_NONE = 0; constexpr uint16_t PF_F3 = 1; @@ -635,12 +589,13 @@ bool Decoder::NormalOpHeader(FEXCore::X86Tables::X86InstInfo const *Info, uint16 constexpr uint16_t PF_F2 = 3; uint16_t PrefixType = PF_NONE; - if (DecodeInst->LastEscapePrefix == 0xF3) + if (DecodeInst->LastEscapePrefix == 0xF3) { PrefixType = PF_F3; - else if (DecodeInst->LastEscapePrefix == 
0xF2) + } else if (DecodeInst->LastEscapePrefix == 0xF2) { PrefixType = PF_F2; - else if (DecodeInst->LastEscapePrefix == 0x66) + } else if (DecodeInst->LastEscapePrefix == 0x66) { PrefixType = PF_66; + } // We have ModRM uint8_t ModRMByte = ReadByte(); @@ -651,31 +606,22 @@ bool Decoder::NormalOpHeader(FEXCore::X86Tables::X86InstInfo const *Info, uint16 ModRM.Hex = DecodeInst->ModRM; uint16_t LocalOp = OPD(Info->Type, PrefixType, ModRM.reg); - FEXCore::X86Tables::X86InstInfo *LocalInfo = &SecondInstGroupOps[LocalOp]; + FEXCore::X86Tables::X86InstInfo* LocalInfo = &SecondInstGroupOps[LocalOp]; #undef OPD if (LocalInfo->Type == FEXCore::X86Tables::TYPE_SECOND_GROUP_MODRM) { // Everything in this group is privileged instructions aside from XGETBV constexpr std::array RegToField = { - 255, - 0, - 1, - 2, - 255, - 255, - 255, - 3, + 255, 0, 1, 2, 255, 255, 255, 3, }; uint8_t Field = RegToField[ModRM.reg]; LOGMAN_THROW_AA_FMT(Field != 255, "Invalid field selected!"); LocalOp = (Field << 3) | ModRM.rm; return NormalOp(&SecondModRMTableOps[LocalOp], LocalOp); - } - else { + } else { return NormalOp(&SecondInstGroupOps[LocalOp], LocalOp); } - } - else if (Info->Type == FEXCore::X86Tables::TYPE_X87_TABLE_PREFIX) { + } else if (Info->Type == FEXCore::X86Tables::TYPE_X87_TABLE_PREFIX) { // We have ModRM uint8_t ModRMByte = ReadByte(); DecodeInst->ModRM = ModRMByte; @@ -683,13 +629,12 @@ bool Decoder::NormalOpHeader(FEXCore::X86Tables::X86InstInfo const *Info, uint16 uint16_t X87Op = ((Op - 0xD8) << 8) | ModRMByte; return NormalOp(&X87Ops[X87Op], X87Op); - } - else if (Info->Type == FEXCore::X86Tables::TYPE_VEX_TABLE_PREFIX) { + } else if (Info->Type == FEXCore::X86Tables::TYPE_VEX_TABLE_PREFIX) { FEXCORE_TELEMETRY_SET(VEXOpTelem, 1); uint16_t map_select = 1; uint16_t pp = 0; const uint8_t Byte1 = ReadByte(); - DecodedHeader options{}; + DecodedHeader options {}; if ((Byte1 & 0b10000000) == 0) { LOGMAN_THROW_A_FMT(CTX->Config.Is64BitMode, "VEX.R shouldn't be 0 in 32-bit 
mode!"); @@ -700,8 +645,7 @@ bool Decoder::NormalOpHeader(FEXCore::X86Tables::X86InstInfo const *Info, uint16 pp = Byte1 & 0b11; options.vvvv = 15 - ((Byte1 & 0b01111000) >> 3); options.L = (Byte1 & 0b100) != 0; - } - else { // 0xC4 = Three byte VEX + } else { // 0xC4 = Three byte VEX const uint8_t Byte2 = ReadByte(); pp = Byte2 & 0b11; map_select = Byte1 & 0b11111; @@ -726,18 +670,17 @@ bool Decoder::NormalOpHeader(FEXCore::X86Tables::X86InstInfo const *Info, uint16 Op = OPD(map_select, pp, VEXOp); #undef OPD - FEXCore::X86Tables::X86InstInfo *LocalInfo = &VEXTableOps[Op]; + FEXCore::X86Tables::X86InstInfo* LocalInfo = &VEXTableOps[Op]; - if (LocalInfo->Type >= FEXCore::X86Tables::TYPE_VEX_GROUP_12 && - LocalInfo->Type <= FEXCore::X86Tables::TYPE_VEX_GROUP_17) { - FEXCORE_TELEMETRY_SET(VEXOpTelem, 1); - // We have ModRM - uint8_t ModRMByte = ReadByte(); - DecodeInst->ModRM = ModRMByte; - DecodeInst->DecodedModRM = true; + if (LocalInfo->Type >= FEXCore::X86Tables::TYPE_VEX_GROUP_12 && LocalInfo->Type <= FEXCore::X86Tables::TYPE_VEX_GROUP_17) { + FEXCORE_TELEMETRY_SET(VEXOpTelem, 1); + // We have ModRM + uint8_t ModRMByte = ReadByte(); + DecodeInst->ModRM = ModRMByte; + DecodeInst->DecodedModRM = true; - FEXCore::X86Tables::ModRMDecoded ModRM; - ModRM.Hex = DecodeInst->ModRM; + FEXCore::X86Tables::ModRMDecoded ModRM; + ModRM.Hex = DecodeInst->ModRM; #define OPD(group, pp, opcode) (((group - TYPE_VEX_GROUP_12) << 4) | (pp << 3) | (opcode)) Op = OPD(LocalInfo->Type, pp, ModRM.reg); @@ -746,8 +689,7 @@ bool Decoder::NormalOpHeader(FEXCore::X86Tables::X86InstInfo const *Info, uint16 } else { return NormalOp(LocalInfo, Op, options); } - } - else if (Info->Type == FEXCore::X86Tables::TYPE_GROUP_EVEX) { + } else if (Info->Type == FEXCore::X86Tables::TYPE_GROUP_EVEX) { FEXCORE_TELEMETRY_SET(EVEXOpTelem, 1); /* uint8_t P1 = */ ReadByte(); @@ -769,15 +711,17 @@ bool Decoder::DecodeInstruction(uint64_t PC) { memset(DecodeInst, 0, sizeof(DecodedInst)); DecodeInst->PC = PC; - 
for(;;) { - if (InstructionSize >= MAX_INST_SIZE) + for (;;) { + if (InstructionSize >= MAX_INST_SIZE) { return false; + } uint8_t Op = ReadByte(); switch (Op) { - case 0x0F: {// Escape Op + case 0x0F: { // Escape Op uint8_t EscapeOp = ReadByte(); switch (EscapeOp) { - case 0x0F: [[unlikely]] { // 3DNow! + case 0x0F: + [[unlikely]] { // 3DNow! // 3DNow! Instruction Encoding: 0F 0F [ModRM] [SIB] [Displacement] [Opcode] // Decode ModRM uint8_t ModRMByte = ReadByte(); @@ -787,8 +731,7 @@ bool Decoder::DecodeInstruction(uint64_t PC) { FEXCore::X86Tables::ModRMDecoded ModRM; ModRM.Hex = DecodeInst->ModRM; - const bool Has16BitAddressing = !CTX->Config.Is64BitMode && - DecodeInst->Flags & DecodeFlags::FLAG_ADDRESS_SIZE; + const bool Has16BitAddressing = !CTX->Config.Is64BitMode && DecodeInst->Flags & DecodeFlags::FLAG_ADDRESS_SIZE; // All 3DNow! instructions have the second argument as the rm handler // We need to decode it upfront to get the displacement out of the way @@ -800,55 +743,58 @@ bool Decoder::DecodeInstruction(uint64_t PC) { // Take a peek at the op just past the displacement uint8_t LocalOp = ReadByte(); return NormalOpHeader(&FEXCore::X86Tables::DDDNowOps[LocalOp], LocalOp); - break; + break; + } + case 0x38: { // F38 Table! + constexpr uint16_t PF_38_NONE = 0; + constexpr uint16_t PF_38_66 = (1U << 0); + constexpr uint16_t PF_38_F2 = (1U << 1); + constexpr uint16_t PF_38_F3 = (1U << 2); + + uint16_t Prefix = PF_38_NONE; + if (DecodeInst->Flags & DecodeFlags::FLAG_OPERAND_SIZE) { + Prefix |= PF_38_66; + } + if (DecodeInst->Flags & DecodeFlags::FLAG_REPNE_PREFIX) { + Prefix |= PF_38_F2; + } + if (DecodeInst->Flags & DecodeFlags::FLAG_REP_PREFIX) { + Prefix |= PF_38_F3; } - case 0x38: { // F38 Table! 
- constexpr uint16_t PF_38_NONE = 0; - constexpr uint16_t PF_38_66 = (1U << 0); - constexpr uint16_t PF_38_F2 = (1U << 1); - constexpr uint16_t PF_38_F3 = (1U << 2); - - uint16_t Prefix = PF_38_NONE; - if (DecodeInst->Flags & DecodeFlags::FLAG_OPERAND_SIZE) { - Prefix |= PF_38_66; - } - if (DecodeInst->Flags & DecodeFlags::FLAG_REPNE_PREFIX) { - Prefix |= PF_38_F2; - } - if (DecodeInst->Flags & DecodeFlags::FLAG_REP_PREFIX) { - Prefix |= PF_38_F3; - } - uint16_t LocalOp = (Prefix << 8) | ReadByte(); + uint16_t LocalOp = (Prefix << 8) | ReadByte(); - bool NoOverlay66 = (FEXCore::X86Tables::H0F38TableOps[LocalOp].Flags & InstFlags::FLAGS_NO_OVERLAY66) != 0; - if (DecodeInst->LastEscapePrefix == 0x66 && NoOverlay66) { // Operand Size - // Remove prefix so it doesn't effect calculations. - // This is only an escape prefix rather than modifier now - DecodeInst->Flags &= ~DecodeFlags::FLAG_OPERAND_SIZE; - DecodeFlags::PopOpAddrIf(&DecodeInst->Flags, DecodeFlags::FLAG_OPERAND_SIZE_LAST); - } + bool NoOverlay66 = (FEXCore::X86Tables::H0F38TableOps[LocalOp].Flags & InstFlags::FLAGS_NO_OVERLAY66) != 0; + if (DecodeInst->LastEscapePrefix == 0x66 && NoOverlay66) { // Operand Size + // Remove prefix so it doesn't effect calculations. + // This is only an escape prefix rather than modifier now + DecodeInst->Flags &= ~DecodeFlags::FLAG_OPERAND_SIZE; + DecodeFlags::PopOpAddrIf(&DecodeInst->Flags, DecodeFlags::FLAG_OPERAND_SIZE_LAST); + } - return NormalOpHeader(&FEXCore::X86Tables::H0F38TableOps[LocalOp], LocalOp); + return NormalOpHeader(&FEXCore::X86Tables::H0F38TableOps[LocalOp], LocalOp); break; + } + case 0x3A: { // F3A Table! + constexpr uint16_t PF_3A_NONE = 0; + constexpr uint16_t PF_3A_66 = (1 << 0); + constexpr uint16_t PF_3A_REX = (1 << 1); + + uint16_t Prefix = PF_3A_NONE; + if (DecodeInst->LastEscapePrefix == 0x66) { // Operand Size + Prefix = PF_3A_66; } - case 0x3A: { // F3A Table! 
- constexpr uint16_t PF_3A_NONE = 0; - constexpr uint16_t PF_3A_66 = (1 << 0); - constexpr uint16_t PF_3A_REX = (1 << 1); - uint16_t Prefix = PF_3A_NONE; - if (DecodeInst->LastEscapePrefix == 0x66) // Operand Size - Prefix = PF_3A_66; - - if (DecodeInst->Flags & DecodeFlags::FLAG_REX_WIDENING) - Prefix |= PF_3A_REX; + if (DecodeInst->Flags & DecodeFlags::FLAG_REX_WIDENING) { + Prefix |= PF_3A_REX; + } - uint16_t LocalOp = (Prefix << 8) | ReadByte(); - return NormalOpHeader(&FEXCore::X86Tables::H0F3ATableOps[LocalOp], LocalOp); + uint16_t LocalOp = (Prefix << 8) | ReadByte(); + return NormalOpHeader(&FEXCore::X86Tables::H0F3ATableOps[LocalOp], LocalOp); break; - } - default: [[likely]] { // Two byte table! + } + default: + [[likely]] { // Two byte table! // x86-64 abuses three legacy prefixes to extend the table encodings // 0x66 - Operand Size prefix // 0xF2 - REPNE prefix @@ -861,42 +807,38 @@ bool Decoder::DecodeInstruction(uint64_t PC) { if (NoOverlay) { // This section of the table ignores prefix extention return NormalOpHeader(&FEXCore::X86Tables::SecondBaseOps[EscapeOp], EscapeOp); - } - else if (DecodeInst->LastEscapePrefix == 0xF3) { // REP + } else if (DecodeInst->LastEscapePrefix == 0xF3) { // REP // Remove prefix so it doesn't effect calculations. // This is only an escape prefix rather tan modifier now DecodeInst->Flags &= ~DecodeFlags::FLAG_REP_PREFIX; return NormalOpHeader(&FEXCore::X86Tables::RepModOps[EscapeOp], EscapeOp); - } - else if (DecodeInst->LastEscapePrefix == 0xF2) { // REPNE + } else if (DecodeInst->LastEscapePrefix == 0xF2) { // REPNE // Remove prefix so it doesn't effect calculations. 
// This is only an escape prefix rather tan modifier now DecodeInst->Flags &= ~DecodeFlags::FLAG_REPNE_PREFIX; return NormalOpHeader(&FEXCore::X86Tables::RepNEModOps[EscapeOp], EscapeOp); - } - else if (DecodeInst->LastEscapePrefix == 0x66 && !NoOverlay66) { // Operand Size + } else if (DecodeInst->LastEscapePrefix == 0x66 && !NoOverlay66) { // Operand Size // Remove prefix so it doesn't effect calculations. // This is only an escape prefix rather tan modifier now DecodeInst->Flags &= ~DecodeFlags::FLAG_OPERAND_SIZE; DecodeFlags::PopOpAddrIf(&DecodeInst->Flags, DecodeFlags::FLAG_OPERAND_SIZE_LAST); return NormalOpHeader(&FEXCore::X86Tables::OpSizeModOps[EscapeOp], EscapeOp); - } - else { + } else { return NormalOpHeader(&FEXCore::X86Tables::SecondBaseOps[EscapeOp], EscapeOp); } - break; + break; } } - break; + break; } case 0x66: // Operand Size prefix DecodeInst->Flags |= DecodeFlags::FLAG_OPERAND_SIZE; DecodeInst->LastEscapePrefix = Op; DecodeFlags::PushOpAddr(&DecodeInst->Flags, DecodeFlags::FLAG_OPERAND_SIZE_LAST); - break; + break; case 0x67: // Address Size override prefix DecodeInst->Flags |= DecodeFlags::FLAG_ADDRESS_SIZE; - break; + break; case 0x26: // ES legacy prefix if (!CTX->Config.Is64BitMode) { DecodeInst->Flags |= DecodeFlags::FLAG_ES_PREFIX; @@ -920,56 +862,59 @@ bool Decoder::DecodeInstruction(uint64_t PC) { DecodeInst->Flags |= DecodeFlags::FLAG_DS_PREFIX; } break; - break; + break; case 0xF0: // LOCK prefix DecodeInst->Flags |= DecodeFlags::FLAG_LOCK; - break; + break; case 0xF2: // REPNE prefix DecodeInst->Flags |= DecodeFlags::FLAG_REPNE_PREFIX; DecodeInst->LastEscapePrefix = Op; - break; + break; case 0xF3: // REP prefix DecodeInst->Flags |= DecodeFlags::FLAG_REP_PREFIX; DecodeInst->LastEscapePrefix = Op; - break; + break; case 0x64: // FS prefix DecodeInst->Flags |= DecodeFlags::FLAG_FS_PREFIX; - break; + break; case 0x65: // GS prefix DecodeInst->Flags |= DecodeFlags::FLAG_GS_PREFIX; - break; - default: [[likely]] { // Default base table - 
auto Info = &FEXCore::X86Tables::BaseOps[Op]; + break; + default: + [[likely]] { // Default base table + auto Info = &FEXCore::X86Tables::BaseOps[Op]; - if (Info->Type == FEXCore::X86Tables::TYPE_REX_PREFIX) { - LOGMAN_THROW_A_FMT(CTX->Config.Is64BitMode, "Got REX prefix in 32bit mode"); - DecodeInst->Flags |= DecodeFlags::FLAG_REX_PREFIX; + if (Info->Type == FEXCore::X86Tables::TYPE_REX_PREFIX) { + LOGMAN_THROW_A_FMT(CTX->Config.Is64BitMode, "Got REX prefix in 32bit mode"); + DecodeInst->Flags |= DecodeFlags::FLAG_REX_PREFIX; - // Widening displacement - if (Op & 0b1000) { - DecodeInst->Flags |= DecodeFlags::FLAG_REX_WIDENING; - DecodeFlags::PushOpAddr(&DecodeInst->Flags, DecodeFlags::FLAG_WIDENING_SIZE_LAST); - } + // Widening displacement + if (Op & 0b1000) { + DecodeInst->Flags |= DecodeFlags::FLAG_REX_WIDENING; + DecodeFlags::PushOpAddr(&DecodeInst->Flags, DecodeFlags::FLAG_WIDENING_SIZE_LAST); + } + + // XGPR_B bit set + if (Op & 0b0001) { + DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_B; + } - // XGPR_B bit set - if (Op & 0b0001) - DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_B; + // XGPR_X bit set + if (Op & 0b0010) { + DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_X; + } - // XGPR_X bit set - if (Op & 0b0010) - DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_X; + // XGPR_R bit set + if (Op & 0b0100) { + DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_R; + } + } else { + return NormalOpHeader(Info, Op); + } - // XGPR_R bit set - if (Op & 0b0100) - DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_R; - } else { - return NormalOpHeader(Info, Op); + break; } - - break; - } } - } if (DecodeInst->Dest.IsGPR()) { @@ -980,8 +925,9 @@ bool Decoder::DecodeInstruction(uint64_t PC) { } void Decoder::BranchTargetInMultiblockRange() { - if (!CTX->Config.Multiblock) + if (!CTX->Config.Multiblock) { return; + } // If the RIP setting is conditional AND within our symbol range then it can be considered for multiblock uint64_t TargetRIP = 0; @@ -989,32 +935,30 @@ void 
Decoder::BranchTargetInMultiblockRange() { bool Conditional = true; switch (DecodeInst->OP) { - case 0x70 ... 0x7F: // Conditional JUMP - case 0x80 ... 0x8F: { // More conditional - // Source is a literal - // auto RIPOffset = LoadSource(Op, Op->Src[0], Op->Flags); - // auto RIPTargetConst = _Constant(Op->PC + Op->InstSize); - // Target offset is PC + InstSize + Literal - LOGMAN_THROW_A_FMT(DecodeInst->Src[0].IsLiteral(), "Had wrong operand type"); - TargetRIP = DecodeInst->PC + DecodeInst->InstSize + DecodeInst->Src[0].Data.Literal.Value; - break; - } - case 0xE9: - case 0xEB: // Both are unconditional JMP instructions - LOGMAN_THROW_A_FMT(DecodeInst->Src[0].IsLiteral(), "Had wrong operand type"); - TargetRIP = DecodeInst->PC + DecodeInst->InstSize + DecodeInst->Src[0].Data.Literal.Value; - Conditional = false; + case 0x70 ... 0x7F: // Conditional JUMP + case 0x80 ... 0x8F: { // More conditional + // Source is a literal + // auto RIPOffset = LoadSource(Op, Op->Src[0], Op->Flags); + // auto RIPTargetConst = _Constant(Op->PC + Op->InstSize); + // Target offset is PC + InstSize + Literal + LOGMAN_THROW_A_FMT(DecodeInst->Src[0].IsLiteral(), "Had wrong operand type"); + TargetRIP = DecodeInst->PC + DecodeInst->InstSize + DecodeInst->Src[0].Data.Literal.Value; break; - case 0xE8: // Call - Immediate target, We don't want to inline calls - if (ExternalBranches) { - ExternalBranches->insert(DecodeInst->PC + DecodeInst->InstSize); - } - [[fallthrough]]; - case 0xC2: // RET imm - case 0xC3: // RET - default: - return; + } + case 0xE9: + case 0xEB: // Both are unconditional JMP instructions + LOGMAN_THROW_A_FMT(DecodeInst->Src[0].IsLiteral(), "Had wrong operand type"); + TargetRIP = DecodeInst->PC + DecodeInst->InstSize + DecodeInst->Src[0].Data.Literal.Value; + Conditional = false; break; + case 0xE8: // Call - Immediate target, We don't want to inline calls + if (ExternalBranches) { + ExternalBranches->insert(DecodeInst->PC + DecodeInst->InstSize); + } + [[fallthrough]]; + 
case 0xC2: // RET imm + case 0xC3: // RET + default: return; break; } if (GPRSize == 4) { @@ -1075,13 +1019,11 @@ bool Decoder::BranchTargetCanContinue(bool FinalInstruction) const { return false; } -const uint8_t *Decoder::AdjustAddrForSpecialRegion(uint8_t const* _InstStream, uint64_t EntryPoint, uint64_t RIP) { +const uint8_t* Decoder::AdjustAddrForSpecialRegion(const uint8_t* _InstStream, uint64_t EntryPoint, uint64_t RIP) { constexpr uint64_t VSyscall_Base = 0xFFFF'FFFF'FF60'0000ULL; constexpr uint64_t VSyscall_End = VSyscall_Base + 0x1000; - if (OSABI == FEXCore::HLE::SyscallOSABI::OS_LINUX64 && - RIP >= VSyscall_Base && - RIP < VSyscall_End) { + if (OSABI == FEXCore::HLE::SyscallOSABI::OS_LINUX64 && RIP >= VSyscall_Base && RIP < VSyscall_End) { // VSyscall // This doesn't exist on AArch64 and on x86_64 hosts this is emulated with faults to a region mapped with --xp permissions // Offset 0: vgettimeofday @@ -1094,7 +1036,8 @@ const uint8_t *Decoder::AdjustAddrForSpecialRegion(uint8_t const* _InstStream, u return _InstStream - EntryPoint + RIP; } -void Decoder::DecodeInstructionsAtEntry(uint8_t const* _InstStream, uint64_t PC, uint64_t MaxInst, std::function AddContainedCodePage) { +void Decoder::DecodeInstructionsAtEntry(const uint8_t* _InstStream, uint64_t PC, uint64_t MaxInst, + std::function AddContainedCodePage) { FEXCORE_PROFILE_SCOPED("DecodeInstructions"); BlockInfo.TotalInstructionCount = 0; BlockInfo.Blocks.clear(); @@ -1111,7 +1054,7 @@ void Decoder::DecodeInstructionsAtEntry(uint8_t const* _InstStream, uint64_t PC, EntryPoint = PC; InstStream = _InstStream; - uint64_t TotalInstructions{}; + uint64_t TotalInstructions {}; // If we don't have symbols available then we become a bit optimistic about multiblock ranges if (!SymbolAvailable) { @@ -1128,7 +1071,7 @@ void Decoder::DecodeInstructionsAtEntry(uint8_t const* _InstStream, uint64_t PC, uint64_t CurrentCodePage = PC & FHU::FEX_PAGE_MASK; - fextl::set CodePages = { CurrentCodePage }; + fextl::set 
CodePages = {CurrentCodePage}; AddContainedCodePage(PC, CurrentCodePage, FHU::FEX_PAGE_SIZE); @@ -1140,12 +1083,12 @@ void Decoder::DecodeInstructionsAtEntry(uint8_t const* _InstStream, uint64_t PC, auto BlockDecodeIt = BlocksToDecode.begin(); uint64_t RIPToDecode = *BlockDecodeIt; BlockInfo.Blocks.emplace_back(); - DecodedBlocks &CurrentBlockDecoding = BlockInfo.Blocks.back(); + DecodedBlocks& CurrentBlockDecoding = BlockInfo.Blocks.back(); CurrentBlockDecoding.Entry = RIPToDecode; uint64_t PCOffset = 0; - uint64_t BlockNumberOfInstructions{}; + uint64_t BlockNumberOfInstructions {}; uint64_t BlockStartOffset = DecodedSize; // Do a bit of pointer math to figure out where we are in code @@ -1192,15 +1135,12 @@ void Decoder::DecodeInstructionsAtEntry(uint8_t const* _InstStream, uint64_t PC, } bool CanContinue = false; - if (!(DecodeInst->TableInfo->Flags & - (FEXCore::X86Tables::InstFlags::FLAGS_BLOCK_END | FEXCore::X86Tables::InstFlags::FLAGS_SETS_RIP))) { + if (!(DecodeInst->TableInfo->Flags & (FEXCore::X86Tables::InstFlags::FLAGS_BLOCK_END | FEXCore::X86Tables::InstFlags::FLAGS_SETS_RIP))) { // If this isn't a block ender then we can keep going regardless CanContinue = true; } - bool FinalInstruction = DecodedSize >= MaxInst || - DecodedSize >= DefaultDecodedBufferSize || - TotalInstructions >= MaxInst; + bool FinalInstruction = DecodedSize >= MaxInst || DecodedSize >= DefaultDecodedBufferSize || TotalInstructions >= MaxInst; if (DecodeInst->TableInfo->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SETS_RIP) { // If we have multiblock enabled @@ -1234,10 +1174,9 @@ void Decoder::DecodeInstructionsAtEntry(uint8_t const* _InstStream, uint64_t PC, } // sort for better branching - std::sort(BlockInfo.Blocks.begin(), BlockInfo.Blocks.end(), [](const FEXCore::Frontend::Decoder::DecodedBlocks& a, const FEXCore::Frontend::Decoder::DecodedBlocks& b) { - return a.Entry < b.Entry; - }); -} - + std::sort( + BlockInfo.Blocks.begin(), BlockInfo.Blocks.end(), + [](const 
FEXCore::Frontend::Decoder::DecodedBlocks& a, const FEXCore::Frontend::Decoder::DecodedBlocks& b) { return a.Entry < b.Entry; }); } +} // namespace FEXCore::Frontend diff --git a/FEXCore/Source/Interface/Core/HostFeatures.cpp b/FEXCore/Source/Interface/Core/HostFeatures.cpp index e5f2dda373..c55dd7867d 100644 --- a/FEXCore/Source/Interface/Core/HostFeatures.cpp +++ b/FEXCore/Source/Interface/Core/HostFeatures.cpp @@ -28,23 +28,21 @@ namespace FEXCore { [[maybe_unused]] constexpr uint32_t DCZID_BS_MASK = 0b0'1111; #ifdef _M_ARM_64 -[[maybe_unused]] static uint32_t GetDCZID() { - uint64_t Result{}; - __asm("mrs %[Res], DCZID_EL0" - : [Res] "=r" (Result)); +[[maybe_unused]] +static uint32_t GetDCZID() { + uint64_t Result {}; + __asm("mrs %[Res], DCZID_EL0" : [Res] "=r"(Result)); return Result; } static uint32_t GetFPCR() { - uint64_t Result{}; - __asm ("mrs %[Res], FPCR" - : [Res] "=r" (Result)); + uint64_t Result {}; + __asm("mrs %[Res], FPCR" : [Res] "=r"(Result)); return Result; } static void SetFPCR(uint64_t Value) { - __asm ("msr FPCR, %[Value]" - :: [Value] "r" (Value)); + __asm("msr FPCR, %[Value]" ::[Value] "r"(Value)); } #else static uint32_t GetDCZID() { @@ -53,7 +51,7 @@ static uint32_t GetDCZID() { } #endif -static void OverrideFeatures(HostFeatures *Features) { +static void OverrideFeatures(HostFeatures* Features) { // Override features if the user has specifically called for it. 
FEX_CONFIG_OPT(HostFeatures, HOSTFEATURES); if (!HostFeatures()) { @@ -62,19 +60,19 @@ static void OverrideFeatures(HostFeatures *Features) { } #define ENABLE_DISABLE_OPTION(FeatureName, name, enum_name) \ - do { \ - const bool Disable##name = (HostFeatures() & FEXCore::Config::HostFeatures::DISABLE##enum_name) != 0; \ - const bool Enable##name = (HostFeatures() & FEXCore::Config::HostFeatures::ENABLE##enum_name) != 0; \ - LogMan::Throw::AFmt(!(Disable##name && Enable##name), "Disabling and Enabling CPU feature (" #name ") is mutually exclusive"); \ - const bool AlreadyEnabled = Features->FeatureName; \ - const bool Result = (AlreadyEnabled | Enable##name) & !Disable##name; \ - Features->FeatureName = Result; \ - } while (0) + do { \ + const bool Disable##name = (HostFeatures() & FEXCore::Config::HostFeatures::DISABLE##enum_name) != 0; \ + const bool Enable##name = (HostFeatures() & FEXCore::Config::HostFeatures::ENABLE##enum_name) != 0; \ + LogMan::Throw::AFmt(!(Disable##name && Enable##name), "Disabling and Enabling CPU feature (" #name ") is mutually exclusive"); \ + const bool AlreadyEnabled = Features->FeatureName; \ + const bool Result = (AlreadyEnabled | Enable##name) & !Disable##name; \ + Features->FeatureName = Result; \ + } while (0) #define GET_SINGLE_OPTION(name, enum_name) \ - const bool Disable##name = (HostFeatures() & FEXCore::Config::HostFeatures::DISABLE##enum_name) != 0; \ - const bool Enable##name = (HostFeatures() & FEXCore::Config::HostFeatures::ENABLE##enum_name) != 0; \ - LogMan::Throw::AFmt(!(Disable##name && Enable##name), "Disabling and Enabling CPU feature (" #name ") is mutually exclusive"); + const bool Disable##name = (HostFeatures() & FEXCore::Config::HostFeatures::DISABLE##enum_name) != 0; \ + const bool Enable##name = (HostFeatures() & FEXCore::Config::HostFeatures::ENABLE##enum_name) != 0; \ + LogMan::Throw::AFmt(!(Disable##name && Enable##name), "Disabling and Enabling CPU feature (" #name ") is mutually exclusive"); 
ENABLE_DISABLE_OPTION(SupportsAVX, AVX, AVX); ENABLE_DISABLE_OPTION(SupportsAVX2, AVX2, AVX2); @@ -102,8 +100,7 @@ static void OverrideFeatures(HostFeatures *Features) { Features->SupportsCRC = true; Features->SupportsSHA = true; Features->SupportsPMULL_128Bit = true; - } - else if (DisableCrypto) { + } else if (DisableCrypto) { Features->SupportsAES = false; Features->SupportsCRC = false; Features->SupportsSHA = false; @@ -127,8 +124,7 @@ HostFeatures::HostFeatures() { SupportsAES = Features.Has(vixl::CPUFeatures::Feature::kAES); SupportsCRC = Features.Has(vixl::CPUFeatures::Feature::kCRC32); - SupportsSHA = Features.Has(vixl::CPUFeatures::Feature::kSHA1) && - Features.Has(vixl::CPUFeatures::Feature::kSHA2); + SupportsSHA = Features.Has(vixl::CPUFeatures::Feature::kSHA1) && Features.Has(vixl::CPUFeatures::Feature::kSHA2); SupportsAtomics = Features.Has(vixl::CPUFeatures::Feature::kAtomics); SupportsRAND = Features.Has(vixl::CPUFeatures::Feature::kRNG); @@ -151,8 +147,7 @@ HostFeatures::HostFeatures() { SupportsAVX = true; #else SupportsSVE = Features.Has(vixl::CPUFeatures::Feature::kSVE); - SupportsAVX = Features.Has(vixl::CPUFeatures::Feature::kSVE2) && - vixl::aarch64::CPU::ReadSVEVectorLengthInBits() >= 256; + SupportsAVX = Features.Has(vixl::CPUFeatures::Feature::kSVE2) && vixl::aarch64::CPU::ReadSVEVectorLengthInBits() >= 256; #endif // TODO: AVX2 is currently unsupported. Disable until the remaining features are implemented. 
SupportsAVX2 = false; @@ -173,21 +168,19 @@ HostFeatures::HostFeatures() { // We need to get the CPU's cache line size // We expect sane targets that have correct cacheline sizes across clusters uint64_t CTR; - __asm volatile ("mrs %[ctr], ctr_el0" - : [ctr] "=r"(CTR)); + __asm volatile("mrs %[ctr], ctr_el0" : [ctr] "=r"(CTR)); DCacheLineSize = 4 << ((CTR >> 16) & 0xF); ICacheLineSize = 4 << (CTR & 0xF); // Test if this CPU supports float exception trapping by attempting to enable // On unsupported these bits are architecturally defined as RAZ/WI - constexpr uint32_t ExceptionEnableTraps = - (1U << 8) | // Invalid Operation float exception trap enable - (1U << 9) | // Divide by zero float exception trap enable - (1U << 10) | // Overflow float exception trap enable - (1U << 11) | // Underflow float exception trap enable - (1U << 12) | // Inexact float exception trap enable - (1U << 15); // Input Denormal float exception trap enable + constexpr uint32_t ExceptionEnableTraps = (1U << 8) | // Invalid Operation float exception trap enable + (1U << 9) | // Divide by zero float exception trap enable + (1U << 10) | // Overflow float exception trap enable + (1U << 11) | // Underflow float exception trap enable + (1U << 12) | // Inexact float exception trap enable + (1U << 15); // Input Denormal float exception trap enable uint32_t OriginalFPCR = GetFPCR(); uint32_t FPCR = OriginalFPCR | ExceptionEnableTraps; @@ -222,7 +215,7 @@ HostFeatures::HostFeatures() { ICacheLineSize = 64U; #if !defined(VIXL_SIMULATOR) - Xbyak::util::Cpu X86Features{}; + Xbyak::util::Cpu X86Features {}; SupportsAES = X86Features.has(Xbyak::util::Cpu::tAESNI); SupportsCRC = X86Features.has(Xbyak::util::Cpu::tSSE42); SupportsRAND = X86Features.has(Xbyak::util::Cpu::tRDRAND) && X86Features.has(Xbyak::util::Cpu::tRDSEED); @@ -256,4 +249,4 @@ HostFeatures::HostFeatures() { SupportsPreserveAllABI = FEXCORE_HAS_PRESERVE_ALL_ATTR; OverrideFeatures(this); } -} +} // namespace FEXCore diff --git 
a/FEXCore/Source/Interface/Core/Interpreter/Fallbacks/InterpreterFallbacks.cpp b/FEXCore/Source/Interface/Core/Interpreter/Fallbacks/InterpreterFallbacks.cpp index 3c1ab9c31a..2587d8fced 100644 --- a/FEXCore/Source/Interface/Core/Interpreter/Fallbacks/InterpreterFallbacks.cpp +++ b/FEXCore/Source/Interface/Core/Interpreter/Fallbacks/InterpreterFallbacks.cpp @@ -10,23 +10,23 @@ namespace FEXCore::CPU { template -static FallbackInfo GetFallbackInfo(R(*fn)(Args...), FEXCore::Core::FallbackHandlerIndex HandlerIndex) { +static FallbackInfo GetFallbackInfo(R (*fn)(Args...), FEXCore::Core::FallbackHandlerIndex HandlerIndex) { return {FABI_UNKNOWN, (void*)fn, HandlerIndex, false}; } template<> -FallbackInfo GetFallbackInfo(double(*fn)(uint16_t, double), FEXCore::Core::FallbackHandlerIndex HandlerIndex) { +FallbackInfo GetFallbackInfo(double (*fn)(uint16_t, double), FEXCore::Core::FallbackHandlerIndex HandlerIndex) { return {FABI_F64_I16_F64, (void*)fn, HandlerIndex, false}; } template<> -FallbackInfo GetFallbackInfo(double(*fn)(uint16_t, double,double), FEXCore::Core::FallbackHandlerIndex HandlerIndex) { +FallbackInfo GetFallbackInfo(double (*fn)(uint16_t, double, double), FEXCore::Core::FallbackHandlerIndex HandlerIndex) { return {FABI_F64_I16_F64_F64, (void*)fn, HandlerIndex, false}; } -void InterpreterOps::FillFallbackIndexPointers(uint64_t *Info) { - Info[Core::OPINDEX_F80CVTTO_4] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle4); - Info[Core::OPINDEX_F80CVTTO_8] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle8); +void InterpreterOps::FillFallbackIndexPointers(uint64_t* Info) { + Info[Core::OPINDEX_F80CVTTO_4] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle4); + Info[Core::OPINDEX_F80CVTTO_8] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle8); Info[Core::OPINDEX_F80CVT_4] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle4); Info[Core::OPINDEX_F80CVT_8] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle8); Info[Core::OPINDEX_F80CVTINT_2] = 
reinterpret_cast(&FEXCore::CPU::OpHandlers::handle2); @@ -55,8 +55,8 @@ void InterpreterOps::FillFallbackIndexPointers(uint64_t *Info) { Info[Core::OPINDEX_F80COS] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle); Info[Core::OPINDEX_F80XTRACT_EXP] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle); Info[Core::OPINDEX_F80XTRACT_SIG] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle); - Info[Core::OPINDEX_F80BCDSTORE] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle); - Info[Core::OPINDEX_F80BCDLOAD] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle); + Info[Core::OPINDEX_F80BCDSTORE] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle); + Info[Core::OPINDEX_F80BCDLOAD] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle); // Binary Info[Core::OPINDEX_F80ADD] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle); @@ -85,126 +85,123 @@ void InterpreterOps::FillFallbackIndexPointers(uint64_t *Info) { Info[Core::OPINDEX_VPCMPISTRX] = reinterpret_cast(&FEXCore::CPU::OpHandlers::handle); } -bool InterpreterOps::GetFallbackHandler(bool SupportsPreserveAllABI, IR::IROp_Header const *IROp, FallbackInfo *Info) { +bool InterpreterOps::GetFallbackHandler(bool SupportsPreserveAllABI, const IR::IROp_Header* IROp, FallbackInfo* Info) { uint8_t OpSize = IROp->Size; - switch(IROp->Op) { - case IR::OP_F80CVTTO: { - auto Op = IROp->C(); + switch (IROp->Op) { + case IR::OP_F80CVTTO: { + auto Op = IROp->C(); - switch (Op->SrcSize) { - case 4: { - *Info = {FABI_F80_I16_F32, (void*)&FEXCore::CPU::OpHandlers::handle4, Core::OPINDEX_F80CVTTO_4, SupportsPreserveAllABI}; - return true; - } - case 8: { - *Info = {FABI_F80_I16_F64, (void*)&FEXCore::CPU::OpHandlers::handle8, Core::OPINDEX_F80CVTTO_8, SupportsPreserveAllABI}; - return true; - } - default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize); + switch (Op->SrcSize) { + case 4: { + *Info = {FABI_F80_I16_F32, (void*)&FEXCore::CPU::OpHandlers::handle4, Core::OPINDEX_F80CVTTO_4, SupportsPreserveAllABI}; + return true; + 
} + case 8: { + *Info = {FABI_F80_I16_F64, (void*)&FEXCore::CPU::OpHandlers::handle8, Core::OPINDEX_F80CVTTO_8, SupportsPreserveAllABI}; + return true; + } + default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize); + } + break; + } + case IR::OP_F80CVT: { + switch (OpSize) { + case 4: { + *Info = {FABI_F32_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle4, Core::OPINDEX_F80CVT_4, SupportsPreserveAllABI}; + return true; + } + case 8: { + *Info = {FABI_F64_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle8, Core::OPINDEX_F80CVT_8, SupportsPreserveAllABI}; + return true; + } + default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize); + } + break; + } + case IR::OP_F80CVTINT: { + auto Op = IROp->C(); + + switch (OpSize) { + case 2: { + if (Op->Truncate) { + *Info = {FABI_I16_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle2t, Core::OPINDEX_F80CVTINT_TRUNC2, + SupportsPreserveAllABI}; + } else { + *Info = {FABI_I16_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle2, Core::OPINDEX_F80CVTINT_2, SupportsPreserveAllABI}; } - break; + return true; } - case IR::OP_F80CVT: { - switch (OpSize) { - case 4: { - *Info = {FABI_F32_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle4, Core::OPINDEX_F80CVT_4, SupportsPreserveAllABI}; - return true; - } - case 8: { - *Info = {FABI_F64_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle8, Core::OPINDEX_F80CVT_8, SupportsPreserveAllABI}; - return true; - } - default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize); + case 4: { + if (Op->Truncate) { + *Info = {FABI_I32_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle4t, Core::OPINDEX_F80CVTINT_TRUNC4, + SupportsPreserveAllABI}; + } else { + *Info = {FABI_I32_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle4, Core::OPINDEX_F80CVTINT_4, SupportsPreserveAllABI}; } - break; + return true; } - case IR::OP_F80CVTINT: { - auto Op = IROp->C(); - - switch (OpSize) { - case 2: { - if (Op->Truncate) { - *Info = {FABI_I16_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle2t, 
Core::OPINDEX_F80CVTINT_TRUNC2, SupportsPreserveAllABI}; - } - else { - *Info = {FABI_I16_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle2, Core::OPINDEX_F80CVTINT_2, SupportsPreserveAllABI}; - } - return true; - } - case 4: { - if (Op->Truncate) { - *Info = {FABI_I32_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle4t, Core::OPINDEX_F80CVTINT_TRUNC4, SupportsPreserveAllABI}; - } - else { - *Info = {FABI_I32_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle4, Core::OPINDEX_F80CVTINT_4, SupportsPreserveAllABI}; - } - return true; - } - case 8: { - if (Op->Truncate) { - *Info = {FABI_I64_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle8t, Core::OPINDEX_F80CVTINT_TRUNC8, SupportsPreserveAllABI}; - } - else { - *Info = {FABI_I64_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle8, Core::OPINDEX_F80CVTINT_8, SupportsPreserveAllABI}; - } - return true; - } - default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize); + case 8: { + if (Op->Truncate) { + *Info = {FABI_I64_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle8t, Core::OPINDEX_F80CVTINT_TRUNC8, + SupportsPreserveAllABI}; + } else { + *Info = {FABI_I64_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle8, Core::OPINDEX_F80CVTINT_8, SupportsPreserveAllABI}; } - break; + return true; } - case IR::OP_F80CMP: { - auto Op = IROp->C(); + default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize); + } + break; + } + case IR::OP_F80CMP: { + auto Op = IROp->C(); - static constexpr std::array handlers{ - &FEXCore::CPU::OpHandlers::handle<0>, - &FEXCore::CPU::OpHandlers::handle<1>, - &FEXCore::CPU::OpHandlers::handle<2>, - &FEXCore::CPU::OpHandlers::handle<3>, - &FEXCore::CPU::OpHandlers::handle<4>, - &FEXCore::CPU::OpHandlers::handle<5>, - &FEXCore::CPU::OpHandlers::handle<6>, - &FEXCore::CPU::OpHandlers::handle<7>, - }; + static constexpr std::array handlers { + &FEXCore::CPU::OpHandlers::handle<0>, &FEXCore::CPU::OpHandlers::handle<1>, + &FEXCore::CPU::OpHandlers::handle<2>, &FEXCore::CPU::OpHandlers::handle<3>, + 
&FEXCore::CPU::OpHandlers::handle<4>, &FEXCore::CPU::OpHandlers::handle<5>, + &FEXCore::CPU::OpHandlers::handle<6>, &FEXCore::CPU::OpHandlers::handle<7>, + }; - *Info = {FABI_I64_I16_F80_F80, (void*)handlers[Op->Flags], (Core::FallbackHandlerIndex)(Core::OPINDEX_F80CMP_0 + Op->Flags), SupportsPreserveAllABI}; - return true; - } + *Info = {FABI_I64_I16_F80_F80, (void*)handlers[Op->Flags], (Core::FallbackHandlerIndex)(Core::OPINDEX_F80CMP_0 + Op->Flags), + SupportsPreserveAllABI}; + return true; + } - case IR::OP_F80CVTTOINT: { - auto Op = IROp->C(); + case IR::OP_F80CVTTOINT: { + auto Op = IROp->C(); - switch (Op->SrcSize) { - case 2: { - *Info = {FABI_F80_I16_I16, (void*)&FEXCore::CPU::OpHandlers::handle2, Core::OPINDEX_F80CVTTOINT_2, SupportsPreserveAllABI}; - return true; - } - case 4: { - *Info = {FABI_F80_I16_I32, (void*)&FEXCore::CPU::OpHandlers::handle4, Core::OPINDEX_F80CVTTOINT_4, SupportsPreserveAllABI}; - return true; - } - default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize); - } - break; + switch (Op->SrcSize) { + case 2: { + *Info = {FABI_F80_I16_I16, (void*)&FEXCore::CPU::OpHandlers::handle2, Core::OPINDEX_F80CVTTOINT_2, SupportsPreserveAllABI}; + return true; } + case 4: { + *Info = {FABI_F80_I16_I32, (void*)&FEXCore::CPU::OpHandlers::handle4, Core::OPINDEX_F80CVTTOINT_4, SupportsPreserveAllABI}; + return true; + } + default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize); + } + break; + } #define COMMON_UNARY_X87_OP(OP) \ - case IR::OP_F80##OP: { \ - *Info = {FABI_F80_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle, Core::OPINDEX_F80##OP, SupportsPreserveAllABI}; \ - return true; \ - } + case IR::OP_F80##OP: { \ + *Info = {FABI_F80_I16_F80, (void*)&FEXCore::CPU::OpHandlers::handle, Core::OPINDEX_F80##OP, SupportsPreserveAllABI}; \ + return true; \ + } #define COMMON_BINARY_X87_OP(OP) \ - case IR::OP_F80##OP: { \ - *Info = {FABI_F80_I16_F80_F80, (void*)&FEXCore::CPU::OpHandlers::handle, Core::OPINDEX_F80##OP, SupportsPreserveAllABI}; \ - 
return true; \ - } + case IR::OP_F80##OP: { \ + *Info = {FABI_F80_I16_F80_F80, (void*)&FEXCore::CPU::OpHandlers::handle, Core::OPINDEX_F80##OP, SupportsPreserveAllABI}; \ + return true; \ + } #define COMMON_F64_OP(OP) \ - case IR::OP_F64##OP: { \ - *Info = GetFallbackInfo(&FEXCore::CPU::OpHandlers::handle, Core::OPINDEX_F64##OP); \ - return true; \ - } + case IR::OP_F64##OP: { \ + *Info = GetFallbackInfo(&FEXCore::CPU::OpHandlers::handle, Core::OPINDEX_F64##OP); \ + return true; \ + } // Unary COMMON_UNARY_X87_OP(ROUND) @@ -242,20 +239,20 @@ bool InterpreterOps::GetFallbackHandler(bool SupportsPreserveAllABI, IR::IROp_He COMMON_F64_OP(FPREM) COMMON_F64_OP(SCALE) - // SSE4.2 Fallbacks - case IR::OP_VPCMPESTRX: - *Info = {FABI_I32_I64_I64_I128_I128_I16, (void*)&FEXCore::CPU::OpHandlers::handle, Core::OPINDEX_VPCMPESTRX, SupportsPreserveAllABI}; - return true; - case IR::OP_VPCMPISTRX: - *Info = {FABI_I32_I128_I128_I16, (void*)&FEXCore::CPU::OpHandlers::handle, Core::OPINDEX_VPCMPISTRX, SupportsPreserveAllABI}; - return true; + // SSE4.2 Fallbacks + case IR::OP_VPCMPESTRX: + *Info = {FABI_I32_I64_I64_I128_I128_I16, (void*)&FEXCore::CPU::OpHandlers::handle, Core::OPINDEX_VPCMPESTRX, + SupportsPreserveAllABI}; + return true; + case IR::OP_VPCMPISTRX: + *Info = {FABI_I32_I128_I128_I16, (void*)&FEXCore::CPU::OpHandlers::handle, Core::OPINDEX_VPCMPISTRX, SupportsPreserveAllABI}; + return true; - default: - break; + default: break; } return false; } -} +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp index 3ba0bbf052..e930e02872 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp @@ -16,21 +16,19 @@ namespace FEXCore::CPU { #define GRD(Node) (IROp->Size <= 4 ? GetDst(Node) : GetDst(Node)) #define GRS(Node) (IROp->Size <= 4 ? 
GetReg(Node) : GetReg(Node)) -#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const *IROp, IR::NodeID Node) +#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node) DEF_OP(TruncElementPair) { auto Op = IROp->C(); switch (IROp->Size) { - case 4: { - auto Dst = GetRegPair(Node); - auto Src = GetRegPair(Op->Pair.ID()); - mov(ARMEmitter::Size::i32Bit, Dst.first, Src.first); - mov(ARMEmitter::Size::i32Bit, Dst.second, Src.second); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled Truncation size: {}", IROp->Size); - break; + case 4: { + auto Dst = GetRegPair(Node); + auto Src = GetRegPair(Op->Pair.ID()); + mov(ARMEmitter::Size::i32Bit, Dst.first, Src.first); + mov(ARMEmitter::Size::i32Bit, Dst.second, Src.second); + break; + } + default: LOGMAN_MSG_A_FMT("Unhandled Truncation size: {}", IROp->Size); break; } } @@ -55,11 +53,11 @@ DEF_OP(EntrypointOffset) { } DEF_OP(InlineConstant) { - //nop + // nop } DEF_OP(InlineEntrypointOffset) { - //nop + // nop } DEF_OP(CycleCounter) { @@ -253,7 +251,7 @@ DEF_OP(AXFlag) { ARMEmitter::Condition MapSelectCC(IR::CondClassType Cond) { switch (Cond.Val) { - case FEXCore::IR::COND_EQ: return ARMEmitter::Condition::CC_EQ; + case FEXCore::IR::COND_EQ: return ARMEmitter::Condition::CC_EQ; case FEXCore::IR::COND_NEQ: return ARMEmitter::Condition::CC_NE; case FEXCore::IR::COND_SGE: return ARMEmitter::Condition::CC_GE; case FEXCore::IR::COND_SLT: return ARMEmitter::Condition::CC_LT; @@ -265,17 +263,15 @@ ARMEmitter::Condition MapSelectCC(IR::CondClassType Cond) { case FEXCore::IR::COND_ULE: return ARMEmitter::Condition::CC_LS; case FEXCore::IR::COND_FLU: return ARMEmitter::Condition::CC_LT; case FEXCore::IR::COND_FGE: return ARMEmitter::Condition::CC_GE; - case FEXCore::IR::COND_FLEU:return ARMEmitter::Condition::CC_LE; + case FEXCore::IR::COND_FLEU: return ARMEmitter::Condition::CC_LE; case FEXCore::IR::COND_FGT: return ARMEmitter::Condition::CC_GT; - case FEXCore::IR::COND_FU: return 
ARMEmitter::Condition::CC_VS; + case FEXCore::IR::COND_FU: return ARMEmitter::Condition::CC_VS; case FEXCore::IR::COND_FNU: return ARMEmitter::Condition::CC_VC; case FEXCore::IR::COND_VS: case FEXCore::IR::COND_VC: case FEXCore::IR::COND_MI: return ARMEmitter::Condition::CC_MI; case FEXCore::IR::COND_PL: return ARMEmitter::Condition::CC_PL; - default: - LOGMAN_MSG_A_FMT("Unsupported compare type"); - return ARMEmitter::Condition::CC_NV; + default: LOGMAN_MSG_A_FMT("Unsupported compare type"); return ARMEmitter::Condition::CC_NV; } } @@ -288,8 +284,7 @@ DEF_OP(CondAddNZCV) { ARMEmitter::StatusFlags Flags = (ARMEmitter::StatusFlags)Op->FalseNZCV; uint64_t Const = 0; - auto Src1 = IsInlineConstant(Op->Src1, &Const) ? ARMEmitter::Reg::zr : - GetReg(Op->Src1.ID()); + auto Src1 = IsInlineConstant(Op->Src1, &Const) ? ARMEmitter::Reg::zr : GetReg(Op->Src1.ID()); LOGMAN_THROW_A_FMT(Const == 0, "Unsupported inline constant"); if (IsInlineConstant(Op->Src2, &Const)) { @@ -306,10 +301,11 @@ DEF_OP(Neg) { LOGMAN_THROW_AA_FMT(OpSize == 4 || OpSize == 8, "Unsupported {} size: {}", __func__, OpSize); const auto EmitSize = OpSize == 8 ? 
ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; - if (Op->Cond == FEXCore::IR::COND_AL) + if (Op->Cond == FEXCore::IR::COND_AL) { neg(EmitSize, GetReg(Node), GetReg(Op->Src.ID())); - else + } else { cneg(EmitSize, GetReg(Node), GetReg(Op->Src.ID()), MapSelectCC(Op->Cond)); + } } DEF_OP(Mul) { @@ -361,8 +357,7 @@ DEF_OP(Div) { Src1 = TMP1; Src2 = TMP2; - } - else if (OpSize == 2) { + } else if (OpSize == 2) { sxth(EmitSize, TMP1, Src1); sxth(EmitSize, TMP2, Src2); @@ -392,8 +387,7 @@ DEF_OP(UDiv) { Src1 = TMP1; Src2 = TMP2; - } - else if (OpSize == 2) { + } else if (OpSize == 2) { uxth(EmitSize, TMP1, Src1); uxth(EmitSize, TMP2, Src2); @@ -422,8 +416,7 @@ DEF_OP(Rem) { Src1 = TMP1; Src2 = TMP2; - } - else if (OpSize == 2) { + } else if (OpSize == 2) { sxth(EmitSize, TMP1, Src1); sxth(EmitSize, TMP2, Src2); @@ -452,8 +445,7 @@ DEF_OP(URem) { Src1 = TMP1; Src2 = TMP2; - } - else if (OpSize == 2) { + } else if (OpSize == 2) { uxth(EmitSize, TMP1, Src1); uxth(EmitSize, TMP2, Src2); @@ -480,8 +472,7 @@ DEF_OP(MulH) { sxtw(TMP2, Src2.W()); mul(ARMEmitter::Size::i32Bit, Dst, TMP1, TMP2); ubfx(ARMEmitter::Size::i32Bit, Dst, Dst, 32, 32); - } - else { + } else { smulh(Dst.X(), Src1.X(), Src2.X()); } } @@ -501,8 +492,7 @@ DEF_OP(UMulH) { uxtw(ARMEmitter::Size::i64Bit, TMP2, Src2); mul(ARMEmitter::Size::i64Bit, Dst, TMP1, TMP2); ubfx(ARMEmitter::Size::i64Bit, Dst, Dst, 32, 32); - } - else { + } else { umulh(Dst.X(), Src1.X(), Src2.X()); } } @@ -713,8 +703,7 @@ DEF_OP(Ashr) { if (IsInlineConstant(Op->Src2, &Const)) { if (OpSize >= 4) { asr(EmitSize, Dst, Src1, (unsigned int)Const); - } - else { + } else { sbfx(EmitSize, TMP1, Src1, 0, OpSize * 8); asr(EmitSize, Dst, TMP1, (unsigned int)Const); ubfx(EmitSize, Dst, Dst, 0, OpSize * 8); @@ -723,8 +712,7 @@ DEF_OP(Ashr) { const auto Src2 = GetReg(Op->Src2.ID()); if (OpSize >= 4) { asrv(EmitSize, Dst, Src1, Src2); - } - else { + } else { sbfx(EmitSize, TMP1, Src1, 0, OpSize * 8); asrv(EmitSize, Dst, TMP1, Src2); ubfx(EmitSize, 
Dst, Dst, 0, OpSize * 8); @@ -791,11 +779,11 @@ DEF_OP(PDep) { // So we have shadow as temporaries const auto Input = TMP1.R(); - const auto Mask = TMP2.R(); + const auto Mask = TMP2.R(); // these get used variously as scratch - const auto T0 = TMP3.R(); - const auto T1 = TMP4.R(); + const auto T0 = TMP3.R(); + const auto T1 = TMP4.R(); ARMEmitter::BackwardLabel NextBit; ARMEmitter::SingleUseForwardLabel Done; @@ -841,8 +829,8 @@ DEF_OP(PExt) { const auto Mask = GetReg(Op->Mask.ID()); const auto Dest = GetReg(Node); - const auto MaskReg = TMP1; - const auto BitReg = TMP2; + const auto MaskReg = TMP1; + const auto BitReg = TMP2; const auto ValueReg = TMP3; ARMEmitter::SingleUseForwardLabel EarlyExit; @@ -885,64 +873,60 @@ DEF_OP(LDiv) { // Each source is OpSize in size // So you can have up to a 128bit divide from x86-64 switch (OpSize) { - case 2: { - uxth(EmitSize, TMP1, Lower); - bfi(EmitSize, TMP1, Upper, 16, 16); - sxth(EmitSize, TMP2, Divisor); - sdiv(EmitSize, Dst, TMP1, TMP2); + case 2: { + uxth(EmitSize, TMP1, Lower); + bfi(EmitSize, TMP1, Upper, 16, 16); + sxth(EmitSize, TMP2, Divisor); + sdiv(EmitSize, Dst, TMP1, TMP2); break; - } - case 4: { - // TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits. - mov(EmitSize, TMP1, Lower); - bfi(EmitSize, TMP1, Upper, 32, 32); - sxtw(TMP2, Divisor.W()); - sdiv(EmitSize, Dst, TMP1, TMP2); + } + case 4: { + // TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits. 
+ mov(EmitSize, TMP1, Lower); + bfi(EmitSize, TMP1, Upper, 32, 32); + sxtw(TMP2, Divisor.W()); + sdiv(EmitSize, Dst, TMP1, TMP2); break; - } - case 8: { - ARMEmitter::SingleUseForwardLabel Only64Bit{}; - ARMEmitter::SingleUseForwardLabel LongDIVRet{}; + } + case 8: { + ARMEmitter::SingleUseForwardLabel Only64Bit {}; + ARMEmitter::SingleUseForwardLabel LongDIVRet {}; - // Check if the upper bits match the top bit of the lower 64-bits - // Sign extend the top bit of lower bits - sbfx(EmitSize, TMP1, Lower, 63, 1); - eor(EmitSize, TMP1, TMP1, Upper); + // Check if the upper bits match the top bit of the lower 64-bits + // Sign extend the top bit of lower bits + sbfx(EmitSize, TMP1, Lower, 63, 1); + eor(EmitSize, TMP1, TMP1, Upper); - // If the sign bit matches then the result is zero - cbz(EmitSize, TMP1, &Only64Bit); + // If the sign bit matches then the result is zero + cbz(EmitSize, TMP1, &Only64Bit); - // Long divide - { - mov(EmitSize, TMP1, Upper); - mov(EmitSize, TMP2, Lower); - mov(EmitSize, TMP3, Divisor); + // Long divide + { + mov(EmitSize, TMP1, Upper); + mov(EmitSize, TMP2, Lower); + mov(EmitSize, TMP3, Divisor); - ldr(TMP4, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.AArch64.LDIVHandler)); + ldr(TMP4, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.AArch64.LDIVHandler)); - str(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16); - blr(TMP4); - ldr(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16); + str(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16); + blr(TMP4); + ldr(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16); - // Move result to its destination register - mov(EmitSize, Dst, TMP1); + // Move result to its destination register + mov(EmitSize, Dst, TMP1); - // Skip 64-bit path - b(&LongDIVRet); - } + // Skip 64-bit path + b(&LongDIVRet); + } - Bind(&Only64Bit); - // 64-Bit only - { - sdiv(EmitSize, Dst, Lower, Divisor); - } + Bind(&Only64Bit); + // 64-Bit only + { sdiv(EmitSize, Dst, Lower, Divisor); } - Bind(&LongDIVRet); + 
Bind(&LongDIVRet); break; - } - default: - LOGMAN_MSG_A_FMT("Unknown LDIV Size: {}", OpSize); - break; + } + default: LOGMAN_MSG_A_FMT("Unknown LDIV Size: {}", OpSize); break; } } @@ -959,58 +943,54 @@ DEF_OP(LUDiv) { // Each source is OpSize in size // So you can have up to a 128bit divide from x86-64= switch (OpSize) { - case 2: { - uxth(EmitSize, TMP1, Lower); - bfi(EmitSize, TMP1, Upper, 16, 16); - udiv(EmitSize, Dst, TMP1, Divisor); + case 2: { + uxth(EmitSize, TMP1, Lower); + bfi(EmitSize, TMP1, Upper, 16, 16); + udiv(EmitSize, Dst, TMP1, Divisor); break; - } - case 4: { - // TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits. - mov(EmitSize, TMP1, Lower); - bfi(EmitSize, TMP1, Upper, 32, 32); - udiv(EmitSize, Dst, TMP1, Divisor); + } + case 4: { + // TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits. + mov(EmitSize, TMP1, Lower); + bfi(EmitSize, TMP1, Upper, 32, 32); + udiv(EmitSize, Dst, TMP1, Divisor); break; - } - case 8: { - ARMEmitter::SingleUseForwardLabel Only64Bit{}; - ARMEmitter::SingleUseForwardLabel LongDIVRet{}; + } + case 8: { + ARMEmitter::SingleUseForwardLabel Only64Bit {}; + ARMEmitter::SingleUseForwardLabel LongDIVRet {}; - // Check the upper bits for zero - // If the upper bits are zero then we can do a 64-bit divide - cbz(EmitSize, Upper, &Only64Bit); + // Check the upper bits for zero + // If the upper bits are zero then we can do a 64-bit divide + cbz(EmitSize, Upper, &Only64Bit); - // Long divide - { - mov(EmitSize, TMP1, Upper); - mov(EmitSize, TMP2, Lower); - mov(EmitSize, TMP3, Divisor); + // Long divide + { + mov(EmitSize, TMP1, Upper); + mov(EmitSize, TMP2, Lower); + mov(EmitSize, TMP3, Divisor); - ldr(TMP4, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.AArch64.LUDIVHandler)); + ldr(TMP4, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.AArch64.LUDIVHandler)); - str(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16); - blr(TMP4); - 
ldr(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16); + str(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16); + blr(TMP4); + ldr(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16); - // Move result to its destination register - mov(EmitSize, Dst, TMP1); + // Move result to its destination register + mov(EmitSize, Dst, TMP1); - // Skip 64-bit path - b(&LongDIVRet); - } + // Skip 64-bit path + b(&LongDIVRet); + } - Bind(&Only64Bit); - // 64-Bit only - { - udiv(EmitSize, Dst, Lower, Divisor); - } + Bind(&Only64Bit); + // 64-Bit only + { udiv(EmitSize, Dst, Lower, Divisor); } - Bind(&LongDIVRet); + Bind(&LongDIVRet); break; - } - default: - LOGMAN_MSG_A_FMT("Unknown LUDIV Size: {}", OpSize); - break; + } + default: LOGMAN_MSG_A_FMT("Unknown LUDIV Size: {}", OpSize); break; } } @@ -1027,66 +1007,64 @@ DEF_OP(LRem) { // Each source is OpSize in size // So you can have up to a 128bit divide from x86-64 switch (OpSize) { - case 2: { - uxth(EmitSize, TMP1, Lower); - bfi(EmitSize, TMP1, Upper, 16, 16); - sxth(EmitSize, TMP2, Divisor); - sdiv(EmitSize, TMP3, TMP1, TMP2); - msub(EmitSize, Dst, TMP3, TMP2, TMP1); + case 2: { + uxth(EmitSize, TMP1, Lower); + bfi(EmitSize, TMP1, Upper, 16, 16); + sxth(EmitSize, TMP2, Divisor); + sdiv(EmitSize, TMP3, TMP1, TMP2); + msub(EmitSize, Dst, TMP3, TMP2, TMP1); break; - } - case 4: { - // TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits. - mov(EmitSize, TMP1, Lower); - bfi(EmitSize, TMP1, Upper, 32, 32); - sxtw(TMP3, Divisor.W()); - sdiv(EmitSize, TMP2, TMP1, TMP3); - msub(EmitSize, Dst, TMP2, TMP3, TMP1); + } + case 4: { + // TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits. 
+ mov(EmitSize, TMP1, Lower); + bfi(EmitSize, TMP1, Upper, 32, 32); + sxtw(TMP3, Divisor.W()); + sdiv(EmitSize, TMP2, TMP1, TMP3); + msub(EmitSize, Dst, TMP2, TMP3, TMP1); break; - } - case 8: { - ARMEmitter::SingleUseForwardLabel Only64Bit{}; - ARMEmitter::SingleUseForwardLabel LongDIVRet{}; + } + case 8: { + ARMEmitter::SingleUseForwardLabel Only64Bit {}; + ARMEmitter::SingleUseForwardLabel LongDIVRet {}; - // Check if the upper bits match the top bit of the lower 64-bits - // Sign extend the top bit of lower bits - sbfx(EmitSize, TMP1, Lower, 63, 1); - eor(EmitSize, TMP1, TMP1, Upper); + // Check if the upper bits match the top bit of the lower 64-bits + // Sign extend the top bit of lower bits + sbfx(EmitSize, TMP1, Lower, 63, 1); + eor(EmitSize, TMP1, TMP1, Upper); - // If the sign bit matches then the result is zero - cbz(EmitSize, TMP1, &Only64Bit); + // If the sign bit matches then the result is zero + cbz(EmitSize, TMP1, &Only64Bit); - // Long divide - { - mov(EmitSize, TMP1, Upper); - mov(EmitSize, TMP2, Lower); - mov(EmitSize, TMP3, Divisor); + // Long divide + { + mov(EmitSize, TMP1, Upper); + mov(EmitSize, TMP2, Lower); + mov(EmitSize, TMP3, Divisor); - ldr(TMP4, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.AArch64.LREMHandler)); + ldr(TMP4, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.AArch64.LREMHandler)); - str(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16); - blr(TMP4); - ldr(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16); + str(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16); + blr(TMP4); + ldr(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16); - // Move result to its destination register - mov(EmitSize, Dst, TMP1); + // Move result to its destination register + mov(EmitSize, Dst, TMP1); - // Skip 64-bit path - b(&LongDIVRet); - } + // Skip 64-bit path + b(&LongDIVRet); + } - Bind(&Only64Bit); - // 64-Bit only - { - sdiv(EmitSize, TMP1, Lower, Divisor); - msub(EmitSize, Dst, TMP1, Divisor, Lower); - } - Bind(&LongDIVRet); - 
break; + Bind(&Only64Bit); + // 64-Bit only + { + sdiv(EmitSize, TMP1, Lower, Divisor); + msub(EmitSize, Dst, TMP1, Divisor, Lower); } - default: - LOGMAN_MSG_A_FMT("Unknown LREM Size: {}", OpSize); - break; + Bind(&LongDIVRet); + break; + } + default: LOGMAN_MSG_A_FMT("Unknown LREM Size: {}", OpSize); break; } } @@ -1103,61 +1081,59 @@ DEF_OP(LURem) { // Each source is OpSize in size // So you can have up to a 128bit divide from x86-64 switch (OpSize) { - case 2: { - uxth(EmitSize, TMP1, Lower); - bfi(EmitSize, TMP1, Upper, 16, 16); - udiv(EmitSize, TMP2, TMP1, Divisor); - msub(EmitSize, Dst, TMP2, Divisor, TMP1); + case 2: { + uxth(EmitSize, TMP1, Lower); + bfi(EmitSize, TMP1, Upper, 16, 16); + udiv(EmitSize, TMP2, TMP1, Divisor); + msub(EmitSize, Dst, TMP2, Divisor, TMP1); break; - } - case 4: { - // TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits. - mov(EmitSize, TMP1, Lower); - bfi(EmitSize, TMP1, Upper, 32, 32); - udiv(EmitSize, TMP2, TMP1, Divisor); - msub(EmitSize, Dst, TMP2, Divisor, TMP1); + } + case 4: { + // TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits. 
+ mov(EmitSize, TMP1, Lower); + bfi(EmitSize, TMP1, Upper, 32, 32); + udiv(EmitSize, TMP2, TMP1, Divisor); + msub(EmitSize, Dst, TMP2, Divisor, TMP1); break; - } - case 8: { - ARMEmitter::SingleUseForwardLabel Only64Bit{}; - ARMEmitter::SingleUseForwardLabel LongDIVRet{}; + } + case 8: { + ARMEmitter::SingleUseForwardLabel Only64Bit {}; + ARMEmitter::SingleUseForwardLabel LongDIVRet {}; - // Check the upper bits for zero - // If the upper bits are zero then we can do a 64-bit divide - cbz(EmitSize, Upper, &Only64Bit); + // Check the upper bits for zero + // If the upper bits are zero then we can do a 64-bit divide + cbz(EmitSize, Upper, &Only64Bit); - // Long divide - { - mov(EmitSize, TMP1, Upper); - mov(EmitSize, TMP2, Lower); - mov(EmitSize, TMP3, Divisor); + // Long divide + { + mov(EmitSize, TMP1, Upper); + mov(EmitSize, TMP2, Lower); + mov(EmitSize, TMP3, Divisor); - ldr(TMP4, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.AArch64.LUREMHandler)); + ldr(TMP4, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.AArch64.LUREMHandler)); - str(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16); - blr(TMP4); - ldr(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16); + str(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16); + blr(TMP4); + ldr(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16); - // Move result to its destination register - mov(EmitSize, Dst, TMP1); + // Move result to its destination register + mov(EmitSize, Dst, TMP1); - // Skip 64-bit path - b(&LongDIVRet); - } + // Skip 64-bit path + b(&LongDIVRet); + } - Bind(&Only64Bit); - // 64-Bit only - { - udiv(EmitSize, TMP1, Lower, Divisor); - msub(EmitSize, Dst, TMP1, Divisor, Lower); - } + Bind(&Only64Bit); + // 64-Bit only + { + udiv(EmitSize, TMP1, Lower, Divisor); + msub(EmitSize, Dst, TMP1, Divisor, Lower); + } - Bind(&LongDIVRet); + Bind(&LongDIVRet); break; - } - default: - LOGMAN_MSG_A_FMT("Unknown LUREM Size: {}", OpSize); - break; + } + default: LOGMAN_MSG_A_FMT("Unknown LUREM Size: {}", 
OpSize); break; } } @@ -1182,30 +1158,30 @@ DEF_OP(Popcount) { const auto Src = GetReg(Op->Src.ID()); switch (OpSize) { - case 0x1: - fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Src); - // only use lowest byte - cnt(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D()); - break; - case 0x2: - fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Src); - cnt(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D()); - // only count two lowest bytes - addp(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D(), VTMP1.D()); - break; - case 0x4: - fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Src); - cnt(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D()); - // fmov has zero extended, unused bytes are zero - addv(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D()); - break; - case 0x8: - fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), Src); - cnt(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D()); - // fmov has zero extended, unused bytes are zero - addv(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D()); - break; - default: LOGMAN_MSG_A_FMT("Unsupported Popcount size: {}", OpSize); + case 0x1: + fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Src); + // only use lowest byte + cnt(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D()); + break; + case 0x2: + fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Src); + cnt(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D()); + // only count two lowest bytes + addp(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D(), VTMP1.D()); + break; + case 0x4: + fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Src); + cnt(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D()); + // fmov has zero extended, unused bytes are zero + addv(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D()); + break; + case 0x8: + fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), Src); + cnt(FEXCore::ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D()); + // fmov has zero extended, unused bytes are zero + 
addv(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D()); + break; + default: LOGMAN_MSG_A_FMT("Unsupported Popcount size: {}", OpSize); } umov(Dst, VTMP1, 0); @@ -1224,15 +1200,13 @@ DEF_OP(FindLSB) { ubfx(EmitSize, TMP1, Src, 0, OpSize * 8); cmp(EmitSize, TMP1, 0); rbit(EmitSize, TMP1, TMP1); - } - else { + } else { rbit(EmitSize, TMP1, Src); cmp(EmitSize, Src, 0); } clz(EmitSize, Dst, TMP1); csinv(EmitSize, Dst, Dst, ARMEmitter::Reg::zr, ARMEmitter::Condition::CC_NE); - } DEF_OP(FindMSB) { @@ -1251,8 +1225,7 @@ DEF_OP(FindMSB) { lsl(EmitSize, Dst, Src, 16); orr(EmitSize, Dst, Dst, 0x8000); clz(EmitSize, Dst, Dst); - } - else { + } else { clz(EmitSize, Dst, Src); } @@ -1299,8 +1272,7 @@ DEF_OP(CountLeadingZeroes) { lsl(EmitSize, Dst, Src, 16); orr(EmitSize, Dst, Dst, 0x8000); clz(EmitSize, Dst, Dst); - } - else { + } else { clz(EmitSize, Dst, Src); } } @@ -1334,13 +1306,11 @@ DEF_OP(Bfi) { if (Dst == SrcDst) { // If Dst and SrcDst match then this turns in to a simple BFI instruction. bfi(EmitSize, Dst, Src, Op->lsb, Op->Width); - } - else if (Dst != Src) { + } else if (Dst != Src) { // If the destination isn't the source then we can move the DstSrc and insert directly. mov(EmitSize, Dst, SrcDst); bfi(EmitSize, Dst, Src, Op->lsb, Op->Width); - } - else { + } else { // Destination didn't match the dst source register. // TODO: Inefficient until FEX can have RA constraints here. mov(EmitSize, TMP1, SrcDst); @@ -1348,8 +1318,7 @@ DEF_OP(Bfi) { if (OpSize >= 4) { mov(EmitSize, Dst, TMP1.R()); - } - else { + } else { ubfx(EmitSize, Dst, TMP1, 0, OpSize * 8); } } @@ -1368,13 +1337,11 @@ DEF_OP(Bfxil) { if (Dst == SrcDst) { // If Dst and SrcDst match then this turns in to a single instruction. bfxil(EmitSize, Dst, Src, Op->lsb, Op->Width); - } - else if (Dst != Src) { + } else if (Dst != Src) { // If the destination isn't the source then we can move the DstSrc and insert directly. 
mov(EmitSize, Dst, SrcDst); bfxil(EmitSize, Dst, Src, Op->lsb, Op->Width); - } - else { + } else { // Destination didn't match the dst source register. // TODO: Inefficient until FEX can have RA constraints here. mov(EmitSize, TMP1, SrcDst); @@ -1388,8 +1355,7 @@ DEF_OP(Bfe) { LOGMAN_THROW_AA_FMT(IROp->Size <= 8, "OpSize is too large for BFE: {}", IROp->Size); LOGMAN_THROW_AA_FMT(Op->Width != 0, "Invalid BFE width of 0"); const uint8_t OpSize = IROp->Size; - const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit - : ARMEmitter::Size::i32Bit; + const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto Dst = GetReg(Node); const auto Src = GetReg(Op->Src.ID()); @@ -1427,20 +1393,18 @@ DEF_OP(Select) { if (IsGPR(Op->Cmp1.ID())) { const auto Src1 = GetReg(Op->Cmp1.ID()); - if (IsInlineConstant(Op->Cmp2, &Const)) + if (IsInlineConstant(Op->Cmp2, &Const)) { cmp(CompareEmitSize, Src1, Const); - else { + } else { const auto Src2 = GetReg(Op->Cmp2.ID()); cmp(CompareEmitSize, Src1, Src2); } - } - else if (IsGPRPair(Op->Cmp1.ID())) { + } else if (IsGPRPair(Op->Cmp1.ID())) { const auto Src1 = GetRegPair(Op->Cmp1.ID()); const auto Src2 = GetRegPair(Op->Cmp2.ID()); cmp(EmitSize, Src1.first, Src2.first); ccmp(EmitSize, Src1.second, Src2.second, ARMEmitter::StatusFlags::None, cc); - } - else if (IsFPR(Op->Cmp1.ID())) { + } else if (IsFPR(Op->Cmp1.ID())) { const auto Src1 = GetVReg(Op->Cmp1.ID()); const auto Src2 = GetVReg(Op->Cmp2.ID()); fcmp(Op->CompareSize == 8 ? 
ARMEmitter::ScalarRegSize::i64Bit : ARMEmitter::ScalarRegSize::i32Bit, Src1, Src2); @@ -1461,10 +1425,11 @@ DEF_OP(Select) { LOGMAN_MSG_A_FMT("Select: Unsupported compare inline parameters"); } - if (const_true == all_ones) + if (const_true == all_ones) { csetm(EmitSize, Dst, cc); - else + } else { cset(EmitSize, Dst, cc); + } } else { csel(EmitSize, Dst, GetReg(Op->TrueVal.ID()), GetReg(Op->FalseVal.ID()), cc); } @@ -1490,10 +1455,11 @@ DEF_OP(NZCVSelect) { LOGMAN_MSG_A_FMT("NZCVSelect: Unsupported constant"); } - if (const_true == all_ones) + if (const_true == all_ones) { csetm(EmitSize, Dst, cc); - else + } else { cset(EmitSize, Dst, cc); + } } else if (is_const_false) { LOGMAN_THROW_A_FMT(const_false == 0, "NZCVSelect: unsupported constant"); csel(EmitSize, Dst, GetReg(Op->TrueVal.ID()), ARMEmitter::Reg::zr, cc); @@ -1518,21 +1484,11 @@ DEF_OP(VExtractToGPR) { const auto PerformMove = [&](const ARMEmitter::VRegister reg, int index) { switch (OpSize) { - case 1: - umov(Dst, Vector, index); - break; - case 2: - umov(Dst, Vector, index); - break; - case 4: - umov(Dst, Vector, index); - break; - case 8: - umov(Dst, Vector, index); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled ExtractElementSize: {}", OpSize); - break; + case 1: umov(Dst, Vector, index); break; + case 2: umov(Dst, Vector, index); break; + case 4: umov(Dst, Vector, index); break; + case 8: umov(Dst, Vector, index); break; + default: LOGMAN_MSG_A_FMT("Unhandled ExtractElementSize: {}", OpSize); break; } }; @@ -1542,13 +1498,9 @@ DEF_OP(VExtractToGPR) { // when acting on larger register sizes. PerformMove(Vector, Op->Index); } else { - LOGMAN_THROW_AA_FMT(HostSupportsSVE256, - "Host doesn't support SVE. Cannot perform 256-bit operation."); - LOGMAN_THROW_AA_FMT(Is256Bit, - "Can't perform 256-bit extraction with op side: {}", OpSize); - LOGMAN_THROW_AA_FMT(Offset < AVXRegBitSize, - "Trying to extract element outside bounds of register. 
Offset={}, Index={}", - Offset, Op->Index); + LOGMAN_THROW_AA_FMT(HostSupportsSVE256, "Host doesn't support SVE. Cannot perform 256-bit operation."); + LOGMAN_THROW_AA_FMT(Is256Bit, "Can't perform 256-bit extraction with op side: {}", OpSize); + LOGMAN_THROW_AA_FMT(Offset < AVXRegBitSize, "Trying to extract element outside bounds of register. Offset={}, Index={}", Offset, Op->Index); // We need to use the upper 128-bit lane, so lets move it down. // Inverting our dedicated predicate for 128-bit operations selects @@ -1561,17 +1513,11 @@ DEF_OP(VExtractToGPR) { // upper half of the vector. const auto SanitizedIndex = [OpSize, Op] { switch (OpSize) { - case 1: - return Op->Index - 16; - case 2: - return Op->Index - 8; - case 4: - return Op->Index - 4; - case 8: - return Op->Index - 2; - default: - LOGMAN_MSG_A_FMT("Unhandled OpSize: {}", OpSize); - return 0; + case 1: return Op->Index - 16; + case 2: return Op->Index - 8; + case 4: return Op->Index - 4; + case 8: return Op->Index - 2; + default: LOGMAN_MSG_A_FMT("Unhandled OpSize: {}", OpSize); return 0; } }(); @@ -1589,8 +1535,7 @@ DEF_OP(Float_ToGPR_ZS) { if (Op->SrcElementSize == 8) { fcvtzs(DestSize, Dst, Src.D()); - } - else { + } else { fcvtzs(DestSize, Dst, Src.S()); } } @@ -1605,8 +1550,7 @@ DEF_OP(Float_ToGPR_S) { if (Op->SrcElementSize == 8) { frinti(VTMP1.D(), Src.D()); fcvtzs(DestSize, Dst, VTMP1.D()); - } - else { + } else { frinti(VTMP1.S(), Src.S()); fcvtzs(DestSize, Dst, VTMP1.S()); } @@ -1624,4 +1568,4 @@ DEF_OP(FCmp) { #undef DEF_OP -} +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/Arm64Relocations.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/Arm64Relocations.cpp index f8c225f915..67f7fc4dc5 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/Arm64Relocations.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/Arm64Relocations.cpp @@ -13,21 +13,19 @@ namespace FEXCore::CPU { uint64_t 
Arm64JITCore::GetNamedSymbolLiteral(FEXCore::CPU::RelocNamedSymbolLiteral::NamedSymbol Op) { switch (Op) { - case FEXCore::CPU::RelocNamedSymbolLiteral::NamedSymbol::SYMBOL_LITERAL_EXITFUNCTION_LINKER: - return ThreadState->CurrentFrame->Pointers.Common.ExitFunctionLinker; - break; - default: - ERROR_AND_DIE_FMT("Unknown named symbol literal: {}", static_cast(Op)); + case FEXCore::CPU::RelocNamedSymbolLiteral::NamedSymbol::SYMBOL_LITERAL_EXITFUNCTION_LINKER: + return ThreadState->CurrentFrame->Pointers.Common.ExitFunctionLinker; break; + default: ERROR_AND_DIE_FMT("Unknown named symbol literal: {}", static_cast(Op)); break; } return ~0ULL; } -void Arm64JITCore::InsertNamedThunkRelocation(ARMEmitter::Register Reg, const IR::SHA256Sum &Sum) { - Relocation MoveABI{}; +void Arm64JITCore::InsertNamedThunkRelocation(ARMEmitter::Register Reg, const IR::SHA256Sum& Sum) { + Relocation MoveABI {}; MoveABI.NamedThunkMove.Header.Type = FEXCore::CPU::RelocationTypes::RELOC_NAMED_THUNK_MOVE; // Offset is the offset from the entrypoint of the block - auto CurrentCursor = GetCursorAddress(); + auto CurrentCursor = GetCursorAddress(); MoveABI.NamedThunkMove.Offset = CurrentCursor - CodeData.BlockBegin; MoveABI.NamedThunkMove.Symbol = Sum; MoveABI.NamedThunkMove.RegisterIndex = Reg.Idx(); @@ -43,22 +41,25 @@ Arm64JITCore::NamedSymbolLiteralPair Arm64JITCore::InsertNamedSymbolLiteral(FEXC Arm64JITCore::NamedSymbolLiteralPair Lit { .Lit = Pointer, - .MoveABI = { - .NamedSymbolLiteral = { - .Header = { - .Type = FEXCore::CPU::RelocationTypes::RELOC_NAMED_SYMBOL_LITERAL, - }, - .Symbol = Op, - .Offset = 0, + .MoveABI = + { + .NamedSymbolLiteral = + { + .Header = + { + .Type = FEXCore::CPU::RelocationTypes::RELOC_NAMED_SYMBOL_LITERAL, + }, + .Symbol = Op, + .Offset = 0, + }, }, - }, }; return Lit; } -void Arm64JITCore::PlaceNamedSymbolLiteral(NamedSymbolLiteralPair &Lit) { +void Arm64JITCore::PlaceNamedSymbolLiteral(NamedSymbolLiteralPair& Lit) { // Offset is the offset from the 
entrypoint of the block - auto CurrentCursor = GetCursorAddress(); + auto CurrentCursor = GetCursorAddress(); Lit.MoveABI.NamedSymbolLiteral.Offset = CurrentCursor - CodeData.BlockBegin; Bind(&Lit.Loc); @@ -67,10 +68,10 @@ void Arm64JITCore::PlaceNamedSymbolLiteral(NamedSymbolLiteralPair &Lit) { } void Arm64JITCore::InsertGuestRIPMove(ARMEmitter::Register Reg, uint64_t Constant) { - Relocation MoveABI{}; + Relocation MoveABI {}; MoveABI.GuestRIPMove.Header.Type = FEXCore::CPU::RelocationTypes::RELOC_GUEST_RIP_MOVE; // Offset is the offset from the entrypoint of the block - auto CurrentCursor = GetCursorAddress(); + auto CurrentCursor = GetCursorAddress(); MoveABI.GuestRIPMove.Offset = CurrentCursor - CodeData.BlockBegin; MoveABI.GuestRIPMove.GuestRIP = Constant; MoveABI.GuestRIPMove.RegisterIndex = Reg.Idx(); @@ -79,54 +80,54 @@ void Arm64JITCore::InsertGuestRIPMove(ARMEmitter::Register Reg, uint64_t Constan Relocations.emplace_back(MoveABI); } -bool Arm64JITCore::ApplyRelocations(uint64_t GuestEntry, uint64_t CodeEntry, uint64_t CursorEntry, size_t NumRelocations, const char* EntryRelocations) { - size_t DataIndex{}; +bool Arm64JITCore::ApplyRelocations(uint64_t GuestEntry, uint64_t CodeEntry, uint64_t CursorEntry, size_t NumRelocations, + const char* EntryRelocations) { + size_t DataIndex {}; for (size_t j = 0; j < NumRelocations; ++j) { - const FEXCore::CPU::Relocation *Reloc = reinterpret_cast(&EntryRelocations[DataIndex]); + const FEXCore::CPU::Relocation* Reloc = reinterpret_cast(&EntryRelocations[DataIndex]); LOGMAN_THROW_AA_FMT((DataIndex % alignof(Relocation)) == 0, "Alignment of relocation wasn't adhered to"); switch (Reloc->Header.Type) { - case FEXCore::CPU::RelocationTypes::RELOC_NAMED_SYMBOL_LITERAL: { - uint64_t Pointer = GetNamedSymbolLiteral(Reloc->NamedSymbolLiteral.Symbol); - // Relocation occurs at the cursorEntry + offset relative to that cursor - SetCursorOffset(CursorEntry + Reloc->NamedSymbolLiteral.Offset); + case 
FEXCore::CPU::RelocationTypes::RELOC_NAMED_SYMBOL_LITERAL: { + uint64_t Pointer = GetNamedSymbolLiteral(Reloc->NamedSymbolLiteral.Symbol); + // Relocation occurs at the cursorEntry + offset relative to that cursor + SetCursorOffset(CursorEntry + Reloc->NamedSymbolLiteral.Offset); - // Generate a literal so we can place it - dc64(Pointer); + // Generate a literal so we can place it + dc64(Pointer); - DataIndex += sizeof(Reloc->NamedSymbolLiteral); - break; - } - case FEXCore::CPU::RelocationTypes::RELOC_NAMED_THUNK_MOVE: { - uint64_t Pointer = reinterpret_cast(EmitterCTX->ThunkHandler->LookupThunk(Reloc->NamedThunkMove.Symbol)); - if (Pointer == ~0ULL) { - return false; - } - - // Relocation occurs at the cursorEntry + offset relative to that cursor. - SetCursorOffset(CursorEntry + Reloc->NamedThunkMove.Offset); - LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Register(Reloc->NamedThunkMove.RegisterIndex), Pointer, true); - DataIndex += sizeof(Reloc->NamedThunkMove); - break; + DataIndex += sizeof(Reloc->NamedSymbolLiteral); + break; + } + case FEXCore::CPU::RelocationTypes::RELOC_NAMED_THUNK_MOVE: { + uint64_t Pointer = reinterpret_cast(EmitterCTX->ThunkHandler->LookupThunk(Reloc->NamedThunkMove.Symbol)); + if (Pointer == ~0ULL) { + return false; } - case FEXCore::CPU::RelocationTypes::RELOC_GUEST_RIP_MOVE: { - // XXX: Reenable once the JIT Object Cache is upstream - // XXX: Should spin the relocation list, create a list of guest RIP moves, and ask for them all once, reduces lock contention. - uint64_t Pointer = ~0ULL; // EmitterCTX->JITObjectCache->FindRelocatedRIP(Reloc->GuestRIPMove.GuestRIP); - if (Pointer == ~0ULL) { - return false; - } - - // Relocation occurs at the cursorEntry + offset relative to that cursor. 
- SetCursorOffset(CursorEntry + Reloc->GuestRIPMove.Offset); - LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Register(Reloc->GuestRIPMove.RegisterIndex), Pointer, true); - DataIndex += sizeof(Reloc->GuestRIPMove); - break; + + // Relocation occurs at the cursorEntry + offset relative to that cursor. + SetCursorOffset(CursorEntry + Reloc->NamedThunkMove.Offset); + LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Register(Reloc->NamedThunkMove.RegisterIndex), Pointer, true); + DataIndex += sizeof(Reloc->NamedThunkMove); + break; + } + case FEXCore::CPU::RelocationTypes::RELOC_GUEST_RIP_MOVE: { + // XXX: Reenable once the JIT Object Cache is upstream + // XXX: Should spin the relocation list, create a list of guest RIP moves, and ask for them all once, reduces lock contention. + uint64_t Pointer = ~0ULL; // EmitterCTX->JITObjectCache->FindRelocatedRIP(Reloc->GuestRIPMove.GuestRIP); + if (Pointer == ~0ULL) { + return false; } + + // Relocation occurs at the cursorEntry + offset relative to that cursor. 
+ SetCursorOffset(CursorEntry + Reloc->GuestRIPMove.Offset); + LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Register(Reloc->GuestRIPMove.RegisterIndex), Pointer, true); + DataIndex += sizeof(Reloc->GuestRIPMove); + break; + } } } return true; } -} - +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp index 39b67f8f56..e963fac466 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp @@ -11,7 +11,7 @@ tags: backend|arm64 #include "Interface/Core/JIT/Arm64/JITClass.h" namespace FEXCore::CPU { -#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const *IROp, IR::NodeID Node) +#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node) DEF_OP(CASPair) { auto Op = IROp->C(); LOGMAN_THROW_AA_FMT(IROp->ElementSize == 4 || IROp->ElementSize == 8, "Wrong element size"); @@ -29,8 +29,7 @@ DEF_OP(CASPair) { caspal(EmitSize, TMP3, TMP4, Desired.first, Desired.second, MemSrc); mov(EmitSize, Dst.first, TMP3.R()); mov(EmitSize, Dst.second, TMP4.R()); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; ARMEmitter::SingleUseForwardLabel LoopNotExpected; ARMEmitter::SingleUseForwardLabel LoopExpected; @@ -47,12 +46,12 @@ DEF_OP(CASPair) { b(&LoopExpected); - Bind(&LoopNotExpected); - mov(EmitSize, Dst.first, TMP2.R()); - mov(EmitSize, Dst.second, TMP3.R()); - // exclusive monitor needs to be cleared here - // Might have hit the case where ldaxr was hit but stlxr wasn't - clrex(); + Bind(&LoopNotExpected); + mov(EmitSize, Dst.first, TMP2.R()); + mov(EmitSize, Dst.second, TMP3.R()); + // exclusive monitor needs to be cleared here + // Might have hit the case where ldaxr was hit but stlxr wasn't + clrex(); Bind(&LoopExpected); } } @@ -71,16 +70,16 @@ DEF_OP(CAS) { const auto EmitSize = OpSize == 8 ? 
ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { mov(EmitSize, TMP2, Expected); casal(SubEmitSize, TMP2, Desired, MemSrc); mov(EmitSize, GetReg(Node), TMP2.R()); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; ARMEmitter::SingleUseForwardLabel LoopNotExpected; ARMEmitter::SingleUseForwardLabel LoopExpected; @@ -88,11 +87,9 @@ DEF_OP(CAS) { ldaxr(SubEmitSize, TMP2, MemSrc); if (OpSize == 1) { cmp(EmitSize, TMP2, Expected, ARMEmitter::ExtendedType::UXTB, 0); - } - else if (OpSize == 2) { + } else if (OpSize == 2) { cmp(EmitSize, TMP2, Expected, ARMEmitter::ExtendedType::UXTH, 0); - } - else { + } else { cmp(EmitSize, TMP2, Expected); } b(ARMEmitter::Condition::CC_NE, &LoopNotExpected); @@ -101,11 +98,11 @@ DEF_OP(CAS) { mov(EmitSize, GetReg(Node), Expected); b(&LoopExpected); - Bind(&LoopNotExpected); - mov(EmitSize, GetReg(Node), TMP2.R()); - // exclusive monitor needs to be cleared here - // Might have hit the case where ldaxr was hit but stlxr wasn't - clrex(); + Bind(&LoopNotExpected); + mov(EmitSize, GetReg(Node), TMP2.R()); + // exclusive monitor needs to be cleared here + // Might have hit the case where ldaxr was hit but stlxr wasn't + clrex(); Bind(&LoopExpected); } } @@ -120,14 +117,14 @@ DEF_OP(AtomicAdd) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? 
ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { staddl(SubEmitSize, Src, MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -147,15 +144,15 @@ DEF_OP(AtomicSub) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { neg(EmitSize, TMP2, Src); staddl(SubEmitSize, TMP2, MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -175,15 +172,15 @@ DEF_OP(AtomicAnd) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? 
ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { mvn(EmitSize, TMP2, Src); stclrl(SubEmitSize, TMP2, MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -203,14 +200,14 @@ DEF_OP(AtomicCLR) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { stclrl(SubEmitSize, Src, MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -230,14 +227,14 @@ DEF_OP(AtomicOr) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { stsetl(SubEmitSize, Src, MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -257,14 +254,14 @@ DEF_OP(AtomicXor) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { steorl(SubEmitSize, Src, MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -283,9 +280,10 @@ DEF_OP(AtomicNeg) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); @@ -305,15 +303,15 @@ DEF_OP(AtomicSwap) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? 
ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { mov(EmitSize, TMP2, Src); ldswpal(SubEmitSize, TMP2, GetReg(Node), MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -333,14 +331,14 @@ DEF_OP(AtomicFetchAdd) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { ldaddal(SubEmitSize, Src, GetReg(Node), MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -361,15 +359,15 @@ DEF_OP(AtomicFetchSub) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { neg(EmitSize, TMP2, Src); ldaddal(SubEmitSize, TMP2, GetReg(Node), MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -390,15 +388,15 @@ DEF_OP(AtomicFetchAnd) { const auto EmitSize = OpSize == 8 ? 
ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { mvn(EmitSize, TMP2, Src); ldclral(SubEmitSize, TMP2, GetReg(Node), MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -419,14 +417,14 @@ DEF_OP(AtomicFetchCLR) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { ldclral(SubEmitSize, Src, GetReg(Node), MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -447,14 +445,14 @@ DEF_OP(AtomicFetchOr) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? 
ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { ldsetal(SubEmitSize, Src, GetReg(Node), MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -475,14 +473,14 @@ DEF_OP(AtomicFetchXor) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (CTX->HostFeatures.SupportsAtomics) { ldeoral(SubEmitSize, Src, GetReg(Node), MemSrc); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); @@ -502,9 +500,10 @@ DEF_OP(AtomicFetchNeg) { const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + OpSize == 1 ? 
ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); @@ -528,8 +527,7 @@ DEF_OP(TelemetrySetValue) { if (CTX->HostFeatures.SupportsAtomics) { stsetl(ARMEmitter::SubRegSize::i64Bit, TMP1, TMP2); - } - else { + } else { ARMEmitter::BackwardLabel LoopTop; Bind(&LoopTop); ldaxr(ARMEmitter::SubRegSize::i64Bit, TMP3, TMP2); @@ -541,5 +539,4 @@ DEF_OP(TelemetrySetValue) { } #undef DEF_OP -} - +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp index c982e84df5..4880e09265 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp @@ -19,7 +19,7 @@ tags: backend|arm64 #include namespace FEXCore::CPU { -#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const *IROp, IR::NodeID Node) +#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node) DEF_OP(CallbackReturn) { // spill back to CTX @@ -94,7 +94,7 @@ DEF_OP(Jump) { static ARMEmitter::Condition MapBranchCC(IR::CondClassType Cond) { switch (Cond.Val) { - case FEXCore::IR::COND_EQ: return ARMEmitter::Condition::CC_EQ; + case FEXCore::IR::COND_EQ: return ARMEmitter::Condition::CC_EQ; case FEXCore::IR::COND_NEQ: return ARMEmitter::Condition::CC_NE; case FEXCore::IR::COND_SGE: return ARMEmitter::Condition::CC_GE; case FEXCore::IR::COND_SLT: return ARMEmitter::Condition::CC_LT; @@ -106,17 +106,15 @@ static ARMEmitter::Condition MapBranchCC(IR::CondClassType Cond) { case FEXCore::IR::COND_ULE: return ARMEmitter::Condition::CC_LS; case FEXCore::IR::COND_FLU: return ARMEmitter::Condition::CC_LT; case FEXCore::IR::COND_FGE: return ARMEmitter::Condition::CC_GE; - case FEXCore::IR::COND_FLEU:return ARMEmitter::Condition::CC_LE; + case FEXCore::IR::COND_FLEU: return ARMEmitter::Condition::CC_LE; case FEXCore::IR::COND_FGT: return ARMEmitter::Condition::CC_GT; - case 
FEXCore::IR::COND_FU: return ARMEmitter::Condition::CC_VS; + case FEXCore::IR::COND_FU: return ARMEmitter::Condition::CC_VS; case FEXCore::IR::COND_FNU: return ARMEmitter::Condition::CC_VC; case FEXCore::IR::COND_VS: case FEXCore::IR::COND_VC: - case FEXCore::IR::COND_MI: return ARMEmitter::Condition::CC_MI; - case FEXCore::IR::COND_PL: return ARMEmitter::Condition::CC_PL; - default: - LOGMAN_MSG_A_FMT("Unsupported compare type"); - return ARMEmitter::Condition::CC_NV; + case FEXCore::IR::COND_MI: return ARMEmitter::Condition::CC_MI; + case FEXCore::IR::COND_PL: return ARMEmitter::Condition::CC_PL; + default: LOGMAN_MSG_A_FMT("Unsupported compare type"); return ARMEmitter::Condition::CC_NV; } } @@ -135,13 +133,12 @@ DEF_OP(CondJump) { LOGMAN_THROW_A_FMT(IsGPR(Op->Cmp1.ID()), "CondJump: Expected GPR"); LOGMAN_THROW_A_FMT(isConst && Const == 0, "CondJump: Expected 0 source"); - LOGMAN_THROW_A_FMT(Op->Cond.Val == FEXCore::IR::COND_EQ || - Op->Cond.Val == FEXCore::IR::COND_NEQ, - "CondJump: Expected simple condition"); + LOGMAN_THROW_A_FMT(Op->Cond.Val == FEXCore::IR::COND_EQ || Op->Cond.Val == FEXCore::IR::COND_NEQ, "CondJump: Expected simple " + "condition"); if (Op->Cond.Val == FEXCore::IR::COND_EQ) { cbz(Size, GetReg(Op->Cmp1.ID()), TrueTargetLabel); - } else { + } else { cbnz(Size, GetReg(Op->Cmp1.ID()), TrueTargetLabel); } @@ -181,7 +178,9 @@ DEF_OP(Syscall) { uint64_t SPOffset = AlignUp(FEXCore::HLE::SyscallArguments::MAX_ARGS * 8, 16); sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, SPOffset); for (uint32_t i = 0; i < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++i) { - if (Op->Header.Args[i].IsInvalid()) continue; + if (Op->Header.Args[i].IsInvalid()) { + continue; + } str(GetReg(Op->Header.Args[i].ID()).X(), ARMEmitter::Reg::rsp, i * 8); } @@ -193,8 +192,7 @@ DEF_OP(Syscall) { add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r2, ARMEmitter::Reg::rsp, 0); if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { 
GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); - } - else { + } else { blr(ARMEmitter::Reg::r3); } @@ -232,20 +230,19 @@ DEF_OP(InlineSyscall) { // X6: Arg6 - Doesn't exist in x86-64 land. RA INTERSECT // One argument is removed from the SyscallArguments::MAX_ARGS since the first argument was syscall number - const static std::array RegArgs = {{ - ARMEmitter::XReg::x0, ARMEmitter::XReg::x1, ARMEmitter::XReg::x2, ARMEmitter::XReg::x3, ARMEmitter::XReg::x4, ARMEmitter::XReg::x5 - }}; + const static std::array RegArgs = { + {ARMEmitter::XReg::x0, ARMEmitter::XReg::x1, ARMEmitter::XReg::x2, ARMEmitter::XReg::x3, ARMEmitter::XReg::x4, ARMEmitter::XReg::x5}}; - bool Intersects{}; + bool Intersects {}; // We always need to spill x8 since we can't know if it is live at this SSA location uint32_t SpillMask = 1U << 8; - for (uint32_t i = 0; i < FEXCore::HLE::SyscallArguments::MAX_ARGS-1; ++i) { - if (Op->Header.Args[i].IsInvalid()) break; + for (uint32_t i = 0; i < FEXCore::HLE::SyscallArguments::MAX_ARGS - 1; ++i) { + if (Op->Header.Args[i].IsInvalid()) { + break; + } auto Reg = GetReg(Op->Header.Args[i].ID()); - if (Reg == ARMEmitter::Reg::r8 || - Reg == ARMEmitter::Reg::r4 || - Reg == ARMEmitter::Reg::r5) { + if (Reg == ARMEmitter::Reg::r8 || Reg == ARMEmitter::Reg::r4 || Reg == ARMEmitter::Reg::r5) { SpillMask |= (1U << Reg.Idx()); Intersects = true; @@ -269,8 +266,10 @@ DEF_OP(InlineSyscall) { const auto EmitSize = CTX->Config.Is64BitMode() ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit; const auto EmitSubSize = CTX->Config.Is64BitMode() ? 
ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i32Bit; if (Intersects) { - for (uint32_t i = 0; i < FEXCore::HLE::SyscallArguments::MAX_ARGS-1; ++i) { - if (Op->Header.Args[i].IsInvalid()) break; + for (uint32_t i = 0; i < FEXCore::HLE::SyscallArguments::MAX_ARGS - 1; ++i) { + if (Op->Header.Args[i].IsInvalid()) { + break; + } auto Reg = GetReg(Op->Header.Args[i].ID()); // In the case of intersection with x4, x5, or x8 then these are currently SRA @@ -278,21 +277,19 @@ DEF_OP(InlineSyscall) { // Just load back from the context. Could be slightly smarter but this is fairly uncommon if (Reg == ARMEmitter::Reg::r8) { ldr(EmitSubSize, RegArgs[i].R(), STATE, offsetof(FEXCore::Core::CpuStateFrame, State.gregs[X86State::REG_RSP])); - } - else if (Reg == ARMEmitter::Reg::r4) { + } else if (Reg == ARMEmitter::Reg::r4) { ldr(EmitSubSize, RegArgs[i].R(), STATE, offsetof(FEXCore::Core::CpuStateFrame, State.gregs[X86State::REG_RAX])); - } - else if (Reg == ARMEmitter::Reg::r5) { + } else if (Reg == ARMEmitter::Reg::r5) { ldr(EmitSubSize, RegArgs[i].R(), STATE, offsetof(FEXCore::Core::CpuStateFrame, State.gregs[X86State::REG_RCX])); - } - else { + } else { mov(EmitSize, RegArgs[i].R(), Reg); } } - } - else { - for (uint32_t i = 0; i < FEXCore::HLE::SyscallArguments::MAX_ARGS-1; ++i) { - if (Op->Header.Args[i].IsInvalid()) break; + } else { + for (uint32_t i = 0; i < FEXCore::HLE::SyscallArguments::MAX_ARGS - 1; ++i) { + if (Op->Header.Args[i].IsInvalid()) { + break; + } mov(EmitSize, RegArgs[i].R(), GetReg(Op->Header.Args[i].ID())); } @@ -333,8 +330,7 @@ DEF_OP(Thunk) { LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r2, (uintptr_t)thunkFn); if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { GenerateIndirectRuntimeCall(ARMEmitter::Reg::r2); - } - else { + } else { blr(ARMEmitter::Reg::r2); } @@ -345,7 +341,7 @@ DEF_OP(Thunk) { DEF_OP(ValidateCode) { auto Op = IROp->C(); - const auto *OldCode = (const uint8_t *)&Op->CodeOriginalLow; + const auto* OldCode 
= (const uint8_t*)&Op->CodeOriginalLow; int len = Op->CodeLength; int idx = 0; @@ -355,37 +351,33 @@ DEF_OP(ValidateCode) { const auto Dst = GetReg(Node); - while (len >= 8) - { + while (len >= 8) { ldr(ARMEmitter::XReg::x2, TMP1, idx); - LoadConstant(ARMEmitter::Size::i64Bit, TMP4, *(const uint32_t *)(OldCode + idx)); + LoadConstant(ARMEmitter::Size::i64Bit, TMP4, *(const uint32_t*)(OldCode + idx)); cmp(ARMEmitter::Size::i64Bit, TMP3, TMP4); csel(ARMEmitter::Size::i64Bit, Dst, Dst, TMP2, ARMEmitter::Condition::CC_EQ); len -= 8; idx += 8; } - while (len >= 4) - { + while (len >= 4) { ldr(ARMEmitter::WReg::w2, TMP1, idx); - LoadConstant(ARMEmitter::Size::i64Bit, TMP4, *(const uint32_t *)(OldCode + idx)); + LoadConstant(ARMEmitter::Size::i64Bit, TMP4, *(const uint32_t*)(OldCode + idx)); cmp(ARMEmitter::Size::i32Bit, TMP3, TMP4); csel(ARMEmitter::Size::i64Bit, Dst, Dst, TMP2, ARMEmitter::Condition::CC_EQ); len -= 4; idx += 4; } - while (len >= 2) - { + while (len >= 2) { ldrh(TMP3, TMP1, idx); - LoadConstant(ARMEmitter::Size::i64Bit, TMP4, *(const uint16_t *)(OldCode + idx)); + LoadConstant(ARMEmitter::Size::i64Bit, TMP4, *(const uint16_t*)(OldCode + idx)); cmp(ARMEmitter::Size::i32Bit, TMP3, TMP4); csel(ARMEmitter::Size::i64Bit, Dst, Dst, TMP2, ARMEmitter::Condition::CC_EQ); len -= 2; idx += 2; } - while (len >= 1) - { + while (len >= 1) { ldrb(TMP3, TMP1, idx); - LoadConstant(ARMEmitter::Size::i64Bit, TMP4, *(const uint8_t *)(OldCode + idx)); + LoadConstant(ARMEmitter::Size::i64Bit, TMP4, *(const uint8_t*)(OldCode + idx)); cmp(ARMEmitter::Size::i32Bit, TMP3, TMP4); csel(ARMEmitter::Size::i64Bit, Dst, Dst, TMP2, ARMEmitter::Condition::CC_EQ); len -= 1; @@ -407,8 +399,7 @@ DEF_OP(ThreadRemoveCodeEntry) { ldr(ARMEmitter::XReg::x2, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.ThreadRemoveCodeEntryFromJIT)); if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { GenerateIndirectRuntimeCall(ARMEmitter::Reg::r2); - } - else { + } else { 
blr(ARMEmitter::Reg::r2); } FillStaticRegs(); @@ -439,8 +430,7 @@ DEF_OP(CPUID) { if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { GenerateIndirectRuntimeCall<__uint128_t, void*, uint64_t, uint64_t>(ARMEmitter::Reg::r3); - } - else { + } else { blr(ARMEmitter::Reg::r3); } @@ -456,7 +446,7 @@ DEF_OP(CPUID) { // Results are in x0, x1 // Results want to be in a i64v2 vector auto Dst = GetRegPair(Node); - mov(ARMEmitter::Size::i64Bit, Dst.first, TMP1); + mov(ARMEmitter::Size::i64Bit, Dst.first, TMP1); mov(ARMEmitter::Size::i64Bit, Dst.second, TMP2); } @@ -474,8 +464,7 @@ DEF_OP(XGetBV) { ldr(ARMEmitter::XReg::x2, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.XCRFunction)); if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { GenerateIndirectRuntimeCall(ARMEmitter::Reg::r2); - } - else { + } else { blr(ARMEmitter::Reg::r2); } @@ -490,10 +479,9 @@ DEF_OP(XGetBV) { // Results are in x0 // Results want to be in a i32v2 vector auto Dst = GetRegPair(Node); - mov(ARMEmitter::Size::i32Bit, Dst.first, TMP1); + mov(ARMEmitter::Size::i32Bit, Dst.first, TMP1); lsr(ARMEmitter::Size::i64Bit, Dst.second, TMP1, 32); } #undef DEF_OP -} - +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp index 8a0f218467..7f77fbef68 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ConversionOps.cpp @@ -9,7 +9,7 @@ tags: backend|arm64 #include "Interface/Core/JIT/Arm64/JITClass.h" namespace FEXCore::CPU { -#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const *IROp, IR::NodeID Node) +#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node) DEF_OP(VInsGPR) { const auto Op = IROp->C(); const auto OpSize = IROp->Size; @@ -20,9 +20,10 @@ DEF_OP(VInsGPR) { LOGMAN_THROW_AA_FMT(ElementSize == 8 || ElementSize == 4 || ElementSize == 2 || ElementSize == 1, 
"Unexpected {} size", __func__); const auto SubEmitSize = ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; const auto ElementsPer128Bit = 16 / ElementSize; const auto Dst = GetVReg(Node); @@ -94,21 +95,17 @@ DEF_OP(VCastFromGPR) { auto Src = GetReg(Op->Src.ID()); switch (Op->Header.ElementSize) { - case 1: - uxtb(ARMEmitter::Size::i32Bit, TMP1, Src); - fmov(ARMEmitter::Size::i32Bit, Dst.S(), TMP1); - break; - case 2: - uxth(ARMEmitter::Size::i32Bit, TMP1, Src); - fmov(ARMEmitter::Size::i32Bit, Dst.S(), TMP1); - break; - case 4: - fmov(ARMEmitter::Size::i32Bit, Dst.S(), Src); - break; - case 8: - fmov(ARMEmitter::Size::i64Bit, Dst.D(), Src); - break; - default: LOGMAN_MSG_A_FMT("Unknown castGPR element size: {}", Op->Header.ElementSize); + case 1: + uxtb(ARMEmitter::Size::i32Bit, TMP1, Src); + fmov(ARMEmitter::Size::i32Bit, Dst.S(), TMP1); + break; + case 2: + uxth(ARMEmitter::Size::i32Bit, TMP1, Src); + fmov(ARMEmitter::Size::i32Bit, Dst.S(), TMP1); + break; + case 4: fmov(ARMEmitter::Size::i32Bit, Dst.S(), Src); break; + case 8: fmov(ARMEmitter::Size::i64Bit, Dst.D(), Src); break; + default: LOGMAN_MSG_A_FMT("Unknown castGPR element size: {}", Op->Header.ElementSize); } } @@ -122,14 +119,14 @@ DEF_OP(VDupFromGPR) { const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto ElementSize = IROp->ElementSize; - LOGMAN_THROW_AA_FMT(ElementSize == 8 || ElementSize == 4 || ElementSize == 2 || ElementSize == 1, - "Unexpected {} element size: {}", __func__, ElementSize); + LOGMAN_THROW_AA_FMT(ElementSize == 8 || ElementSize == 4 || ElementSize == 2 || ElementSize == 1, 
"Unexpected {} element size: {}", + __func__, ElementSize); - const auto SubEmitSize = - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubEmitSize = ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { dup(SubEmitSize, Dst.Z(), Src); @@ -148,34 +145,33 @@ DEF_OP(Float_FromGPR_S) { auto Src = GetReg(Op->Src.ID()); switch (Conv) { - case 0x0204: { // Half <- int32_t - scvtf(ARMEmitter::Size::i32Bit, Dst.H(), Src); - break; - } - case 0x0208: { // Half <- int64_t - scvtf(ARMEmitter::Size::i64Bit, Dst.H(), Src); - break; - } - case 0x0404: { // Float <- int32_t - scvtf(ARMEmitter::Size::i32Bit, Dst.S(), Src); - break; - } - case 0x0408: { // Float <- int64_t - scvtf(ARMEmitter::Size::i64Bit, Dst.S(), Src); - break; - } - case 0x0804: { // Double <- int32_t - scvtf(ARMEmitter::Size::i32Bit, Dst.D(), Src); - break; - } - case 0x0808: { // Double <- int64_t - scvtf(ARMEmitter::Size::i64Bit, Dst.D(), Src); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled conversion mask: Mask=0x{:04x}, ElementSize={}, SrcElementSize={}", - Conv, ElementSize, Op->SrcElementSize); - break; + case 0x0204: { // Half <- int32_t + scvtf(ARMEmitter::Size::i32Bit, Dst.H(), Src); + break; + } + case 0x0208: { // Half <- int64_t + scvtf(ARMEmitter::Size::i64Bit, Dst.H(), Src); + break; + } + case 0x0404: { // Float <- int32_t + scvtf(ARMEmitter::Size::i32Bit, Dst.S(), Src); + break; + } + case 0x0408: { // Float <- int64_t + scvtf(ARMEmitter::Size::i64Bit, Dst.S(), Src); + break; + } + case 0x0804: { // Double <- int32_t + scvtf(ARMEmitter::Size::i32Bit, 
Dst.D(), Src); + break; + } + case 0x0808: { // Double <- int64_t + scvtf(ARMEmitter::Size::i64Bit, Dst.D(), Src); + break; + } + default: + LOGMAN_MSG_A_FMT("Unhandled conversion mask: Mask=0x{:04x}, ElementSize={}, SrcElementSize={}", Conv, ElementSize, Op->SrcElementSize); + break; } } @@ -187,31 +183,31 @@ DEF_OP(Float_FToF) { auto Src = GetVReg(Op->Scalar.ID()); switch (Conv) { - case 0x0204: { // Half <- Float - fcvt(Dst.H(), Src.S()); - break; - } - case 0x0208: { // Half <- Double - fcvt(Dst.H(), Src.D()); - break; - } - case 0x0402: { // Float <- Half - fcvt(Dst.S(), Src.H()); - break; - } - case 0x0802: { // Double <- Half - fcvt(Dst.D(), Src.H()); - break; - } - case 0x0804: { // Double <- Float - fcvt(Dst.D(), Src.S()); - break; - } - case 0x0408: { // Float <- Double - fcvt(Dst.S(), Src.D()); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown FCVT sizes: 0x{:x}", Conv); + case 0x0204: { // Half <- Float + fcvt(Dst.H(), Src.S()); + break; + } + case 0x0208: { // Half <- Double + fcvt(Dst.H(), Src.D()); + break; + } + case 0x0402: { // Float <- Half + fcvt(Dst.S(), Src.H()); + break; + } + case 0x0802: { // Double <- Half + fcvt(Dst.D(), Src.H()); + break; + } + case 0x0804: { // Double <- Float + fcvt(Dst.D(), Src.S()); + break; + } + case 0x0408: { // Float <- Double + fcvt(Dst.S(), Src.D()); + break; + } + default: LOGMAN_MSG_A_FMT("Unknown FCVT sizes: 0x{:x}", Conv); } } @@ -224,8 +220,9 @@ DEF_OP(Vector_SToF) { LOGMAN_THROW_AA_FMT(ElementSize == 8 || ElementSize == 4 || ElementSize == 2, "Unexpected {} size", __func__); const auto SubEmitSize = ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : ARMEmitter::SubRegSize::i16Bit; + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 2 ? 
ARMEmitter::SubRegSize::i16Bit : + ARMEmitter::SubRegSize::i16Bit; const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -236,19 +233,15 @@ DEF_OP(Vector_SToF) { if (OpSize == ElementSize) { if (ElementSize == 8) { scvtf(ARMEmitter::ScalarRegSize::i64Bit, Dst.D(), Vector.D()); - } - else if (ElementSize == 4) { + } else if (ElementSize == 4) { scvtf(ARMEmitter::ScalarRegSize::i32Bit, Dst.S(), Vector.S()); - } - else { + } else { scvtf(ARMEmitter::ScalarRegSize::i16Bit, Dst.H(), Vector.H()); } - } - else { + } else { if (OpSize == 8) { scvtf(SubEmitSize, Dst.D(), Vector.D()); - } - else { + } else { scvtf(SubEmitSize, Dst.Q(), Vector.Q()); } } @@ -264,8 +257,9 @@ DEF_OP(Vector_FToZS) { LOGMAN_THROW_AA_FMT(ElementSize == 8 || ElementSize == 4 || ElementSize == 2, "Unexpected {} size", __func__); const auto SubEmitSize = ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : ARMEmitter::SubRegSize::i16Bit; + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 2 ? 
ARMEmitter::SubRegSize::i16Bit : + ARMEmitter::SubRegSize::i16Bit; const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -276,19 +270,15 @@ DEF_OP(Vector_FToZS) { if (OpSize == ElementSize) { if (ElementSize == 8) { fcvtzs(ARMEmitter::ScalarRegSize::i64Bit, Dst.D(), Vector.D()); - } - else if (ElementSize == 4) { + } else if (ElementSize == 4) { fcvtzs(ARMEmitter::ScalarRegSize::i32Bit, Dst.S(), Vector.S()); - } - else { + } else { fcvtzs(ARMEmitter::ScalarRegSize::i16Bit, Dst.H(), Vector.H()); } - } - else { + } else { if (OpSize == 8) { fcvtzs(SubEmitSize, Dst.D(), Vector.D()); - } - else { + } else { fcvtzs(SubEmitSize, Dst.Q(), Vector.Q()); } } @@ -304,8 +294,9 @@ DEF_OP(Vector_FToS) { LOGMAN_THROW_AA_FMT(ElementSize == 8 || ElementSize == 4 || ElementSize == 2, "Unexpected {} size", __func__); const auto SubEmitSize = ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : ARMEmitter::SubRegSize::i16Bit; + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ARMEmitter::SubRegSize::i16Bit; const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -320,8 +311,7 @@ DEF_OP(Vector_FToS) { if (OpSize == 8) { frinti(SubEmitSize, Dst.D(), Vector.D()); fcvtzs(SubEmitSize, Dst.D(), Dst.D()); - } - else { + } else { frinti(SubEmitSize, Dst.Q(), Vector.Q()); fcvtzs(SubEmitSize, Dst.Q(), Dst.Q()); } @@ -338,8 +328,9 @@ DEF_OP(Vector_FToF) { LOGMAN_THROW_AA_FMT(ElementSize == 8 || ElementSize == 4 || ElementSize == 2, "Unexpected {} size", __func__); const auto SubEmitSize = ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : ARMEmitter::SubRegSize::i16Bit; + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 2 ? 
ARMEmitter::SubRegSize::i16Bit : + ARMEmitter::SubRegSize::i16Bit; const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -361,45 +352,41 @@ DEF_OP(Vector_FToF) { const auto Mask = PRED_TMP_32B.Merging(); switch (Conv) { - case 0x0402: { // Float <- Half - zip1(FEXCore::ARMEmitter::SubRegSize::i16Bit, Dst.Z(), Vector.Z(), Vector.Z()); - fcvtlt(FEXCore::ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Mask, Dst.Z()); - break; - } - case 0x0804: { // Double <- Float - zip1(FEXCore::ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Vector.Z(), Vector.Z()); - fcvtlt(FEXCore::ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Dst.Z()); - break; - } - case 0x0204: { // Half <- Float - fcvtnt(FEXCore::ARMEmitter::SubRegSize::i16Bit, Dst.Z(), Mask, Vector.Z()); - uzp2(FEXCore::ARMEmitter::SubRegSize::i16Bit, Dst.Z(), Dst.Z(), Dst.Z()); - break; - } - case 0x0408: { // Float <- Double - fcvtnt(FEXCore::ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Mask, Vector.Z()); - uzp2(FEXCore::ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Dst.Z(), Dst.Z()); - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown Vector_FToF Type : 0x{:04x}", Conv); - break; + case 0x0402: { // Float <- Half + zip1(FEXCore::ARMEmitter::SubRegSize::i16Bit, Dst.Z(), Vector.Z(), Vector.Z()); + fcvtlt(FEXCore::ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Mask, Dst.Z()); + break; + } + case 0x0804: { // Double <- Float + zip1(FEXCore::ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Vector.Z(), Vector.Z()); + fcvtlt(FEXCore::ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Dst.Z()); + break; + } + case 0x0204: { // Half <- Float + fcvtnt(FEXCore::ARMEmitter::SubRegSize::i16Bit, Dst.Z(), Mask, Vector.Z()); + uzp2(FEXCore::ARMEmitter::SubRegSize::i16Bit, Dst.Z(), Dst.Z(), Dst.Z()); + break; + } + case 0x0408: { // Float <- Double + fcvtnt(FEXCore::ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Mask, Vector.Z()); + uzp2(FEXCore::ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Dst.Z(), Dst.Z()); + break; + } + default: LOGMAN_MSG_A_FMT("Unknown 
Vector_FToF Type : 0x{:04x}", Conv); break; } } else { switch (Conv) { - case 0x0402: // Float <- Half - case 0x0804: { // Double <- Float - fcvtl(SubEmitSize, Dst.D(), Vector.D()); - break; - } - case 0x0204: // Half <- Float - case 0x0408: { // Float <- Double - fcvtn(SubEmitSize, Dst.D(), Vector.D()); - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown Vector_FToF Type : 0x{:04x}", Conv); - break; + case 0x0402: // Float <- Half + case 0x0804: { // Double <- Float + fcvtl(SubEmitSize, Dst.D(), Vector.D()); + break; + } + case 0x0204: // Half <- Float + case 0x0408: { // Float <- Double + fcvtn(SubEmitSize, Dst.D(), Vector.D()); + break; + } + default: LOGMAN_MSG_A_FMT("Unknown Vector_FToF Type : 0x{:04x}", Conv); break; } } } @@ -413,8 +400,9 @@ DEF_OP(Vector_FToI) { LOGMAN_THROW_AA_FMT(ElementSize == 8 || ElementSize == 4 || ElementSize == 2, "Unexpected {} size", __func__); const auto SubEmitSize = ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : ARMEmitter::SubRegSize::i16Bit; + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 2 ? 
ARMEmitter::SubRegSize::i16Bit : + ARMEmitter::SubRegSize::i16Bit; const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); @@ -423,82 +411,51 @@ DEF_OP(Vector_FToI) { const auto Mask = PRED_TMP_32B.Merging(); switch (Op->Round) { - case FEXCore::IR::Round_Nearest.Val: - frintn(SubEmitSize, Dst.Z(), Mask, Vector.Z()); - break; - case FEXCore::IR::Round_Negative_Infinity.Val: - frintm(SubEmitSize, Dst.Z(), Mask, Vector.Z()); - break; - case FEXCore::IR::Round_Positive_Infinity.Val: - frintp(SubEmitSize, Dst.Z(), Mask, Vector.Z()); - break; - case FEXCore::IR::Round_Towards_Zero.Val: - frintz(SubEmitSize, Dst.Z(), Mask, Vector.Z()); - break; - case FEXCore::IR::Round_Host.Val: - frinti(SubEmitSize, Dst.Z(), Mask, Vector.Z()); - break; + case FEXCore::IR::Round_Nearest.Val: frintn(SubEmitSize, Dst.Z(), Mask, Vector.Z()); break; + case FEXCore::IR::Round_Negative_Infinity.Val: frintm(SubEmitSize, Dst.Z(), Mask, Vector.Z()); break; + case FEXCore::IR::Round_Positive_Infinity.Val: frintp(SubEmitSize, Dst.Z(), Mask, Vector.Z()); break; + case FEXCore::IR::Round_Towards_Zero.Val: frintz(SubEmitSize, Dst.Z(), Mask, Vector.Z()); break; + case FEXCore::IR::Round_Host.Val: frinti(SubEmitSize, Dst.Z(), Mask, Vector.Z()); break; } } else { const auto IsScalar = ElementSize == OpSize; if (IsScalar) { - // Since we have multiple overloads of the same name (e.g. - // frinti having AdvSIMD, AdvSIMD scalar, and an SVE version), - // we can't just use a lambda without some seriously ugly casting. - // This is fairly self-contained otherwise. - #define ROUNDING_FN(name) \ - if (ElementSize == 2) { \ - name(Dst.H(), Vector.H()); \ - } else if (ElementSize == 4) { \ - name(Dst.S(), Vector.S()); \ - } else if (ElementSize == 8) { \ - name(Dst.D(), Vector.D()); \ - } else { \ - FEX_UNREACHABLE; \ - } +// Since we have multiple overloads of the same name (e.g. 
+// frinti having AdvSIMD, AdvSIMD scalar, and an SVE version), +// we can't just use a lambda without some seriously ugly casting. +// This is fairly self-contained otherwise. +#define ROUNDING_FN(name) \ + if (ElementSize == 2) { \ + name(Dst.H(), Vector.H()); \ + } else if (ElementSize == 4) { \ + name(Dst.S(), Vector.S()); \ + } else if (ElementSize == 8) { \ + name(Dst.D(), Vector.D()); \ + } else { \ + FEX_UNREACHABLE; \ + } switch (Op->Round) { - case IR::Round_Nearest.Val: - ROUNDING_FN(frintn); - break; - case IR::Round_Negative_Infinity.Val: - ROUNDING_FN(frintm); - break; - case IR::Round_Positive_Infinity.Val: - ROUNDING_FN(frintp); - break; - case IR::Round_Towards_Zero.Val: - ROUNDING_FN(frintz); - break; - case IR::Round_Host.Val: - ROUNDING_FN(frinti); - break; + case IR::Round_Nearest.Val: ROUNDING_FN(frintn); break; + case IR::Round_Negative_Infinity.Val: ROUNDING_FN(frintm); break; + case IR::Round_Positive_Infinity.Val: ROUNDING_FN(frintp); break; + case IR::Round_Towards_Zero.Val: ROUNDING_FN(frintz); break; + case IR::Round_Host.Val: ROUNDING_FN(frinti); break; } - #undef ROUNDING_FN +#undef ROUNDING_FN } else { switch (Op->Round) { - case FEXCore::IR::Round_Nearest.Val: - frintn(SubEmitSize, Dst.Q(), Vector.Q()); - break; - case FEXCore::IR::Round_Negative_Infinity.Val: - frintm(SubEmitSize, Dst.Q(), Vector.Q()); - break; - case FEXCore::IR::Round_Positive_Infinity.Val: - frintp(SubEmitSize, Dst.Q(), Vector.Q()); - break; - case FEXCore::IR::Round_Towards_Zero.Val: - frintz(SubEmitSize, Dst.Q(), Vector.Q()); - break; - case FEXCore::IR::Round_Host.Val: - frinti(SubEmitSize, Dst.Q(), Vector.Q()); - break; + case FEXCore::IR::Round_Nearest.Val: frintn(SubEmitSize, Dst.Q(), Vector.Q()); break; + case FEXCore::IR::Round_Negative_Infinity.Val: frintm(SubEmitSize, Dst.Q(), Vector.Q()); break; + case FEXCore::IR::Round_Positive_Infinity.Val: frintp(SubEmitSize, Dst.Q(), Vector.Q()); break; + case FEXCore::IR::Round_Towards_Zero.Val: 
frintz(SubEmitSize, Dst.Q(), Vector.Q()); break; + case FEXCore::IR::Round_Host.Val: frinti(SubEmitSize, Dst.Q(), Vector.Q()); break; } } } } #undef DEF_OP -} - +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/EncryptionOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/EncryptionOps.cpp index 0276c3306b..62db614ace 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/EncryptionOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/EncryptionOps.cpp @@ -10,7 +10,7 @@ tags: backend|arm64 #include "Interface/IR/Passes/RegisterAllocationPass.h" namespace FEXCore::CPU { -#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const *IROp, IR::NodeID Node) +#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node) DEF_OP(VAESImc) { auto Op = IROp->C(); @@ -26,8 +26,7 @@ DEF_OP(VAESEnc) { const auto State = GetVReg(Op->State.ID()); const auto ZeroReg = GetVReg(Op->ZeroReg.ID()); - LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, - "Currently only supports 128-bit operations."); + LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, "Currently only supports 128-bit operations."); if (Dst == State && Dst != Key) { // Optimal case in which Dst already contains the starting state. @@ -35,8 +34,7 @@ DEF_OP(VAESEnc) { aese(Dst.Q(), ZeroReg.Q()); aesmc(Dst.Q(), Dst.Q()); eor(Dst.Q(), Dst.Q(), Key.Q()); - } - else { + } else { mov(VTMP1.Q(), State.Q()); aese(VTMP1, ZeroReg.Q()); aesmc(VTMP1, VTMP1); @@ -53,16 +51,14 @@ DEF_OP(VAESEncLast) { const auto State = GetVReg(Op->State.ID()); const auto ZeroReg = GetVReg(Op->ZeroReg.ID()); - LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, - "Currently only supports 128-bit operations."); + LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, "Currently only supports 128-bit operations."); if (Dst == State && Dst != Key) { // Optimal case in which Dst already contains the starting state. 
// This matches the common case of XMM AES. aese(Dst.Q(), ZeroReg.Q()); eor(Dst.Q(), Dst.Q(), Key.Q()); - } - else { + } else { mov(VTMP1.Q(), State.Q()); aese(VTMP1, ZeroReg.Q()); eor(Dst.Q(), VTMP1.Q(), Key.Q()); @@ -78,8 +74,7 @@ DEF_OP(VAESDec) { const auto State = GetVReg(Op->State.ID()); const auto ZeroReg = GetVReg(Op->ZeroReg.ID()); - LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, - "Currently only supports 128-bit operations."); + LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, "Currently only supports 128-bit operations."); if (Dst == State && Dst != Key) { // Optimal case in which Dst already contains the starting state. @@ -87,8 +82,7 @@ DEF_OP(VAESDec) { aesd(Dst.Q(), ZeroReg.Q()); aesimc(Dst.Q(), Dst.Q()); eor(Dst.Q(), Dst.Q(), Key.Q()); - } - else { + } else { mov(VTMP1.Q(), State.Q()); aesd(VTMP1, ZeroReg.Q()); aesimc(VTMP1, VTMP1); @@ -105,16 +99,14 @@ DEF_OP(VAESDecLast) { const auto State = GetVReg(Op->State.ID()); const auto ZeroReg = GetVReg(Op->ZeroReg.ID()); - LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, - "Currently only supports 128-bit operations."); + LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, "Currently only supports 128-bit operations."); if (Dst == State && Dst != Key) { // Optimal case in which Dst already contains the starting state. // This matches the common case of XMM AES. 
aesd(Dst.Q(), ZeroReg.Q()); eor(Dst.Q(), Dst.Q(), Key.Q()); - } - else { + } else { mov(VTMP1.Q(), State.Q()); aesd(VTMP1, ZeroReg.Q()); eor(Dst.Q(), VTMP1.Q(), Key.Q()); @@ -149,8 +141,7 @@ DEF_OP(VAESKeyGenAssist) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, static_cast(Op->RCON) << 32); dup(ARMEmitter::SubRegSize::i64Bit, VTMP2.Q(), TMP1); eor(Dst.Q(), Dst.Q(), VTMP2.Q()); - } - else { + } else { tbl(Dst.Q(), Dst.Q(), Swizzle.Q()); } } @@ -163,19 +154,11 @@ DEF_OP(CRC32) { const auto Src2 = GetReg(Op->Src2.ID()); switch (Op->SrcSize) { - case 1: - crc32cb(Dst.W(), Src1.W(), Src2.W()); - break; - case 2: - crc32ch(Dst.W(), Src1.W(), Src2.W()); - break; - case 4: - crc32cw(Dst.W(), Src1.W(), Src2.W()); - break; - case 8: - crc32cx(Dst.X(), Src1.X(), Src2.X()); - break; - default: LOGMAN_MSG_A_FMT("Unknown CRC32 size: {}", Op->SrcSize); + case 1: crc32cb(Dst.W(), Src1.W(), Src2.W()); break; + case 2: crc32ch(Dst.W(), Src1.W(), Src2.W()); break; + case 4: crc32cw(Dst.W(), Src1.W(), Src2.W()); break; + case 8: crc32cx(Dst.X(), Src1.X(), Src2.X()); break; + default: LOGMAN_MSG_A_FMT("Unknown CRC32 size: {}", Op->SrcSize); } } @@ -197,8 +180,7 @@ DEF_OP(VSha256U0) { if (Dst == Src1) { sha256su0(Dst, Src2); - } - else { + } else { mov(VTMP1.Q(), Src1.Q()); sha256su0(VTMP1, Src2); mov(Dst.Q(), Src1.Q()); @@ -209,17 +191,14 @@ DEF_OP(PCLMUL) { const auto Op = IROp->C(); const auto OpSize = IROp->Size; - const auto Dst = GetVReg(Node); + const auto Dst = GetVReg(Node); const auto Src1 = GetVReg(Op->Src1.ID()); const auto Src2 = GetVReg(Op->Src2.ID()); - LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, - "Currently only supports 128-bit operations."); + LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE, "Currently only supports 128-bit operations."); switch (Op->Selector) { - case 0b00000000: - pmull(ARMEmitter::SubRegSize::i128Bit, Dst.D(), Src1.D(), Src2.D()); - break; + case 0b00000000: pmull(ARMEmitter::SubRegSize::i128Bit, Dst.D(), 
Src1.D(), Src2.D()); break; case 0b00000001: dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), Src1.Q(), 1); pmull(ARMEmitter::SubRegSize::i128Bit, Dst.D(), VTMP1.D(), Src2.D()); @@ -228,14 +207,10 @@ DEF_OP(PCLMUL) { dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), Src2.Q(), 1); pmull(ARMEmitter::SubRegSize::i128Bit, Dst.D(), VTMP1.D(), Src1.D()); break; - case 0b00010001: - pmull2(ARMEmitter::SubRegSize::i128Bit, Dst.Q(), Src1.Q(), Src2.Q()); - break; - default: - LOGMAN_MSG_A_FMT("Unknown PCLMUL selector: {}", Op->Selector); - break; + case 0b00010001: pmull2(ARMEmitter::SubRegSize::i128Bit, Dst.Q(), Src1.Q(), Src2.Q()); break; + default: LOGMAN_MSG_A_FMT("Unknown PCLMUL selector: {}", Op->Selector); break; } } #undef DEF_OP -} +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/FlagOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/FlagOps.cpp index 92ec9659b5..b4d1e6d967 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/FlagOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/FlagOps.cpp @@ -8,12 +8,11 @@ tags: backend|arm64 #include "Interface/Core/JIT/Arm64/JITClass.h" namespace FEXCore::CPU { -#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const *IROp, IR::NodeID Node) +#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node) DEF_OP(GetHostFlag) { auto Op = IROp->C(); ubfx(ARMEmitter::Size::i64Bit, GetReg(Node), GetReg(Op->Value.ID()), Op->Flag, 1); } #undef DEF_OP -} - +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp index adaa1c2f92..9db0e7c525 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp @@ -73,11 +73,11 @@ static void PrintValue(uint64_t Value) { static void PrintVectorValue(uint64_t Value, uint64_t ValueUpper) { LogMan::Msg::DFmt("Value: 0x{:016x}'{:016x}", ValueUpper, Value); } -} +} // namespace namespace FEXCore::CPU { 
-void Arm64JITCore::Op_Unhandled(IR::IROp_Header const *IROp, IR::NodeID Node) { +void Arm64JITCore::Op_Unhandled(const IR::IROp_Header* IROp, IR::NodeID Node) { FallbackInfo Info; if (!InterpreterOps::GetFallbackHandler(CTX->HostFeatures.SupportsPreserveAllABI, IROp, &Info)) { #if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED @@ -118,379 +118,347 @@ void Arm64JITCore::Op_Unhandled(IR::IROp_Header const *IROp, IR::NodeID Node) { mov(Dst.W(), TMP1.W()); }; - switch(Info.ABI) { - case FABI_F80_I16_F32:{ - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - - const auto Src1 = GetVReg(IROp->Args[0].ID()); - fmov(ARMEmitter::SReg::s0, Src1.S()); - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); - ldr(ARMEmitter::XReg::x1, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall<__uint128_t, uint16_t, float>(ARMEmitter::Reg::r1); - } - else { - blr(ARMEmitter::Reg::r1); - } - - FillF80Result(); + switch (Info.ABI) { + case FABI_F80_I16_F32: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); + + const auto Src1 = GetVReg(IROp->Args[0].ID()); + fmov(ARMEmitter::SReg::s0, Src1.S()); + ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + ldr(ARMEmitter::XReg::x1, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall<__uint128_t, uint16_t, float>(ARMEmitter::Reg::r1); + } else { + blr(ARMEmitter::Reg::r1); } - break; - case FABI_F80_I16_F64:{ - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - - const auto Src1 = GetVReg(IROp->Args[0].ID()); - mov(ARMEmitter::DReg::d0, Src1.D()); - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); - ldr(ARMEmitter::XReg::x1, STATE_PTR(CpuStateFrame, 
Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall<__uint128_t, uint16_t, double>(ARMEmitter::Reg::r1); - } - else { - blr(ARMEmitter::Reg::r1); - } - - FillF80Result(); + FillF80Result(); + } break; + + case FABI_F80_I16_F64: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); + + const auto Src1 = GetVReg(IROp->Args[0].ID()); + mov(ARMEmitter::DReg::d0, Src1.D()); + ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + ldr(ARMEmitter::XReg::x1, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall<__uint128_t, uint16_t, double>(ARMEmitter::Reg::r1); + } else { + blr(ARMEmitter::Reg::r1); } - break; - case FABI_F80_I16_I16: - case FABI_F80_I16_I32: { - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - - const auto Src1 = GetReg(IROp->Args[0].ID()); - if (Info.ABI == FABI_F80_I16_I16) { - sxth(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r1, Src1); - } - else { - mov(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r1, Src1); - } - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); - ldr(ARMEmitter::XReg::x2, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall<__uint128_t, uint16_t, uint32_t>(ARMEmitter::Reg::r2); - } - else { - blr(ARMEmitter::Reg::r2); - } - - FillF80Result(); + FillF80Result(); + } break; + + case FABI_F80_I16_I16: + case FABI_F80_I16_I32: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); + + const auto Src1 = GetReg(IROp->Args[0].ID()); + if (Info.ABI == FABI_F80_I16_I16) { + sxth(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r1, Src1); + } else { + mov(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r1, Src1); + } + 
ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + ldr(ARMEmitter::XReg::x2, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall<__uint128_t, uint16_t, uint32_t>(ARMEmitter::Reg::r2); + } else { + blr(ARMEmitter::Reg::r2); } - break; - case FABI_F32_I16_F80:{ - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); + FillF80Result(); + } break; - const auto Src1 = GetVReg(IROp->Args[0].ID()); + case FABI_F32_I16_F80: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); - umov(ARMEmitter::Reg::r1, Src1, 0); - umov(ARMEmitter::Reg::r2, Src1, 4); + const auto Src1 = GetVReg(IROp->Args[0].ID()); - ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); - } - else { - blr(ARMEmitter::Reg::r3); - } + ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + umov(ARMEmitter::Reg::r1, Src1, 0); + umov(ARMEmitter::Reg::r2, Src1, 4); - if (!TMP_ABIARGS) { - fmov(VTMP1.S(), ARMEmitter::SReg::s0); - } - FillForABICall(Info.SupportsPreserveAllABI, true); + ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); + } else { + blr(ARMEmitter::Reg::r3); + } - const auto Dst = GetVReg(Node); - fmov(Dst.S(), VTMP1.S()); + if (!TMP_ABIARGS) { + fmov(VTMP1.S(), ARMEmitter::SReg::s0); } - break; + FillForABICall(Info.SupportsPreserveAllABI, true); - case FABI_F64_I16_F80:{ - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); + const auto Dst = GetVReg(Node); + fmov(Dst.S(), VTMP1.S()); + } 
break; - const auto Src1 = GetVReg(IROp->Args[0].ID()); + case FABI_F64_I16_F80: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); - umov(ARMEmitter::Reg::r1, Src1, 0); - umov(ARMEmitter::Reg::r2, Src1, 4); + const auto Src1 = GetVReg(IROp->Args[0].ID()); - ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); - } - else { - blr(ARMEmitter::Reg::r3); - } + ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + umov(ARMEmitter::Reg::r1, Src1, 0); + umov(ARMEmitter::Reg::r2, Src1, 4); - FillF64Result(); + ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); + } else { + blr(ARMEmitter::Reg::r3); } - break; - case FABI_F64_I16_F64: { - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); + FillF64Result(); + } break; - const auto Src1 = GetVReg(IROp->Args[0].ID()); + case FABI_F64_I16_F64: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - mov(ARMEmitter::DReg::d0, Src1.D()); - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); - ldr(ARMEmitter::XReg::x1, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r1); - } - else { - blr(ARMEmitter::Reg::r1); - } + const auto Src1 = GetVReg(IROp->Args[0].ID()); - FillF64Result(); + mov(ARMEmitter::DReg::d0, Src1.D()); + ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + ldr(ARMEmitter::XReg::x1, STATE_PTR(CpuStateFrame, 
Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r1); + } else { + blr(ARMEmitter::Reg::r1); } - break; - case FABI_F64_I16_F64_F64: { - const auto Src1 = GetVReg(IROp->Args[0].ID()); - const auto Src2 = GetVReg(IROp->Args[1].ID()); + FillF64Result(); + } break; - mov(VTMP1.D(), Src1.D()); - mov(VTMP2.D(), Src2.D()); + case FABI_F64_I16_F64_F64: { + const auto Src1 = GetVReg(IROp->Args[0].ID()); + const auto Src2 = GetVReg(IROp->Args[1].ID()); - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); + mov(VTMP1.D(), Src1.D()); + mov(VTMP2.D(), Src2.D()); - if (!TMP_ABIARGS) { - mov(ARMEmitter::DReg::d0, VTMP1.D()); - mov(ARMEmitter::DReg::d1, VTMP2.D()); - } + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); - ldr(ARMEmitter::XReg::x1, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r1); - } - else { - blr(ARMEmitter::Reg::r1); - } + if (!TMP_ABIARGS) { + mov(ARMEmitter::DReg::d0, VTMP1.D()); + mov(ARMEmitter::DReg::d1, VTMP2.D()); + } - FillF64Result(); + ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + ldr(ARMEmitter::XReg::x1, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r1); + } else { + blr(ARMEmitter::Reg::r1); } - break; - case FABI_I16_I16_F80:{ - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); + FillF64Result(); + } break; - const auto Src1 = GetVReg(IROp->Args[0].ID()); + case FABI_I16_I16_F80: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, 
FCW)); - umov(ARMEmitter::Reg::r1, Src1, 0); - umov(ARMEmitter::Reg::r2, Src1, 4); + const auto Src1 = GetVReg(IROp->Args[0].ID()); - ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); - } - else { - blr(ARMEmitter::Reg::r3); - } + ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + umov(ARMEmitter::Reg::r1, Src1, 0); + umov(ARMEmitter::Reg::r2, Src1, 4); - if (!TMP_ABIARGS) { - mov(TMP1, ARMEmitter::XReg::x0); - } - FillForABICall(Info.SupportsPreserveAllABI, true); + ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); + } else { + blr(ARMEmitter::Reg::r3); + } - const auto Dst = GetReg(Node); - sxth(ARMEmitter::Size::i64Bit, Dst, TMP1); + if (!TMP_ABIARGS) { + mov(TMP1, ARMEmitter::XReg::x0); } - break; - case FABI_I32_I16_F80:{ - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); + FillForABICall(Info.SupportsPreserveAllABI, true); - const auto Src1 = GetVReg(IROp->Args[0].ID()); + const auto Dst = GetReg(Node); + sxth(ARMEmitter::Size::i64Bit, Dst, TMP1); + } break; + case FABI_I32_I16_F80: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); + + const auto Src1 = GetVReg(IROp->Args[0].ID()); + + ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + umov(ARMEmitter::Reg::r1, Src1, 0); + umov(ARMEmitter::Reg::r2, Src1, 4); + + ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); + } else { + blr(ARMEmitter::Reg::r3); + } - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, 
FCW)); - umov(ARMEmitter::Reg::r1, Src1, 0); - umov(ARMEmitter::Reg::r2, Src1, 4); + FillI32Result(); + } break; + case FABI_I64_I16_F80: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); - } - else { - blr(ARMEmitter::Reg::r3); - } + const auto Src1 = GetVReg(IROp->Args[0].ID()); - FillI32Result(); - } - break; - case FABI_I64_I16_F80:{ - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - - const auto Src1 = GetVReg(IROp->Args[0].ID()); - - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); - umov(ARMEmitter::Reg::r1, Src1, 0); - umov(ARMEmitter::Reg::r2, Src1, 4); - - ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); - } - else { - blr(ARMEmitter::Reg::r3); - } - - if (!TMP_ABIARGS) { - mov(TMP1, ARMEmitter::XReg::x0); - } - FillForABICall(Info.SupportsPreserveAllABI, true); - - const auto Dst = GetReg(Node); - mov(ARMEmitter::Size::i64Bit, Dst, TMP1); - } - break; - case FABI_I64_I16_F80_F80:{ - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - - const auto Src1 = GetVReg(IROp->Args[0].ID()); - const auto Src2 = GetVReg(IROp->Args[1].ID()); - - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); - umov(ARMEmitter::Reg::r1, Src1, 0); - umov(ARMEmitter::Reg::r2, Src1, 4); - - umov(ARMEmitter::Reg::r3, Src2, 0); - umov(ARMEmitter::Reg::r4, Src2, 4); - - ldr(ARMEmitter::XReg::x5, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r5); - } - else { - 
blr(ARMEmitter::Reg::r5); - } - - if (!TMP_ABIARGS) { - mov(TMP1, ARMEmitter::XReg::x0); - } - FillForABICall(Info.SupportsPreserveAllABI, true); - - const auto Dst = GetReg(Node); - mov(ARMEmitter::Size::i64Bit, Dst, TMP1); + ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + umov(ARMEmitter::Reg::r1, Src1, 0); + umov(ARMEmitter::Reg::r2, Src1, 4); + + ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); + } else { + blr(ARMEmitter::Reg::r3); } - break; - case FABI_F80_I16_F80:{ - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - const auto Src1 = GetVReg(IROp->Args[0].ID()); + if (!TMP_ABIARGS) { + mov(TMP1, ARMEmitter::XReg::x0); + } + FillForABICall(Info.SupportsPreserveAllABI, true); - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); - umov(ARMEmitter::Reg::r1, Src1, 0); - umov(ARMEmitter::Reg::r2, Src1, 4); + const auto Dst = GetReg(Node); + mov(ARMEmitter::Size::i64Bit, Dst, TMP1); + } break; + case FABI_I64_I16_F80_F80: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); + + const auto Src1 = GetVReg(IROp->Args[0].ID()); + const auto Src2 = GetVReg(IROp->Args[1].ID()); + + ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + umov(ARMEmitter::Reg::r1, Src1, 0); + umov(ARMEmitter::Reg::r2, Src1, 4); + + umov(ARMEmitter::Reg::r3, Src2, 0); + umov(ARMEmitter::Reg::r4, Src2, 4); + + ldr(ARMEmitter::XReg::x5, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r5); + } else { + blr(ARMEmitter::Reg::r5); + } - ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if 
(!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall<__uint128_t, uint16_t, uint64_t, uint64_t>(ARMEmitter::Reg::r3); - } - else { - blr(ARMEmitter::Reg::r3); - } + if (!TMP_ABIARGS) { + mov(TMP1, ARMEmitter::XReg::x0); + } + FillForABICall(Info.SupportsPreserveAllABI, true); - FillF80Result(); + const auto Dst = GetReg(Node); + mov(ARMEmitter::Size::i64Bit, Dst, TMP1); + } break; + case FABI_F80_I16_F80: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); + + const auto Src1 = GetVReg(IROp->Args[0].ID()); + + ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + umov(ARMEmitter::Reg::r1, Src1, 0); + umov(ARMEmitter::Reg::r2, Src1, 4); + + ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall<__uint128_t, uint16_t, uint64_t, uint64_t>(ARMEmitter::Reg::r3); + } else { + blr(ARMEmitter::Reg::r3); } - break; - case FABI_F80_I16_F80_F80:{ - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - const auto Src1 = GetVReg(IROp->Args[0].ID()); - const auto Src2 = GetVReg(IROp->Args[1].ID()); + FillF80Result(); + } break; + case FABI_F80_I16_F80_F80: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); - umov(ARMEmitter::Reg::r1, Src1, 0); - umov(ARMEmitter::Reg::r2, Src1, 4); + const auto Src1 = GetVReg(IROp->Args[0].ID()); + const auto Src2 = GetVReg(IROp->Args[1].ID()); - umov(ARMEmitter::Reg::r3, Src2, 0); - umov(ARMEmitter::Reg::r4, Src2, 4); + ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW)); + umov(ARMEmitter::Reg::r1, Src1, 0); + umov(ARMEmitter::Reg::r2, Src1, 4); - ldr(ARMEmitter::XReg::x5, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - 
GenerateIndirectRuntimeCall<__uint128_t, uint16_t, uint64_t, uint64_t, uint64_t, uint64_t>(ARMEmitter::Reg::r5); - } - else { - blr(ARMEmitter::Reg::r5); - } + umov(ARMEmitter::Reg::r3, Src2, 0); + umov(ARMEmitter::Reg::r4, Src2, 4); - FillF80Result(); + ldr(ARMEmitter::XReg::x5, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall<__uint128_t, uint16_t, uint64_t, uint64_t, uint64_t, uint64_t>(ARMEmitter::Reg::r5); + } else { + blr(ARMEmitter::Reg::r5); } - break; - case FABI_I32_I64_I64_I128_I128_I16: { - const auto Op = IROp->C(); - const auto SrcRAX = GetReg(Op->RAX.ID()); - const auto SrcRDX = GetReg(Op->RDX.ID()); - mov(TMP1, SrcRAX.X()); - mov(TMP2, SrcRDX.X()); + FillF80Result(); + } break; + case FABI_I32_I64_I64_I128_I128_I16: { + const auto Op = IROp->C(); + const auto SrcRAX = GetReg(Op->RAX.ID()); + const auto SrcRDX = GetReg(Op->RDX.ID()); - SpillForABICall(Info.SupportsPreserveAllABI, TMP3, true); + mov(TMP1, SrcRAX.X()); + mov(TMP2, SrcRDX.X()); - const auto Control = Op->Control; + SpillForABICall(Info.SupportsPreserveAllABI, TMP3, true); - const auto Src1 = GetVReg(Op->LHS.ID()); - const auto Src2 = GetVReg(Op->RHS.ID()); + const auto Control = Op->Control; - if (!TMP_ABIARGS) { - mov(ARMEmitter::XReg::x0, TMP1); - mov(ARMEmitter::XReg::x1, TMP2); - } + const auto Src1 = GetVReg(Op->LHS.ID()); + const auto Src2 = GetVReg(Op->RHS.ID()); - umov(ARMEmitter::Reg::r2, Src1, 0); - umov(ARMEmitter::Reg::r3, Src1, 1); + if (!TMP_ABIARGS) { + mov(ARMEmitter::XReg::x0, TMP1); + mov(ARMEmitter::XReg::x1, TMP2); + } - umov(ARMEmitter::Reg::r4, Src2, 0); - umov(ARMEmitter::Reg::r5, Src2, 1); + umov(ARMEmitter::Reg::r2, Src1, 0); + umov(ARMEmitter::Reg::r3, Src1, 1); - movz(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r6, Control); + umov(ARMEmitter::Reg::r4, Src2, 0); + umov(ARMEmitter::Reg::r5, Src2, 1); - ldr(ARMEmitter::XReg::x7, 
STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r7); - } - else { - blr(ARMEmitter::Reg::r7); - } + movz(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r6, Control); - FillI32Result(); + ldr(ARMEmitter::XReg::x7, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r7); + } else { + blr(ARMEmitter::Reg::r7); } - break; - case FABI_I32_I128_I128_I16: { - SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - const auto Op = IROp->C(); + FillI32Result(); + } break; + case FABI_I32_I128_I128_I16: { + SpillForABICall(Info.SupportsPreserveAllABI, TMP1, true); - const auto Src1 = GetVReg(Op->LHS.ID()); - const auto Src2 = GetVReg(Op->RHS.ID()); - const auto Control = Op->Control; + const auto Op = IROp->C(); - umov(ARMEmitter::Reg::r0, Src1, 0); - umov(ARMEmitter::Reg::r1, Src1, 1); + const auto Src1 = GetVReg(Op->LHS.ID()); + const auto Src2 = GetVReg(Op->RHS.ID()); + const auto Control = Op->Control; - umov(ARMEmitter::Reg::r2, Src2, 0); - umov(ARMEmitter::Reg::r3, Src2, 1); + umov(ARMEmitter::Reg::r0, Src1, 0); + umov(ARMEmitter::Reg::r1, Src1, 1); - movz(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r4, Control); + umov(ARMEmitter::Reg::r2, Src2, 0); + umov(ARMEmitter::Reg::r3, Src2, 1); - ldr(ARMEmitter::XReg::x5, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); - if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { - GenerateIndirectRuntimeCall(ARMEmitter::Reg::r5); - } - else { - blr(ARMEmitter::Reg::r5); - } + movz(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r4, Control); - FillI32Result(); + ldr(ARMEmitter::XReg::x5, STATE_PTR(CpuStateFrame, Pointers.Common.FallbackHandlerPointers[Info.HandlerIndex])); + if 
(!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { + GenerateIndirectRuntimeCall(ARMEmitter::Reg::r5); + } else { + blr(ARMEmitter::Reg::r5); } - break; - case FABI_UNKNOWN: - default: + + FillI32Result(); + } break; + case FABI_UNKNOWN: + default: #if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED - LOGMAN_MSG_A_FMT("Unhandled IR Fallback ABI: {} {}", - FEXCore::IR::GetName(IROp->Op), ToUnderlying(Info.ABI)); + LOGMAN_MSG_A_FMT("Unhandled IR Fallback ABI: {} {}", FEXCore::IR::GetName(IROp->Op), ToUnderlying(Info.ABI)); #endif break; } @@ -498,9 +466,9 @@ void Arm64JITCore::Op_Unhandled(IR::IROp_Header const *IROp, IR::NodeID Node) { } -static void DirectBlockDelinker(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ExitFunctionLinkData *Record) { +static void DirectBlockDelinker(FEXCore::Core::CpuStateFrame* Frame, FEXCore::Context::ExitFunctionLinkData* Record) { auto LinkerAddress = Frame->Pointers.Common.ExitFunctionLinker; - uintptr_t branch = (uintptr_t)(Record) - 8; + uintptr_t branch = (uintptr_t)(Record)-8; FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 8); FEXCore::ARMEmitter::SingleUseForwardLabel l_BranchHost; emit.ldr(TMP1, &l_BranchHost); @@ -510,12 +478,12 @@ static void DirectBlockDelinker(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Co FEXCore::ARMEmitter::Emitter::ClearICache((void*)branch, 8); } -static void IndirectBlockDelinker(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ExitFunctionLinkData *Record) { +static void IndirectBlockDelinker(FEXCore::Core::CpuStateFrame* Frame, FEXCore::Context::ExitFunctionLinkData* Record) { auto LinkerAddress = Frame->Pointers.Common.ExitFunctionLinker; Record->HostBranch = LinkerAddress; } -static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ExitFunctionLinkData *Record) { +static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame* Frame, FEXCore::Context::ExitFunctionLinkData* Record) { auto Thread = Frame->Thread; 
auto GuestRip = Record->GuestRIP; @@ -526,9 +494,9 @@ static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame *Fram return Frame->Pointers.Common.DispatcherLoopTop; } - uintptr_t branch = (uintptr_t)(Record) - 8; + uintptr_t branch = (uintptr_t)(Record)-8; - auto offset = HostCode/4 - branch/4; + auto offset = HostCode / 4 - branch / 4; if (vixl::IsInt26(offset)) { // optimal case - can branch directly // patch the code @@ -549,16 +517,15 @@ static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame *Fram return HostCode; } -void Arm64JITCore::Op_NoOp(IR::IROp_Header const *IROp, IR::NodeID Node) { -} +void Arm64JITCore::Op_NoOp(const IR::IROp_Header* IROp, IR::NodeID Node) {} -Arm64JITCore::Arm64JITCore(FEXCore::Context::ContextImpl *ctx, FEXCore::Core::InternalThreadState *Thread) +Arm64JITCore::Arm64JITCore(FEXCore::Context::ContextImpl* ctx, FEXCore::Core::InternalThreadState* Thread) : CPUBackend(Thread, INITIAL_CODE_SIZE, MAX_CODE_SIZE) , Arm64Emitter(ctx) - , HostSupportsSVE128{ctx->HostFeatures.SupportsSVE} - , HostSupportsSVE256{ctx->HostFeatures.SupportsAVX} - , HostSupportsRPRES{ctx->HostFeatures.SupportsRPRES} - , HostSupportsAFP{ctx->HostFeatures.SupportsAFP} + , HostSupportsSVE128 {ctx->HostFeatures.SupportsSVE} + , HostSupportsSVE256 {ctx->HostFeatures.SupportsAVX} + , HostSupportsRPRES {ctx->HostFeatures.SupportsRPRES} + , HostSupportsAFP {ctx->HostFeatures.SupportsAFP} , CTX {ctx} { RAPass = Thread->PassManager->GetPass("RA"); @@ -573,7 +540,7 @@ Arm64JITCore::Arm64JITCore(FEXCore::Context::ContextImpl *ctx, FEXCore::Core::In RAPass->AddRegisters(FEXCore::IR::ComplexClass, 1); for (uint32_t i = 0; i < GeneralPairRegisters.size(); ++i) { - RAPass->AddRegisterConflict(FEXCore::IR::GPRClass, i * 2, FEXCore::IR::GPRPairClass, i); + RAPass->AddRegisterConflict(FEXCore::IR::GPRClass, i * 2, FEXCore::IR::GPRPairClass, i); RAPass->AddRegisterConflict(FEXCore::IR::GPRClass, i * 2 + 1, FEXCore::IR::GPRPairClass, i); } @@ 
-581,7 +548,7 @@ Arm64JITCore::Arm64JITCore(FEXCore::Context::ContextImpl *ctx, FEXCore::Core::In // Set up pointers that the JIT needs to load // Common - auto &Common = ThreadState->CurrentFrame->Pointers.Common; + auto& Common = ThreadState->CurrentFrame->Pointers.Common; Common.PrintValue = reinterpret_cast(PrintValue); Common.PrintVectorValue = reinterpret_cast(PrintVectorValue); @@ -609,7 +576,7 @@ Arm64JITCore::Arm64JITCore(FEXCore::Context::ContextImpl *ctx, FEXCore::Core::In InterpreterOps::FillFallbackIndexPointers(Common.FallbackHandlerPointers); // Platform Specific - auto &AArch64 = ThreadState->CurrentFrame->Pointers.AArch64; + auto& AArch64 = ThreadState->CurrentFrame->Pointers.AArch64; AArch64.LUDIV = reinterpret_cast(LUDIV); AArch64.LDIV = reinterpret_cast(LDIV); @@ -624,8 +591,7 @@ Arm64JITCore::Arm64JITCore(FEXCore::Context::ContextImpl *ctx, FEXCore::Core::In if (ParanoidTSO()) { RT_LoadMemTSO = &Arm64JITCore::Op_ParanoidLoadMemTSO; RT_StoreMemTSO = &Arm64JITCore::Op_ParanoidStoreMemTSO; - } - else { + } else { RT_LoadMemTSO = &Arm64JITCore::Op_LoadMemTSO; RT_StoreMemTSO = &Arm64JITCore::Op_StoreMemTSO; } @@ -645,9 +611,7 @@ void Arm64JITCore::ClearCache() { EmitDetectionString(); } -Arm64JITCore::~Arm64JITCore() { - -} +Arm64JITCore::~Arm64JITCore() {} bool Arm64JITCore::IsInlineConstant(const IR::OrderedNodeWrapper& WNode, uint64_t* Value) const { auto OpHeader = IR->GetOp(WNode); @@ -704,10 +668,8 @@ bool Arm64JITCore::IsGPRPair(IR::NodeID Node) const { return Class == IR::GPRPairClass; } -CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, - FEXCore::IR::IRListView const *IR, - FEXCore::Core::DebugData *DebugData, - FEXCore::IR::RegisterAllocationData *RAData) { +CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, const FEXCore::IR::IRListView* IR, FEXCore::Core::DebugData* DebugData, + FEXCore::IR::RegisterAllocationData* RAData) { FEXCORE_PROFILE_SCOPED("Arm64::CompileCode"); JumpTargets.clear(); @@ -727,9 
+689,9 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, CodeData.BlockBegin = GetCursorAddress(); // Put the code header at the start of the data block. - ARMEmitter::BackwardLabel JITCodeHeaderLabel{}; + ARMEmitter::BackwardLabel JITCodeHeaderLabel {}; Bind(&JITCodeHeaderLabel); - JITCodeHeader *CodeHeader = GetCursorAddress(); + JITCodeHeader* CodeHeader = GetCursorAddress(); CursorIncrement(sizeof(JITCodeHeader)); #ifdef VIXL_DISASSEMBLER @@ -766,11 +728,11 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, if (CTX->Config.NeedsPendingInterruptFaultCheck) { // Trigger a fault if there are any pending interrupts // Used only for suspend on WIN32 at the moment - strb(ARMEmitter::XReg::zr, STATE, offsetof(FEXCore::Core::InternalThreadState, InterruptFaultPage) - - offsetof(FEXCore::Core::InternalThreadState, BaseFrameState)); + strb(ARMEmitter::XReg::zr, STATE, + offsetof(FEXCore::Core::InternalThreadState, InterruptFaultPage) - offsetof(FEXCore::Core::InternalThreadState, BaseFrameState)); } - //LOGMAN_THROW_A_FMT(RAData->HasFullRA(), "Arm64 JIT only works with RA"); + // LOGMAN_THROW_A_FMT(RAData->HasFullRA(), "Arm64 JIT only works with RA"); SpillSlots = RAData->SpillSlots(); @@ -794,14 +756,13 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, LOGMAN_THROW_AA_FMT(BlockIROp->Header.Op == IR::OP_CODEBLOCK, "IR type failed to be a code block"); #endif - auto BlockStartHostCode = GetCursorAddress(); + auto BlockStartHostCode = GetCursorAddress(); { const auto Node = IR->GetID(BlockNode); const auto IsTarget = JumpTargets.try_emplace(Node).first; // if there's a pending branch, and it is not fall-through - if (PendingTargetLabel && PendingTargetLabel != &IsTarget->second) - { + if (PendingTargetLabel && PendingTargetLabel != &IsTarget->second) { b(PendingTargetLabel); } PendingTargetLabel = nullptr; @@ -812,43 +773,40 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, for (auto [CodeNode, 
IROp] : IR->GetCode(BlockNode)) { const auto ID = IR->GetID(CodeNode); switch (IROp->Op) { -#define REGISTER_OP_RT(op, x) case FEXCore::IR::IROps::OP_##op: std::invoke(RT_##x, this, IROp, ID); break -#define REGISTER_OP(op, x) case FEXCore::IR::IROps::OP_##op: Op_##x(IROp, ID); break +#define REGISTER_OP_RT(op, x) \ + case FEXCore::IR::IROps::OP_##op: std::invoke(RT_##x, this, IROp, ID); break +#define REGISTER_OP(op, x) \ + case FEXCore::IR::IROps::OP_##op: Op_##x(IROp, ID); break #define IROP_DISPATCH_DISPATCH #include #undef REGISTER_OP - default: - Op_Unhandled(IROp, ID); - break; + default: Op_Unhandled(IROp, ID); break; } } if (DebugData) { - DebugData->Subblocks.push_back({ - static_cast(BlockStartHostCode - CodeData.BlockEntry), - static_cast(GetCursorAddress() - BlockStartHostCode) - }); + DebugData->Subblocks.push_back({static_cast(BlockStartHostCode - CodeData.BlockEntry), + static_cast(GetCursorAddress() - BlockStartHostCode)}); } } // Make sure last branch is generated. It certainly can't be eliminated here. - if (PendingTargetLabel) - { + if (PendingTargetLabel) { b(PendingTargetLabel); } PendingTargetLabel = nullptr; // CodeSize not including the tail data. 
- const uint64_t CodeOnlySize = GetCursorAddress() - CodeData.BlockBegin; + const uint64_t CodeOnlySize = GetCursorAddress() - CodeData.BlockBegin; // Add the JitCodeTail - auto JITBlockTailLocation = GetCursorAddress(); + auto JITBlockTailLocation = GetCursorAddress(); auto JITBlockTail = GetCursorAddress(); CursorIncrement(sizeof(JITCodeTail)); - auto JITRIPEntriesLocation = GetCursorAddress(); + auto JITRIPEntriesLocation = GetCursorAddress(); auto JITRIPEntries = GetCursorAddress(); CursorIncrement(sizeof(JITRIPReconstructEntries) * DebugData->GuestOpcodes.size()); @@ -867,8 +825,8 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, uintptr_t CurrentRIPOffset = 0; uint64_t CurrentPCOffset = 0; for (size_t i = 0; i < DebugData->GuestOpcodes.size(); i++) { - const auto &GuestOpcode = DebugData->GuestOpcodes[i]; - auto &RIPEntry = JITRIPEntries[i]; + const auto& GuestOpcode = DebugData->GuestOpcodes[i]; + auto& RIPEntry = JITRIPEntries[i]; RIPEntry.HostPCOffset = GuestOpcode.HostEntryOffset - CurrentPCOffset; RIPEntry.GuestRIPOffset = GuestOpcode.GuestEntryOffset - CurrentRIPOffset; CurrentPCOffset = GuestOpcode.HostEntryOffset; @@ -878,7 +836,7 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, CodeHeader->OffsetToBlockTail = JITBlockTailLocation - CodeData.BlockBegin; - CodeData.Size = GetCursorAddress() - CodeData.BlockBegin; + CodeData.Size = GetCursorAddress() - CodeData.BlockBegin; JITBlockTail->Size = CodeData.Size; @@ -933,7 +891,7 @@ void Arm64JITCore::ResetStack() { } } -fextl::unique_ptr CreateArm64JITCore(FEXCore::Context::ContextImpl *ctx, FEXCore::Core::InternalThreadState *Thread) { +fextl::unique_ptr CreateArm64JITCore(FEXCore::Context::ContextImpl* ctx, FEXCore::Core::InternalThreadState* Thread) { return fextl::make_unique(ctx, Thread); } @@ -945,4 +903,4 @@ CPUBackendFeatures GetArm64JITBackendFeatures() { }; } -} +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp 
b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp index 40472ac20a..244308f2c5 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp @@ -15,7 +15,7 @@ tags: backend|arm64 #include namespace FEXCore::CPU { -#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const *IROp, IR::NodeID Node) +#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node) DEF_OP(LoadContext) { const auto Op = IROp->C(); @@ -25,49 +25,26 @@ DEF_OP(LoadContext) { auto Dst = GetReg(Node); switch (OpSize) { - case 1: - ldrb(Dst, STATE, Op->Offset); - break; - case 2: - ldrh(Dst, STATE, Op->Offset); - break; - case 4: - ldr(Dst.W(), STATE, Op->Offset); - break; - case 8: - ldr(Dst.X(), STATE, Op->Offset); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", OpSize); - break; + case 1: ldrb(Dst, STATE, Op->Offset); break; + case 2: ldrh(Dst, STATE, Op->Offset); break; + case 4: ldr(Dst.W(), STATE, Op->Offset); break; + case 8: ldr(Dst.X(), STATE, Op->Offset); break; + default: LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", OpSize); break; } - } - else { + } else { auto Dst = GetVReg(Node); switch (OpSize) { - case 1: - ldrb(Dst, STATE, Op->Offset); - break; - case 2: - ldrh(Dst, STATE, Op->Offset); - break; - case 4: - ldr(Dst.S(), STATE, Op->Offset); - break; - case 8: - ldr(Dst.D(), STATE, Op->Offset); - break; - case 16: - ldr(Dst.Q(), STATE, Op->Offset); - break; + case 1: ldrb(Dst, STATE, Op->Offset); break; + case 2: ldrh(Dst, STATE, Op->Offset); break; + case 4: ldr(Dst.S(), STATE, Op->Offset); break; + case 8: ldr(Dst.D(), STATE, Op->Offset); break; + case 16: ldr(Dst.Q(), STATE, Op->Offset); break; case 32: mov(TMP1, Op->Offset); ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), STATE, TMP1); break; - default: - LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", OpSize); break; } } 
} @@ -77,52 +54,29 @@ DEF_OP(StoreContext) { const auto OpSize = IROp->Size; if (Op->Class == FEXCore::IR::GPRClass) { - auto Src = GetReg(Op->Value.ID()); + auto Src = GetReg(Op->Value.ID()); switch (OpSize) { - case 1: - strb(Src, STATE, Op->Offset); - break; - case 2: - strh(Src, STATE, Op->Offset); - break; - case 4: - str(Src.W(), STATE, Op->Offset); - break; - case 8: - str(Src.X(), STATE, Op->Offset); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled StoreContext size: {}", OpSize); - break; + case 1: strb(Src, STATE, Op->Offset); break; + case 2: strh(Src, STATE, Op->Offset); break; + case 4: str(Src.W(), STATE, Op->Offset); break; + case 8: str(Src.X(), STATE, Op->Offset); break; + default: LOGMAN_MSG_A_FMT("Unhandled StoreContext size: {}", OpSize); break; } - } - else { + } else { const auto Src = GetVReg(Op->Value.ID()); switch (OpSize) { - case 1: - strb(Src, STATE, Op->Offset); - break; - case 2: - strh(Src, STATE, Op->Offset); - break; - case 4: - str(Src.S(), STATE, Op->Offset); - break; - case 8: - str(Src.D(), STATE, Op->Offset); - break; - case 16: - str(Src.Q(), STATE, Op->Offset); - break; + case 1: strb(Src, STATE, Op->Offset); break; + case 2: strh(Src, STATE, Op->Offset); break; + case 4: str(Src.S(), STATE, Op->Offset); break; + case 8: str(Src.D(), STATE, Op->Offset); break; + case 16: str(Src.Q(), STATE, Op->Offset); break; case 32: mov(TMP1, Op->Offset); st1b(Src.Z(), PRED_TMP_32B, STATE, TMP1); break; - default: - LOGMAN_MSG_A_FMT("Unhandled StoreContext size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled StoreContext size: {}", OpSize); break; } } } @@ -132,10 +86,11 @@ DEF_OP(LoadRegister) { const auto OpSize = IROp->Size; if (Op->Class == IR::GPRClass) { - const auto regId = - Op->Offset == offsetof(Core::CpuStateFrame, State.pf_raw) ? (StaticRegisters.size() - 2) : - Op->Offset == offsetof(Core::CpuStateFrame, State.af_raw) ? 
(StaticRegisters.size() - 1) : - (Op->Offset - offsetof(Core::CpuStateFrame, State.gregs[0])) / Core::CPUState::GPR_REG_SIZE; + const auto regId = Op->Offset == offsetof(Core::CpuStateFrame, State.pf_raw) ? + (StaticRegisters.size() - 2) : + Op->Offset == offsetof(Core::CpuStateFrame, State.af_raw) ? + (StaticRegisters.size() - 1) : + (Op->Offset - offsetof(Core::CpuStateFrame, State.gregs[0])) / Core::CPUState::GPR_REG_SIZE; const auto regOffs = Op->Offset & 7; @@ -144,27 +99,24 @@ DEF_OP(LoadRegister) { const auto reg = StaticRegisters[regId]; switch (OpSize) { - case 4: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); - if (GetReg(Node).Idx() != reg.Idx()) - mov(GetReg(Node).W(), reg.W()); - break; + case 4: + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); + if (GetReg(Node).Idx() != reg.Idx()) { + mov(GetReg(Node).W(), reg.W()); + } + break; - case 8: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); - if (GetReg(Node).Idx() != reg.Idx()) { - mov(GetReg(Node).X(), reg.X()); - } - break; + case 8: + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); + if (GetReg(Node).Idx() != reg.Idx()) { + mov(GetReg(Node).X(), reg.X()); + } + break; - default: - LOGMAN_MSG_A_FMT("Unhandled LoadRegister GPR size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled LoadRegister GPR size: {}", OpSize); break; } - } - else if (Op->Class == IR::FPRClass) { - const auto regSize = HostSupportsSVE256 ? Core::CPUState::XMM_AVX_REG_SIZE - : Core::CPUState::XMM_SSE_REG_SIZE; + } else if (Op->Class == IR::FPRClass) { + const auto regSize = HostSupportsSVE256 ? 
Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE; const auto regId = (Op->Offset - offsetof(Core::CpuStateFrame, State.xmm.avx.data[0][0])) / regSize; LOGMAN_THROW_A_FMT(regId < StaticFPRegisters.size(), "out of range regId"); @@ -192,116 +144,112 @@ DEF_OP(LoadRegister) { }; switch (OpSize) { - case 1: { - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); - dup(ARMEmitter::ScalarRegSize::i8Bit, host, guest, 0); - break; - } - case 2: { - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); - fmov(host.H(), guest.H()); - break; - } - case 4: { - LOGMAN_THROW_AA_FMT((regOffs & 3) == 0, "unexpected regOffs: {}", regOffs); - if (regOffs == 0) { - if (host.Idx() != guest.Idx()) { - fmov(host.S(), guest.S()); - } - } else { - const auto Predicate = LoadPredicate(); - - dup(FEXCore::ARMEmitter::SubRegSize::i32Bit, VTMP1.Z(), host.Z(), 0); - mov(FEXCore::ARMEmitter::SubRegSize::i32Bit, guest.Z(), Predicate, VTMP1.Z()); - - EmitData(1U << regOffs); + case 1: { + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); + dup(ARMEmitter::ScalarRegSize::i8Bit, host, guest, 0); + break; + } + case 2: { + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); + fmov(host.H(), guest.H()); + break; + } + case 4: { + LOGMAN_THROW_AA_FMT((regOffs & 3) == 0, "unexpected regOffs: {}", regOffs); + if (regOffs == 0) { + if (host.Idx() != guest.Idx()) { + fmov(host.S(), guest.S()); } - break; - } - - case 8: { - LOGMAN_THROW_AA_FMT((regOffs & 7) == 0, "unexpected regOffs: {}", regOffs); - if (regOffs == 0) { - if (host.Idx() != guest.Idx()) { - dup(ARMEmitter::ScalarRegSize::i64Bit, host, guest, 0); - } - } else { - const auto Predicate = LoadPredicate(); + } else { + const auto Predicate = LoadPredicate(); - dup(FEXCore::ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), host.Z(), 0); - mov(FEXCore::ARMEmitter::SubRegSize::i64Bit, guest.Z(), Predicate, VTMP1.Z()); + dup(FEXCore::ARMEmitter::SubRegSize::i32Bit, 
VTMP1.Z(), host.Z(), 0); + mov(FEXCore::ARMEmitter::SubRegSize::i32Bit, guest.Z(), Predicate, VTMP1.Z()); - EmitData(1U << regOffs); - } - break; + EmitData(1U << regOffs); } + break; + } - case 16: { - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); + case 8: { + LOGMAN_THROW_AA_FMT((regOffs & 7) == 0, "unexpected regOffs: {}", regOffs); + if (regOffs == 0) { if (host.Idx() != guest.Idx()) { - mov(host.Q(), guest.Q()); + dup(ARMEmitter::ScalarRegSize::i64Bit, host, guest, 0); } - break; + } else { + const auto Predicate = LoadPredicate(); + + dup(FEXCore::ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), host.Z(), 0); + mov(FEXCore::ARMEmitter::SubRegSize::i64Bit, guest.Z(), Predicate, VTMP1.Z()); + + EmitData(1U << regOffs); } + break; + } - case 32: { - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); - if (host.Idx() != guest.Idx()) { - mov(ARMEmitter::SubRegSize::i64Bit, host.Z(), PRED_TMP_32B.Merging(), guest.Z()); - } - break; + case 16: { + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); + if (host.Idx() != guest.Idx()) { + mov(host.Q(), guest.Q()); + } + break; + } + + case 32: { + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); + if (host.Idx() != guest.Idx()) { + mov(ARMEmitter::SubRegSize::i64Bit, host.Z(), PRED_TMP_32B.Merging(), guest.Z()); } + break; + } - default: - LOGMAN_MSG_A_FMT("Unhandled LoadRegister FPR size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled LoadRegister FPR size: {}", OpSize); break; } } else { const auto regOffs = Op->Offset & 15; switch (OpSize) { - case 1: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); - dup(ARMEmitter::ScalarRegSize::i8Bit, host, guest, 0); - break; - - case 2: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); - fmov(host.H(), guest.H()); - break; - - case 4: - LOGMAN_THROW_AA_FMT((regOffs & 3) == 0, "unexpected regOffs: {}", regOffs); - if (regOffs == 0) { - if 
(host.Idx() != guest.Idx()) { - fmov(host.S(), guest.S()); - } - } else { - ins(ARMEmitter::SubRegSize::i32Bit, host, 0, guest, regOffs/4); - } - break; - - case 8: - LOGMAN_THROW_AA_FMT((regOffs & 7) == 0, "unexpected regOffs: {}", regOffs); - if (regOffs == 0) { - if (host.Idx() != guest.Idx()) { - dup(ARMEmitter::ScalarRegSize::i64Bit, host, guest, 0); - } - } else { - ins(ARMEmitter::SubRegSize::i64Bit, host, 0, guest, regOffs/8); + case 1: + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); + dup(ARMEmitter::ScalarRegSize::i8Bit, host, guest, 0); + break; + + case 2: + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); + fmov(host.H(), guest.H()); + break; + + case 4: + LOGMAN_THROW_AA_FMT((regOffs & 3) == 0, "unexpected regOffs: {}", regOffs); + if (regOffs == 0) { + if (host.Idx() != guest.Idx()) { + fmov(host.S(), guest.S()); } - break; + } else { + ins(ARMEmitter::SubRegSize::i32Bit, host, 0, guest, regOffs / 4); + } + break; - case 16: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); + case 8: + LOGMAN_THROW_AA_FMT((regOffs & 7) == 0, "unexpected regOffs: {}", regOffs); + if (regOffs == 0) { if (host.Idx() != guest.Idx()) { - mov(host.Q(), guest.Q()); + dup(ARMEmitter::ScalarRegSize::i64Bit, host, guest, 0); } - break; + } else { + ins(ARMEmitter::SubRegSize::i64Bit, host, 0, guest, regOffs / 8); + } + break; + + case 16: + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); + if (host.Idx() != guest.Idx()) { + mov(host.Q(), guest.Q()); + } + break; - default: - LOGMAN_MSG_A_FMT("Unhandled LoadRegister FPR size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled LoadRegister FPR size: {}", OpSize); break; } } } else { @@ -316,10 +264,11 @@ DEF_OP(StoreRegister) { if (Op->Class == IR::GPRClass) { const auto regOffs = Op->Offset & 7; - const auto regId = - Op->Offset == offsetof(Core::CpuStateFrame, State.pf_raw) ? 
(StaticRegisters.size() - 2) : - Op->Offset == offsetof(Core::CpuStateFrame, State.af_raw) ? (StaticRegisters.size() - 1) : - (Op->Offset - offsetof(Core::CpuStateFrame, State.gregs[0])) / Core::CPUState::GPR_REG_SIZE; + const auto regId = Op->Offset == offsetof(Core::CpuStateFrame, State.pf_raw) ? + (StaticRegisters.size() - 2) : + Op->Offset == offsetof(Core::CpuStateFrame, State.af_raw) ? + (StaticRegisters.size() - 1) : + (Op->Offset - offsetof(Core::CpuStateFrame, State.gregs[0])) / Core::CPUState::GPR_REG_SIZE; LOGMAN_THROW_A_FMT(regId < StaticRegisters.size(), "out of range regId"); @@ -327,26 +276,23 @@ DEF_OP(StoreRegister) { const auto Src = GetReg(Op->Value.ID()); switch (OpSize) { - case 4: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); - if (Src.Idx() != reg.Idx()) { - mov(ARMEmitter::Size::i32Bit, reg, Src); - } - break; - case 8: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); - if (Src.Idx() != reg.Idx()) { - mov(ARMEmitter::Size::i64Bit, reg, Src); - } - break; + case 4: + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); + if (Src.Idx() != reg.Idx()) { + mov(ARMEmitter::Size::i32Bit, reg, Src); + } + break; + case 8: + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); + if (Src.Idx() != reg.Idx()) { + mov(ARMEmitter::Size::i64Bit, reg, Src); + } + break; - default: - LOGMAN_MSG_A_FMT("Unhandled StoreRegister GPR size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled StoreRegister GPR size: {}", OpSize); break; } } else if (Op->Class == IR::FPRClass) { - const auto regSize = HostSupportsSVE256 ? Core::CPUState::XMM_AVX_REG_SIZE - : Core::CPUState::XMM_SSE_REG_SIZE; + const auto regSize = HostSupportsSVE256 ? 
Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE; const auto regId = (Op->Offset - offsetof(Core::CpuStateFrame, State.xmm.avx.data[0][0])) / regSize; LOGMAN_THROW_A_FMT(regId < StaticFPRegisters.size(), "regId out of range"); @@ -385,107 +331,101 @@ DEF_OP(StoreRegister) { }; switch (OpSize) { - case 1: { - LOGMAN_THROW_AA_FMT(regOffs <= 31, "unexpected reg index: {}", regOffs); + case 1: { + LOGMAN_THROW_AA_FMT(regOffs <= 31, "unexpected reg index: {}", regOffs); - const auto Predicate = LoadPredicate(); - dup(ARMEmitter::SubRegSize::i8Bit, VTMP1.Z(), host.Z(), 0); - mov(ARMEmitter::SubRegSize::i8Bit, guest.Z(), Predicate, VTMP1.Z()); + const auto Predicate = LoadPredicate(); + dup(ARMEmitter::SubRegSize::i8Bit, VTMP1.Z(), host.Z(), 0); + mov(ARMEmitter::SubRegSize::i8Bit, guest.Z(), Predicate, VTMP1.Z()); - EmitData(1U << regOffs); - break; - } + EmitData(1U << regOffs); + break; + } - case 2: { - LOGMAN_THROW_AA_FMT((regOffs / 2) <= 15, "unexpected reg index: {}", regOffs / 2); + case 2: { + LOGMAN_THROW_AA_FMT((regOffs / 2) <= 15, "unexpected reg index: {}", regOffs / 2); - const auto Predicate = LoadPredicate(); - dup(ARMEmitter::SubRegSize::i16Bit, VTMP1.Z(), host.Z(), 0); - mov(ARMEmitter::SubRegSize::i16Bit, guest.Z(), Predicate, VTMP1.Z()); + const auto Predicate = LoadPredicate(); + dup(ARMEmitter::SubRegSize::i16Bit, VTMP1.Z(), host.Z(), 0); + mov(ARMEmitter::SubRegSize::i16Bit, guest.Z(), Predicate, VTMP1.Z()); - EmitData(1U << regOffs); - break; - } + EmitData(1U << regOffs); + break; + } - case 4: { - LOGMAN_THROW_AA_FMT((regOffs / 4) <= 7, "unexpected reg index: {}", regOffs / 4); + case 4: { + LOGMAN_THROW_AA_FMT((regOffs / 4) <= 7, "unexpected reg index: {}", regOffs / 4); - const auto Predicate = LoadPredicate(); + const auto Predicate = LoadPredicate(); - dup(ARMEmitter::SubRegSize::i32Bit, VTMP1.Z(), host.Z(), 0); - mov(ARMEmitter::SubRegSize::i32Bit, guest.Z(), Predicate, VTMP1.Z()); + dup(ARMEmitter::SubRegSize::i32Bit, 
VTMP1.Z(), host.Z(), 0); + mov(ARMEmitter::SubRegSize::i32Bit, guest.Z(), Predicate, VTMP1.Z()); - EmitData(1U << regOffs); - break; - } + EmitData(1U << regOffs); + break; + } - case 8: { - LOGMAN_THROW_AA_FMT((regOffs / 8) <= 3, "unexpected reg index: {}", regOffs / 8); + case 8: { + LOGMAN_THROW_AA_FMT((regOffs / 8) <= 3, "unexpected reg index: {}", regOffs / 8); - const auto Predicate = LoadPredicate(); + const auto Predicate = LoadPredicate(); - dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), host.Z(), 0); - mov(ARMEmitter::SubRegSize::i64Bit, guest.Z(), Predicate, VTMP1.Z()); + dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), host.Z(), 0); + mov(ARMEmitter::SubRegSize::i64Bit, guest.Z(), Predicate, VTMP1.Z()); - EmitData(1U << regOffs); - break; - } + EmitData(1U << regOffs); + break; + } - case 16: { - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); - if (guest.Idx() != host.Idx()) { - mov(guest.Q(), host.Q()); - } - break; + case 16: { + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); + if (guest.Idx() != host.Idx()) { + mov(guest.Q(), host.Q()); } + break; + } - case 32: { - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); - if (guest.Idx() != host.Idx()) { - mov(ARMEmitter::SubRegSize::i64Bit, guest.Z(), PRED_TMP_32B.Merging(), host.Z()); - } - break; + case 32: { + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); + if (guest.Idx() != host.Idx()) { + mov(ARMEmitter::SubRegSize::i64Bit, guest.Z(), PRED_TMP_32B.Merging(), host.Z()); } + break; + } - default: - LOGMAN_MSG_A_FMT("Unhandled StoreRegister FPR size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled StoreRegister FPR size: {}", OpSize); break; } } else { const auto regOffs = Op->Offset & 15; switch (OpSize) { - case 1: - ins(ARMEmitter::SubRegSize::i8Bit, guest, regOffs, host, 0); - break; - - case 2: - LOGMAN_THROW_AA_FMT((regOffs & 1) == 0, "unexpected regOffs: {}", regOffs); - 
ins(ARMEmitter::SubRegSize::i16Bit, guest, regOffs / 2, host, 0); - break; - - case 4: - LOGMAN_THROW_AA_FMT((regOffs & 3) == 0, "unexpected regOffs: {}", regOffs); - // XXX: This had a bug with insert of size 16bit - ins(ARMEmitter::SubRegSize::i32Bit, guest, regOffs / 4, host, 0); - break; - - case 8: - LOGMAN_THROW_AA_FMT((regOffs & 7) == 0, "unexpected regOffs: {}", regOffs); - // XXX: This had a bug with insert of size 16bit - ins(ARMEmitter::SubRegSize::i64Bit, guest, regOffs / 8, host, 0); - break; - - case 16: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); - if (guest.Idx() != host.Idx()) { - mov(guest.Q(), host.Q()); - } - break; + case 1: ins(ARMEmitter::SubRegSize::i8Bit, guest, regOffs, host, 0); break; - default: - LOGMAN_MSG_A_FMT("Unhandled StoreRegister FPR size: {}", OpSize); - break; + case 2: + LOGMAN_THROW_AA_FMT((regOffs & 1) == 0, "unexpected regOffs: {}", regOffs); + ins(ARMEmitter::SubRegSize::i16Bit, guest, regOffs / 2, host, 0); + break; + + case 4: + LOGMAN_THROW_AA_FMT((regOffs & 3) == 0, "unexpected regOffs: {}", regOffs); + // XXX: This had a bug with insert of size 16bit + ins(ARMEmitter::SubRegSize::i32Bit, guest, regOffs / 4, host, 0); + break; + + case 8: + LOGMAN_THROW_AA_FMT((regOffs & 7) == 0, "unexpected regOffs: {}", regOffs); + // XXX: This had a bug with insert of size 16bit + ins(ARMEmitter::SubRegSize::i64Bit, guest, regOffs / 8, host, 0); + break; + + case 16: + LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); + if (guest.Idx() != host.Idx()) { + mov(guest.Q(), host.Q()); + } + break; + + default: LOGMAN_MSG_A_FMT("Unhandled StoreRegister FPR size: {}", OpSize); break; } } } else { @@ -508,33 +448,18 @@ DEF_OP(LoadContextIndexed) { add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, FEXCore::ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride)); const auto Dst = GetReg(Node); switch (OpSize) { - case 1: - ldrb(Dst, TMP1, Op->BaseOffset); - break; - case 2: - ldrh(Dst, TMP1, 
Op->BaseOffset); - break; - case 4: - ldr(Dst.W(), TMP1, Op->BaseOffset); - break; - case 8: - ldr(Dst.X(), TMP1, Op->BaseOffset); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed size: {}", OpSize); - break; + case 1: ldrb(Dst, TMP1, Op->BaseOffset); break; + case 2: ldrh(Dst, TMP1, Op->BaseOffset); break; + case 4: ldr(Dst.W(), TMP1, Op->BaseOffset); break; + case 8: ldr(Dst.X(), TMP1, Op->BaseOffset); break; + default: LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed size: {}", OpSize); break; } break; } - case 16: - LOGMAN_MSG_A_FMT("Invalid Class load of size 16"); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed stride: {}", Op->Stride); - break; + case 16: LOGMAN_MSG_A_FMT("Invalid Class load of size 16"); break; + default: LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed stride: {}", Op->Stride); break; } - } - else { + } else { switch (Op->Stride) { case 1: case 2: @@ -546,18 +471,10 @@ DEF_OP(LoadContextIndexed) { const auto Dst = GetVReg(Node); switch (OpSize) { - case 1: - ldrb(Dst, TMP1, Op->BaseOffset); - break; - case 2: - ldrh(Dst, TMP1, Op->BaseOffset); - break; - case 4: - ldr(Dst.S(), TMP1, Op->BaseOffset); - break; - case 8: - ldr(Dst.D(), TMP1, Op->BaseOffset); - break; + case 1: ldrb(Dst, TMP1, Op->BaseOffset); break; + case 2: ldrh(Dst, TMP1, Op->BaseOffset); break; + case 4: ldr(Dst.S(), TMP1, Op->BaseOffset); break; + case 8: ldr(Dst.D(), TMP1, Op->BaseOffset); break; case 16: if (Op->BaseOffset % 16 == 0) { ldr(Dst.Q(), TMP1, Op->BaseOffset); @@ -570,15 +487,11 @@ DEF_OP(LoadContextIndexed) { mov(TMP2, Op->BaseOffset); ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), TMP1, TMP2); break; - default: - LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed size: {}", OpSize); break; } break; } - default: - LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed stride: {}", Op->Stride); - break; + default: LOGMAN_MSG_A_FMT("Unhandled 
LoadContextIndexed stride: {}", Op->Stride); break; } } } @@ -600,33 +513,18 @@ DEF_OP(StoreContextIndexed) { add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, FEXCore::ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride)); switch (OpSize) { - case 1: - strb(Value, TMP1, Op->BaseOffset); - break; - case 2: - strh(Value, TMP1, Op->BaseOffset); - break; - case 4: - str(Value.W(), TMP1, Op->BaseOffset); - break; - case 8: - str(Value.X(), TMP1, Op->BaseOffset); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled StoreContextIndexed size: {}", OpSize); - break; + case 1: strb(Value, TMP1, Op->BaseOffset); break; + case 2: strh(Value, TMP1, Op->BaseOffset); break; + case 4: str(Value.W(), TMP1, Op->BaseOffset); break; + case 8: str(Value.X(), TMP1, Op->BaseOffset); break; + default: LOGMAN_MSG_A_FMT("Unhandled StoreContextIndexed size: {}", OpSize); break; } break; } - case 16: - LOGMAN_MSG_A_FMT("Invalid Class store of size 16"); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled StoreContextIndexed stride: {}", Op->Stride); - break; + case 16: LOGMAN_MSG_A_FMT("Invalid Class store of size 16"); break; + default: LOGMAN_MSG_A_FMT("Unhandled StoreContextIndexed stride: {}", Op->Stride); break; } - } - else { + } else { const auto Value = GetVReg(Op->Value.ID()); switch (Op->Stride) { @@ -639,18 +537,10 @@ DEF_OP(StoreContextIndexed) { add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, FEXCore::ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride)); switch (OpSize) { - case 1: - strb(Value, TMP1, Op->BaseOffset); - break; - case 2: - strh(Value, TMP1, Op->BaseOffset); - break; - case 4: - str(Value.S(), TMP1, Op->BaseOffset); - break; - case 8: - str(Value.D(), TMP1, Op->BaseOffset); - break; + case 1: strb(Value, TMP1, Op->BaseOffset); break; + case 2: strh(Value, TMP1, Op->BaseOffset); break; + case 4: str(Value.S(), TMP1, Op->BaseOffset); break; + case 8: str(Value.D(), TMP1, Op->BaseOffset); break; case 16: if (Op->BaseOffset % 16 == 0) { str(Value.Q(), TMP1, 
Op->BaseOffset); @@ -663,15 +553,11 @@ DEF_OP(StoreContextIndexed) { mov(TMP2, Op->BaseOffset); st1b(Value.Z(), PRED_TMP_32B, TMP1, TMP2); break; - default: - LOGMAN_MSG_A_FMT("Unhandled StoreContextIndexed size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled StoreContextIndexed size: {}", OpSize); break; } break; } - default: - LOGMAN_MSG_A_FMT("Unhandled StoreContextIndexed stride: {}", Op->Stride); - break; + default: LOGMAN_MSG_A_FMT("Unhandled StoreContextIndexed stride: {}", Op->Stride); break; } } } @@ -688,8 +574,7 @@ DEF_OP(SpillRegister) { if (SlotOffset > LSByteMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); strb(Src, ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { strb(Src, ARMEmitter::Reg::rsp, SlotOffset); } break; @@ -698,8 +583,7 @@ DEF_OP(SpillRegister) { if (SlotOffset > LSHalfMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); strh(Src, ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { strh(Src, ARMEmitter::Reg::rsp, SlotOffset); } break; @@ -708,8 +592,7 @@ DEF_OP(SpillRegister) { if (SlotOffset > LSWordMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); str(Src.W(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { str(Src.W(), ARMEmitter::Reg::rsp, SlotOffset); } break; @@ -718,15 +601,12 @@ DEF_OP(SpillRegister) { if (SlotOffset > LSDWordMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); str(Src.X(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { str(Src.X(), ARMEmitter::Reg::rsp, SlotOffset); } break; } - default: - LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", OpSize); break; } } else if (Op->Class == FEXCore::IR::FPRClass) { const auto Src = 
GetVReg(Op->Value.ID()); @@ -736,8 +616,7 @@ DEF_OP(SpillRegister) { if (SlotOffset > LSWordMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); str(Src.S(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { str(Src.S(), ARMEmitter::Reg::rsp, SlotOffset); } break; @@ -746,8 +625,7 @@ DEF_OP(SpillRegister) { if (SlotOffset > LSDWordMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); str(Src.D(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { str(Src.D(), ARMEmitter::Reg::rsp, SlotOffset); } break; @@ -756,8 +634,7 @@ DEF_OP(SpillRegister) { if (SlotOffset > LSQWordMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); str(Src.Q(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { str(Src.Q(), ARMEmitter::Reg::rsp, SlotOffset); } break; @@ -767,9 +644,7 @@ DEF_OP(SpillRegister) { st1b(Src.Z(), PRED_TMP_32B, ARMEmitter::Reg::rsp, TMP3); break; } - default: - LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", OpSize); break; } } else { LOGMAN_MSG_A_FMT("Unhandled SpillRegister class: {}", Op->Class.Val); @@ -788,8 +663,7 @@ DEF_OP(FillRegister) { if (SlotOffset > LSByteMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); ldrb(Dst, ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { ldrb(Dst, ARMEmitter::Reg::rsp, SlotOffset); } break; @@ -798,8 +672,7 @@ DEF_OP(FillRegister) { if (SlotOffset > LSHalfMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); ldrh(Dst, ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { ldrh(Dst, ARMEmitter::Reg::rsp, SlotOffset); } break; @@ -808,8 +681,7 @@ DEF_OP(FillRegister) { if (SlotOffset > 
LSWordMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); ldr(Dst.W(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { ldr(Dst.W(), ARMEmitter::Reg::rsp, SlotOffset); } break; @@ -818,15 +690,12 @@ DEF_OP(FillRegister) { if (SlotOffset > LSDWordMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); ldr(Dst.X(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { ldr(Dst.X(), ARMEmitter::Reg::rsp, SlotOffset); } break; } - default: - LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", OpSize); break; } } else if (Op->Class == FEXCore::IR::FPRClass) { const auto Dst = GetVReg(Node); @@ -836,8 +705,7 @@ DEF_OP(FillRegister) { if (SlotOffset > LSWordMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); ldr(Dst.S(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { ldr(Dst.S(), ARMEmitter::Reg::rsp, SlotOffset); } break; @@ -846,8 +714,7 @@ DEF_OP(FillRegister) { if (SlotOffset > LSDWordMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); ldr(Dst.D(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { ldr(Dst.D(), ARMEmitter::Reg::rsp, SlotOffset); } break; @@ -856,8 +723,7 @@ DEF_OP(FillRegister) { if (SlotOffset > LSQWordMaxUnsignedOffset) { LoadConstant(ARMEmitter::Size::i64Bit, TMP1, SlotOffset); ldr(Dst.Q(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0); - } - else { + } else { ldr(Dst.Q(), ARMEmitter::Reg::rsp, SlotOffset); } break; @@ -867,9 +733,7 @@ DEF_OP(FillRegister) { ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), ARMEmitter::Reg::rsp, TMP3); break; } - default: - LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", OpSize); - break; + default: LOGMAN_MSG_A_FMT("Unhandled FillRegister size: 
{}", OpSize); break; } } else { LOGMAN_MSG_A_FMT("Unhandled FillRegister class: {}", Op->Class.Val); @@ -892,9 +756,8 @@ DEF_OP(LoadFlag) { auto Op = IROp->C(); auto Dst = GetReg(Node); - LOGMAN_THROW_A_FMT(Op->Flag != X86State::RFLAG_PF_RAW_LOC && - Op->Flag != X86State::RFLAG_AF_RAW_LOC, - "PF/AF must be accessed as registers"); + LOGMAN_THROW_A_FMT(Op->Flag != X86State::RFLAG_PF_RAW_LOC && Op->Flag != X86State::RFLAG_AF_RAW_LOC, "PF/AF must be accessed as " + "registers"); ldrb(Dst, STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag); } @@ -902,18 +765,14 @@ DEF_OP(LoadFlag) { DEF_OP(StoreFlag) { auto Op = IROp->C(); - LOGMAN_THROW_A_FMT(Op->Flag != X86State::RFLAG_PF_RAW_LOC && - Op->Flag != X86State::RFLAG_AF_RAW_LOC, - "PF/AF must be accessed as registers"); + LOGMAN_THROW_A_FMT(Op->Flag != X86State::RFLAG_PF_RAW_LOC && Op->Flag != X86State::RFLAG_AF_RAW_LOC, "PF/AF must be accessed as " + "registers"); strb(GetReg(Op->Value.ID()), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag); } -FEXCore::ARMEmitter::ExtendedMemOperand Arm64JITCore::GenerateMemOperand(uint8_t AccessSize, - FEXCore::ARMEmitter::Register Base, - IR::OrderedNodeWrapper Offset, - IR::MemOffsetType OffsetType, - uint8_t OffsetScale) { +FEXCore::ARMEmitter::ExtendedMemOperand Arm64JITCore::GenerateMemOperand( + uint8_t AccessSize, FEXCore::ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, uint8_t OffsetScale) { if (Offset.IsInvalid()) { return ARMEmitter::ExtendedMemOperand(Base.X(), ARMEmitter::IndexType::OFFSET, 0); } else { @@ -925,11 +784,14 @@ FEXCore::ARMEmitter::ExtendedMemOperand Arm64JITCore::GenerateMemOperand(uint8_t return ARMEmitter::ExtendedMemOperand(Base.X(), ARMEmitter::IndexType::OFFSET, Const); } else { auto RegOffset = GetReg(Offset.ID()); - switch(OffsetType.Val) { - case IR::MEM_OFFSET_SXTX.Val: return ARMEmitter::ExtendedMemOperand(Base.X(), RegOffset.X(), ARMEmitter::ExtendedType::SXTX, 
FEXCore::ilog2(OffsetScale) ); - case IR::MEM_OFFSET_UXTW.Val: return ARMEmitter::ExtendedMemOperand(Base.X(), RegOffset.X(), ARMEmitter::ExtendedType::UXTW, FEXCore::ilog2(OffsetScale) ); - case IR::MEM_OFFSET_SXTW.Val: return ARMEmitter::ExtendedMemOperand(Base.X(), RegOffset.X(), ARMEmitter::ExtendedType::SXTW, FEXCore::ilog2(OffsetScale) ); - default: LOGMAN_MSG_A_FMT("Unhandled GenerateMemOperand OffsetType: {}", OffsetType.Val); break; + switch (OffsetType.Val) { + case IR::MEM_OFFSET_SXTX.Val: + return ARMEmitter::ExtendedMemOperand(Base.X(), RegOffset.X(), ARMEmitter::ExtendedType::SXTX, FEXCore::ilog2(OffsetScale)); + case IR::MEM_OFFSET_UXTW.Val: + return ARMEmitter::ExtendedMemOperand(Base.X(), RegOffset.X(), ARMEmitter::ExtendedType::UXTW, FEXCore::ilog2(OffsetScale)); + case IR::MEM_OFFSET_SXTW.Val: + return ARMEmitter::ExtendedMemOperand(Base.X(), RegOffset.X(), ARMEmitter::ExtendedType::SXTW, FEXCore::ilog2(OffsetScale)); + default: LOGMAN_MSG_A_FMT("Unhandled GenerateMemOperand OffsetType: {}", OffsetType.Val); break; } } } @@ -937,16 +799,14 @@ FEXCore::ARMEmitter::ExtendedMemOperand Arm64JITCore::GenerateMemOperand(uint8_t FEX_UNREACHABLE; } -FEXCore::ARMEmitter::SVEMemOperand Arm64JITCore::GenerateSVEMemOperand(uint8_t AccessSize, - FEXCore::ARMEmitter::Register Base, - IR::OrderedNodeWrapper Offset, - IR::MemOffsetType OffsetType, - [[maybe_unused]] uint8_t OffsetScale) { +FEXCore::ARMEmitter::SVEMemOperand Arm64JITCore::GenerateSVEMemOperand(uint8_t AccessSize, FEXCore::ARMEmitter::Register Base, + IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, + [[maybe_unused]] uint8_t OffsetScale) { if (Offset.IsInvalid()) { return FEXCore::ARMEmitter::SVEMemOperand(Base.X(), 0); } - uint64_t Const{}; + uint64_t Const {}; if (IsInlineConstant(Offset, &Const)) { if (Const == 0) { return FEXCore::ARMEmitter::SVEMemOperand(Base.X(), 0); @@ -985,8 +845,7 @@ FEXCore::ARMEmitter::SVEMemOperand Arm64JITCore::GenerateSVEMemOperand(uint8_t A // Note 
that we do nothing with the offset type and offset scale, // since SVE loads and stores don't have the ability to perform an // optional extension or shift as part of their behavior. - LOGMAN_THROW_A_FMT(OffsetType.Val == IR::MEM_OFFSET_SXTX.Val, - "Currently only the default offset type (SXTX) is supported."); + LOGMAN_THROW_A_FMT(OffsetType.Val == IR::MEM_OFFSET_SXTX.Val, "Currently only the default offset type (SXTX) is supported."); const auto RegOffset = GetReg(Offset.ID()); return FEXCore::ARMEmitter::SVEMemOperand(Base.X(), RegOffset.X()); @@ -1003,50 +862,27 @@ DEF_OP(LoadMem) { const auto Dst = GetReg(Node); switch (OpSize) { - case 1: - ldrb(Dst, MemSrc); - break; - case 2: - ldrh(Dst, MemSrc); - break; - case 4: - ldr(Dst.W(), MemSrc); - break; - case 8: - ldr(Dst.X(), MemSrc); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled LoadMem size: {}", OpSize); - break; + case 1: ldrb(Dst, MemSrc); break; + case 2: ldrh(Dst, MemSrc); break; + case 4: ldr(Dst.W(), MemSrc); break; + case 8: ldr(Dst.X(), MemSrc); break; + default: LOGMAN_MSG_A_FMT("Unhandled LoadMem size: {}", OpSize); break; } - } - else { + } else { const auto Dst = GetVReg(Node); switch (OpSize) { - case 1: - ldrb(Dst, MemSrc); - break; - case 2: - ldrh(Dst, MemSrc); - break; - case 4: - ldr(Dst.S(), MemSrc); - break; - case 8: - ldr(Dst.D(), MemSrc); - break; - case 16: - ldr(Dst.Q(), MemSrc); - break; - case 32: { - const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); - ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), Operand); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled LoadMem size: {}", OpSize); - break; + case 1: ldrb(Dst, MemSrc); break; + case 2: ldrh(Dst, MemSrc); break; + case 4: ldr(Dst.S(), MemSrc); break; + case 8: ldr(Dst.D(), MemSrc); break; + case 16: ldr(Dst.Q(), MemSrc); break; + case 32: { + const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); + ld1b(Dst.Z(), 
PRED_TMP_32B.Zeroing(), Operand); + break; + } + default: LOGMAN_MSG_A_FMT("Unhandled LoadMem size: {}", OpSize); break; } } } @@ -1068,103 +904,61 @@ DEF_OP(LoadMemTSO) { // 8bit load is always aligned to natural alignment const auto Dst = GetReg(Node); ldapurb(Dst, MemReg, Offset); - } - else { + } else { switch (OpSize) { - case 2: - ldapurh(Dst, MemReg, Offset); - break; - case 4: - ldapur(Dst.W(), MemReg, Offset); - break; - case 8: - ldapur(Dst.X(), MemReg, Offset); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); - break; + case 2: ldapurh(Dst, MemReg, Offset); break; + case 4: ldapur(Dst.W(), MemReg, Offset); break; + case 8: ldapur(Dst.X(), MemReg, Offset); break; + default: LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); break; } // Half-barrier once back-patched. nop(); } - } - else if (CTX->HostFeatures.SupportsRCPC && Op->Class == FEXCore::IR::GPRClass) { + } else if (CTX->HostFeatures.SupportsRCPC && Op->Class == FEXCore::IR::GPRClass) { const auto Dst = GetReg(Node); if (OpSize == 1) { // 8bit load is always aligned to natural alignment ldaprb(Dst.W(), MemReg); - } - else { + } else { switch (OpSize) { - case 2: - ldaprh(Dst.W(), MemReg); - break; - case 4: - ldapr(Dst.W(), MemReg); - break; - case 8: - ldapr(Dst.X(), MemReg); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); - break; + case 2: ldaprh(Dst.W(), MemReg); break; + case 4: ldapr(Dst.W(), MemReg); break; + case 8: ldapr(Dst.X(), MemReg); break; + default: LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); break; } // Half-barrier once back-patched. 
nop(); } - } - else if (Op->Class == FEXCore::IR::GPRClass) { + } else if (Op->Class == FEXCore::IR::GPRClass) { const auto Dst = GetReg(Node); if (OpSize == 1) { // 8bit load is always aligned to natural alignment ldarb(Dst, MemReg); - } - else { + } else { switch (OpSize) { - case 2: - ldarh(Dst, MemReg); - break; - case 4: - ldar(Dst.W(), MemReg); - break; - case 8: - ldar(Dst.X(), MemReg); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); - break; + case 2: ldarh(Dst, MemReg); break; + case 4: ldar(Dst.W(), MemReg); break; + case 8: ldar(Dst.X(), MemReg); break; + default: LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); break; } // Half-barrier once back-patched. nop(); } - } - else { + } else { const auto Dst = GetVReg(Node); const auto MemSrc = GenerateMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); switch (OpSize) { - case 1: - ldrb(Dst, MemSrc); - break; - case 2: - ldrh(Dst, MemSrc); - break; - case 4: - ldr(Dst.S(), MemSrc); - break; - case 8: - ldr(Dst.D(), MemSrc); - break; - case 16: - ldr(Dst.Q(), MemSrc); - break; - case 32: { - const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); - ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), MemSrc); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); - break; + case 1: ldrb(Dst, MemSrc); break; + case 2: ldrh(Dst, MemSrc); break; + case 4: ldr(Dst.S(), MemSrc); break; + case 8: ldr(Dst.D(), MemSrc); break; + case 16: ldr(Dst.Q(), MemSrc); break; + case 32: { + const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); + ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), MemSrc); + break; + } + default: LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); break; } // Half-barrier. 
dmb(FEXCore::ARMEmitter::BarrierScope::ISHLD); @@ -1189,35 +983,33 @@ DEF_OP(VLoadVectorMasked) { const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; // Check if the sign bit is set for the given element size. cmplt(SubRegSize, CMPPredicate, GoverningPredicate.Zeroing(), MaskReg.Z(), 0); switch (ElementSize) { - case 1: { - ld1b(Dst.Z(), CMPPredicate.Zeroing(), MemSrc); - break; - } - case 2: { - ld1h(Dst.Z(), CMPPredicate.Zeroing(), MemSrc); - break; - } - case 4: { - ld1w(Dst.Z(), CMPPredicate.Zeroing(), MemSrc); - break; - } - case 8: { - ld1d(Dst.Z(), CMPPredicate.Zeroing(), MemSrc); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled VLoadVectorMasked size: {}", ElementSize); - break; + case 1: { + ld1b(Dst.Z(), CMPPredicate.Zeroing(), MemSrc); + break; + } + case 2: { + ld1h(Dst.Z(), CMPPredicate.Zeroing(), MemSrc); + break; + } + case 4: { + ld1w(Dst.Z(), CMPPredicate.Zeroing(), MemSrc); + break; + } + case 8: { + ld1d(Dst.Z(), CMPPredicate.Zeroing(), MemSrc); + break; + } + default: LOGMAN_MSG_A_FMT("Unhandled VLoadVectorMasked size: {}", ElementSize); break; } } @@ -1239,35 +1031,33 @@ DEF_OP(VStoreVectorMasked) { const auto MemDst = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); LOGMAN_THROW_AA_FMT(ElementSize == 1 || 
ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; // Check if the sign bit is set for the given element size. cmplt(SubRegSize, CMPPredicate, GoverningPredicate.Zeroing(), MaskReg.Z(), 0); switch (ElementSize) { - case 1: { - st1b(RegData.Z(), CMPPredicate.Zeroing(), MemDst); - break; - } - case 2: { - st1h(RegData.Z(), CMPPredicate.Zeroing(), MemDst); - break; - } - case 4: { - st1w(RegData.Z(), CMPPredicate.Zeroing(), MemDst); - break; - } - case 8: { - st1d(RegData.Z(), CMPPredicate.Zeroing(), MemDst); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled VStoreVectorMasked size: {}", ElementSize); - break; + case 1: { + st1b(RegData.Z(), CMPPredicate.Zeroing(), MemDst); + break; + } + case 2: { + st1h(RegData.Z(), CMPPredicate.Zeroing(), MemDst); + break; + } + case 4: { + st1w(RegData.Z(), CMPPredicate.Zeroing(), MemDst); + break; + } + case 8: { + st1d(RegData.Z(), CMPPredicate.Zeroing(), MemDst); + break; + } + default: LOGMAN_MSG_A_FMT("Unhandled VStoreVectorMasked size: {}", ElementSize); break; } } @@ -1282,9 +1072,8 @@ DEF_OP(VLoadVectorElement) { const auto DstSrc = GetVReg(Op->DstSrc.ID()); const auto MemReg = GetReg(Op->Addr.ID()); - LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || - ElementSize == 4 || ElementSize == 8 || - ElementSize == 16, "Invalid element size"); + LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 
16, "Invalid element " + "size"); if (Is256Bit) { LOGMAN_MSG_A_FMT("Unsupported 256-bit VLoadVectorElement"); @@ -1293,24 +1082,12 @@ DEF_OP(VLoadVectorElement) { mov(Dst.Q(), DstSrc.Q()); } switch (ElementSize) { - case 1: - ld1(Dst.Q(), Op->Index, MemReg); - break; - case 2: - ld1(Dst.Q(), Op->Index, MemReg); - break; - case 4: - ld1(Dst.Q(), Op->Index, MemReg); - break; - case 8: - ld1(Dst.Q(), Op->Index, MemReg); - break; - case 16: - ldr(Dst.Q(), MemReg); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ElementSize); - return; + case 1: ld1(Dst.Q(), Op->Index, MemReg); break; + case 2: ld1(Dst.Q(), Op->Index, MemReg); break; + case 4: ld1(Dst.Q(), Op->Index, MemReg); break; + case 8: ld1(Dst.Q(), Op->Index, MemReg); break; + case 16: ldr(Dst.Q(), MemReg); break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ElementSize); return; } } @@ -1330,9 +1107,8 @@ DEF_OP(VStoreVectorElement) { const auto Value = GetVReg(Op->Value.ID()); const auto MemReg = GetReg(Op->Addr.ID()); - LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || - ElementSize == 4 || ElementSize == 8 || - ElementSize == 16, "Invalid element size"); + LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid element " + "size"); // Emit a half-barrier if TSO is enabled. 
if (CTX->IsAtomicTSOEnabled()) { @@ -1343,24 +1119,12 @@ DEF_OP(VStoreVectorElement) { LOGMAN_MSG_A_FMT("Unsupported 256-bit {}", __func__); } else { switch (ElementSize) { - case 1: - st1(Value.Q(), Op->Index, MemReg); - break; - case 2: - st1(Value.Q(), Op->Index, MemReg); - break; - case 4: - st1(Value.Q(), Op->Index, MemReg); - break; - case 8: - st1(Value.Q(), Op->Index, MemReg); - break; - case 16: - str(Value.Q(), MemReg); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ElementSize); - return; + case 1: st1(Value.Q(), Op->Index, MemReg); break; + case 2: st1(Value.Q(), Op->Index, MemReg); break; + case 4: st1(Value.Q(), Op->Index, MemReg); break; + case 8: st1(Value.Q(), Op->Index, MemReg); break; + case 16: str(Value.Q(), MemReg); break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ElementSize); return; } } } @@ -1375,9 +1139,8 @@ DEF_OP(VBroadcastFromMem) { const auto Dst = GetVReg(Node); const auto MemReg = GetReg(Op->Address.ID()); - LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || - ElementSize == 4 || ElementSize == 8 || - ElementSize == 16, "Invalid element size"); + LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid element " + "size"); if (Is256Bit && !HostSupportsSVE256) { LOGMAN_MSG_A_FMT("{}: 256-bit vectors must support SVE256", __func__); @@ -1388,49 +1151,24 @@ DEF_OP(VBroadcastFromMem) { const auto GoverningPredicate = PRED_TMP_32B.Zeroing(); switch (ElementSize) { - case 1: - ld1rb(ARMEmitter::SubRegSize::i8Bit, Dst.Z(), - GoverningPredicate, MemReg); - break; - case 2: - ld1rh(ARMEmitter::SubRegSize::i16Bit, Dst.Z(), - GoverningPredicate, MemReg); - break; - case 4: - ld1rw(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), - GoverningPredicate, MemReg); - break; - case 8: - ld1rd(Dst.Z(), GoverningPredicate, MemReg); - break; - case 16: - ld1rqb(Dst.Z(), GoverningPredicate, MemReg); - break; - default: - 
LOGMAN_MSG_A_FMT("Unhandled VBroadcastFromMem size: {}", ElementSize); - return; + case 1: ld1rb(ARMEmitter::SubRegSize::i8Bit, Dst.Z(), GoverningPredicate, MemReg); break; + case 2: ld1rh(ARMEmitter::SubRegSize::i16Bit, Dst.Z(), GoverningPredicate, MemReg); break; + case 4: ld1rw(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), GoverningPredicate, MemReg); break; + case 8: ld1rd(Dst.Z(), GoverningPredicate, MemReg); break; + case 16: ld1rqb(Dst.Z(), GoverningPredicate, MemReg); break; + default: LOGMAN_MSG_A_FMT("Unhandled VBroadcastFromMem size: {}", ElementSize); return; } } else { switch (ElementSize) { - case 1: - ld1r(Dst.Q(), MemReg); - break; - case 2: - ld1r(Dst.Q(), MemReg); - break; - case 4: - ld1r(Dst.Q(), MemReg); - break; - case 8: - ld1r(Dst.Q(), MemReg); - break; + case 1: ld1r(Dst.Q(), MemReg); break; + case 2: ld1r(Dst.Q(), MemReg); break; + case 4: ld1r(Dst.Q(), MemReg); break; + case 8: ld1r(Dst.Q(), MemReg); break; case 16: // Normal load, like ld1rqb with 128-bit regs. ldr(Dst.Q(), MemReg); break; - default: - LOGMAN_MSG_A_FMT("Unhandled VBroadcastFromMem size: {}", ElementSize); - return; + default: LOGMAN_MSG_A_FMT("Unhandled VBroadcastFromMem size: {}", ElementSize); return; } } @@ -1452,8 +1190,7 @@ DEF_OP(Push) { if (Dst == Src) { NeedsMoveAfterwards = true; // Need to be careful here, incoming source might be reused afterwards. - } - else { + } else { // RA constraints would let this always be true. mov(IROp->Size == 8 ? 
ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit, Dst, AddrSrc); } @@ -1477,52 +1214,51 @@ DEF_OP(Push) { if (NeedsMoveAfterwards) { switch (ValueSize) { - case 1: { - sturb(Src.W(), AddrSrc, -ValueSize); - break; - } - case 2: { - sturh(Src.W(), AddrSrc, -ValueSize); - break; - } - case 4: { - stur(Src.W(), AddrSrc, -ValueSize); - break; - } - case 8: { - stur(Src.X(), AddrSrc, -ValueSize); - break; - } - default: { - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ValueSize); - break; - } + case 1: { + sturb(Src.W(), AddrSrc, -ValueSize); + break; + } + case 2: { + sturh(Src.W(), AddrSrc, -ValueSize); + break; + } + case 4: { + stur(Src.W(), AddrSrc, -ValueSize); + break; + } + case 8: { + stur(Src.X(), AddrSrc, -ValueSize); + break; + } + default: { + LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ValueSize); + break; + } } sub(IROp->Size == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit, Dst, AddrSrc, ValueSize); - } - else { + } else { switch (ValueSize) { - case 1: { - strb(Src.W(), Dst, -ValueSize); - break; - } - case 2: { - strh(Src.W(), Dst, -ValueSize); - break; - } - case 4: { - str(Src.W(), Dst, -ValueSize); - break; - } - case 8: { - str(Src.X(), Dst, -ValueSize); - break; - } - default: { - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ValueSize); - break; - } + case 1: { + strb(Src.W(), Dst, -ValueSize); + break; + } + case 2: { + strh(Src.W(), Dst, -ValueSize); + break; + } + case 4: { + str(Src.W(), Dst, -ValueSize); + break; + } + case 8: { + str(Src.X(), Dst, -ValueSize); + break; + } + default: { + LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ValueSize); + break; + } } } } @@ -1537,55 +1273,42 @@ DEF_OP(StoreMem) { if (Op->Class == FEXCore::IR::GPRClass) { const auto Src = GetReg(Op->Value.ID()); switch (OpSize) { - case 1: - strb(Src, MemSrc); - break; - case 2: - strh(Src, MemSrc); - break; - case 4: - str(Src.W(), MemSrc); - break; - case 8: - str(Src.X(), MemSrc); - break; - default: - 
LOGMAN_MSG_A_FMT("Unhandled StoreMem size: {}", OpSize); - break; + case 1: strb(Src, MemSrc); break; + case 2: strh(Src, MemSrc); break; + case 4: str(Src.W(), MemSrc); break; + case 8: str(Src.X(), MemSrc); break; + default: LOGMAN_MSG_A_FMT("Unhandled StoreMem size: {}", OpSize); break; } - } - else { + } else { const auto Src = GetVReg(Op->Value.ID()); switch (OpSize) { - case 1: { - strb(Src, MemSrc); - break; - } - case 2: { - strh(Src, MemSrc); - break; - } - case 4: { - str(Src.S(), MemSrc); - break; - } - case 8: { - str(Src.D(), MemSrc); - break; - } - case 16: { - str(Src.Q(), MemSrc); - break; - } - case 32: { - const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); - st1b(Src.Z(), PRED_TMP_32B, MemSrc); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled StoreMem size: {}", OpSize); - break; + case 1: { + strb(Src, MemSrc); + break; + } + case 2: { + strh(Src, MemSrc); + break; + } + case 4: { + str(Src.S(), MemSrc); + break; + } + case 8: { + str(Src.D(), MemSrc); + break; + } + case 16: { + str(Src.Q(), MemSrc); + break; + } + case 32: { + const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); + st1b(Src.Z(), PRED_TMP_32B, MemSrc); + break; + } + default: LOGMAN_MSG_A_FMT("Unhandled StoreMem size: {}", OpSize); break; } } } @@ -1606,81 +1329,49 @@ DEF_OP(StoreMemTSO) { if (OpSize == 1) { // 8bit load is always aligned to natural alignment stlurb(Src, MemReg, Offset); - } - else { + } else { // Half-barrier once back-patched. 
nop(); switch (OpSize) { - case 2: - stlurh(Src, MemReg, Offset); - break; - case 4: - stlur(Src.W(), MemReg, Offset); - break; - case 8: - stlur(Src.X(), MemReg, Offset); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled StoreMemTSO size: {}", OpSize); - break; + case 2: stlurh(Src, MemReg, Offset); break; + case 4: stlur(Src.W(), MemReg, Offset); break; + case 8: stlur(Src.X(), MemReg, Offset); break; + default: LOGMAN_MSG_A_FMT("Unhandled StoreMemTSO size: {}", OpSize); break; } } - } - else if (Op->Class == FEXCore::IR::GPRClass) { + } else if (Op->Class == FEXCore::IR::GPRClass) { const auto Src = GetReg(Op->Value.ID()); if (OpSize == 1) { // 8bit load is always aligned to natural alignment stlrb(Src, MemReg); - } - else { + } else { // Half-barrier once back-patched. nop(); switch (OpSize) { - case 2: - stlrh(Src, MemReg); - break; - case 4: - stlr(Src.W(), MemReg); - break; - case 8: - stlr(Src.X(), MemReg); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled StoreMemTSO size: {}", OpSize); - break; + case 2: stlrh(Src, MemReg); break; + case 4: stlr(Src.W(), MemReg); break; + case 8: stlr(Src.X(), MemReg); break; + default: LOGMAN_MSG_A_FMT("Unhandled StoreMemTSO size: {}", OpSize); break; } } - } - else { + } else { // Half-Barrier. 
dmb(FEXCore::ARMEmitter::BarrierScope::ISH); const auto Src = GetVReg(Op->Value.ID()); const auto MemSrc = GenerateMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); switch (OpSize) { - case 1: - strb(Src, MemSrc); - break; - case 2: - strh(Src, MemSrc); - break; - case 4: - str(Src.S(), MemSrc); - break; - case 8: - str(Src.D(), MemSrc); - break; - case 16: - str(Src.Q(), MemSrc); - break; - case 32: { - const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); - st1b(Src.Z(), PRED_TMP_32B, Operand); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled StoreMemTSO size: {}", OpSize); - break; + case 1: strb(Src, MemSrc); break; + case 2: strh(Src, MemSrc); break; + case 4: str(Src.S(), MemSrc); break; + case 8: str(Src.D(), MemSrc); break; + case 16: str(Src.Q(), MemSrc); break; + case 32: { + const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale); + st1b(Src.Z(), PRED_TMP_32B, Operand); + break; + } + default: LOGMAN_MSG_A_FMT("Unhandled StoreMemTSO size: {}", OpSize); break; } } } @@ -1715,14 +1406,13 @@ DEF_OP(MemSet) { // // Counter is decremented regardless. 
- ARMEmitter::SingleUseForwardLabel BackwardImpl{}; - ARMEmitter::SingleUseForwardLabel Done{}; + ARMEmitter::SingleUseForwardLabel BackwardImpl {}; + ARMEmitter::SingleUseForwardLabel Done {}; mov(TMP1, Length.X()); if (Op->Prefix.IsInvalid()) { mov(TMP2, MemReg.X()); - } - else { + } else { const auto Prefix = GetReg(Op->Prefix.ID()); add(TMP2, Prefix.X(), MemReg.X()); } @@ -1734,21 +1424,11 @@ DEF_OP(MemSet) { auto MemStore = [this](auto Value, uint32_t OpSize, int32_t Size) { switch (OpSize) { - case 1: - strb(Value.W(), TMP2, Size); - break; - case 2: - strh(Value.W(), TMP2, Size); - break; - case 4: - str(Value.W(), TMP2, Size); - break; - case 8: - str(Value.X(), TMP2, Size); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; + case 1: strb(Value.W(), TMP2, Size); break; + case 2: strh(Value.W(), TMP2, Size); break; + case 4: str(Value.W(), TMP2, Size); break; + case 8: str(Value.X(), TMP2, Size); break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break; } }; @@ -1756,30 +1436,20 @@ DEF_OP(MemSet) { if (OpSize == 1) { // 8bit load is always aligned to natural alignment stlrb(Value.W(), TMP2); - } - else { + } else { nop(); switch (OpSize) { - case 2: - stlrh(Value.W(), TMP2); - break; - case 4: - stlr(Value.W(), TMP2); - break; - case 8: - stlr(Value.X(), TMP2); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; + case 2: stlrh(Value.W(), TMP2); break; + case 4: stlr(Value.W(), TMP2); break; + case 8: stlr(Value.X(), TMP2); break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break; } nop(); } if (Size >= 0) { add(ARMEmitter::Size::i64Bit, TMP2, TMP2, OpSize); - } - else { + } else { sub(ARMEmitter::Size::i64Bit, TMP2, TMP2, OpSize); } }; @@ -1788,8 +1458,8 @@ DEF_OP(MemSet) { const int32_t OpSize = Size; const int32_t SizeDirection = Size * Direction; - ARMEmitter::BackwardLabel AgainInternal{}; - ARMEmitter::SingleUseForwardLabel 
DoneInternal{}; + ARMEmitter::BackwardLabel AgainInternal {}; + ARMEmitter::SingleUseForwardLabel DoneInternal {}; // Early exit if zero count. cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal); @@ -1797,8 +1467,7 @@ DEF_OP(MemSet) { Bind(&AgainInternal); if (Op->IsAtomic) { MemStoreTSO(Value, OpSize, SizeDirection); - } - else { + } else { MemStore(Value, OpSize, SizeDirection); } sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 1); @@ -1808,40 +1477,19 @@ DEF_OP(MemSet) { if (SizeDirection >= 0) { switch (OpSize) { - case 1: - add(Dst.X(), MemReg.X(), Length.X()); - break; - case 2: - add(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, 1); - break; - case 4: - add(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, 2); - break; - case 8: - add(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, 3); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); - break; + case 1: add(Dst.X(), MemReg.X(), Length.X()); break; + case 2: add(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, 1); break; + case 4: add(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, 2); break; + case 8: add(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, 3); break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); break; } - } - else { + } else { switch (OpSize) { - case 1: - sub(Dst.X(), MemReg.X(), Length.X()); - break; - case 2: - sub(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, 1); - break; - case 4: - sub(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, 2); - break; - case 8: - sub(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, 3); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); - break; + case 1: sub(Dst.X(), MemReg.X(), Length.X()); break; + case 2: sub(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, 1); break; + case 4: sub(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, 2); 
break; + case 8: sub(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, 3); break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); break; } } }; @@ -1849,10 +1497,9 @@ DEF_OP(MemSet) { if (DirectionIsInline) { // If the direction constant is set then the direction is negative. EmitMemset(DirectionConstant ? -1 : 1); - } - else { + } else { // Emit forward direction memset then backward direction memset. - for (int32_t Direction : { 1, -1 }) { + for (int32_t Direction : {1, -1}) { EmitMemset(Direction); if (Direction == 1) { @@ -1895,22 +1542,20 @@ DEF_OP(MemCpy) { // // Counter is decremented regardless. - ARMEmitter::SingleUseForwardLabel BackwardImpl{}; - ARMEmitter::SingleUseForwardLabel Done{}; + ARMEmitter::SingleUseForwardLabel BackwardImpl {}; + ARMEmitter::SingleUseForwardLabel Done {}; mov(TMP1, Length.X()); if (Op->PrefixDest.IsInvalid()) { mov(TMP2, MemRegDest.X()); - } - else { + } else { const auto Prefix = GetReg(Op->PrefixDest.ID()); add(TMP2, Prefix.X(), MemRegDest.X()); } if (Op->PrefixSrc.IsInvalid()) { mov(TMP3, MemRegSrc.X()); - } - else { + } else { const auto Prefix = GetReg(Op->PrefixSrc.ID()); add(TMP3, Prefix.X(), MemRegSrc.X()); } @@ -1927,25 +1572,23 @@ DEF_OP(MemCpy) { auto MemCpy = [this](uint32_t OpSize, int32_t Size) { switch (OpSize) { - case 1: - ldrb(TMP4.W(), TMP3, Size); - strb(TMP4.W(), TMP2, Size); - break; - case 2: - ldrh(TMP4.W(), TMP3, Size); - strh(TMP4.W(), TMP2, Size); - break; - case 4: - ldr(TMP4.W(), TMP3, Size); - str(TMP4.W(), TMP2, Size); - break; - case 8: - ldr(TMP4, TMP3, Size); - str(TMP4, TMP2, Size); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; + case 1: + ldrb(TMP4.W(), TMP3, Size); + strb(TMP4.W(), TMP2, Size); + break; + case 2: + ldrh(TMP4.W(), TMP3, Size); + strh(TMP4.W(), TMP2, Size); + break; + case 4: + ldr(TMP4.W(), TMP3, Size); + str(TMP4.W(), TMP2, Size); + break; + case 8: + ldr(TMP4, TMP3, Size); + str(TMP4, TMP2, 
Size); + break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break; } }; @@ -1955,81 +1598,46 @@ DEF_OP(MemCpy) { // 8bit load is always aligned to natural alignment ldaprb(TMP4.W(), TMP3); stlrb(TMP4.W(), TMP2); - } - else { + } else { nop(); switch (OpSize) { - case 2: - ldaprh(TMP4.W(), TMP3); - break; - case 4: - ldapr(TMP4.W(), TMP3); - break; - case 8: - ldapr(TMP4, TMP3); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; + case 2: ldaprh(TMP4.W(), TMP3); break; + case 4: ldapr(TMP4.W(), TMP3); break; + case 8: ldapr(TMP4, TMP3); break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break; } nop(); nop(); switch (OpSize) { - case 2: - stlrh(TMP4.W(), TMP2); - break; - case 4: - stlr(TMP4.W(), TMP2); - break; - case 8: - stlr(TMP4, TMP2); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; + case 2: stlrh(TMP4.W(), TMP2); break; + case 4: stlr(TMP4.W(), TMP2); break; + case 8: stlr(TMP4, TMP2); break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break; } nop(); } - } - else { + } else { if (OpSize == 1) { // 8bit load is always aligned to natural alignment ldarb(TMP4.W(), TMP3); stlrb(TMP4.W(), TMP2); - } - else { + } else { nop(); switch (OpSize) { - case 2: - ldarh(TMP4.W(), TMP3); - break; - case 4: - ldar(TMP4.W(), TMP3); - break; - case 8: - ldar(TMP4, TMP3); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; + case 2: ldarh(TMP4.W(), TMP3); break; + case 4: ldar(TMP4.W(), TMP3); break; + case 8: ldar(TMP4, TMP3); break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break; } nop(); nop(); switch (OpSize) { - case 2: - stlrh(TMP4.W(), TMP2); - break; - case 4: - stlr(TMP4.W(), TMP2); - break; - case 8: - stlr(TMP4, TMP2); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); - break; + case 2: stlrh(TMP4.W(), TMP2); 
break; + case 4: stlr(TMP4.W(), TMP2); break; + case 8: stlr(TMP4, TMP2); break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break; } nop(); } @@ -2038,8 +1646,7 @@ DEF_OP(MemCpy) { if (Size >= 0) { add(ARMEmitter::Size::i64Bit, TMP2, TMP2, OpSize); add(ARMEmitter::Size::i64Bit, TMP3, TMP3, OpSize); - } - else { + } else { sub(ARMEmitter::Size::i64Bit, TMP2, TMP2, OpSize); sub(ARMEmitter::Size::i64Bit, TMP3, TMP3, OpSize); } @@ -2049,8 +1656,8 @@ DEF_OP(MemCpy) { const int32_t OpSize = Size; const int32_t SizeDirection = Size * Direction; - ARMEmitter::BackwardLabel AgainInternal{}; - ARMEmitter::SingleUseForwardLabel DoneInternal{}; + ARMEmitter::BackwardLabel AgainInternal {}; + ARMEmitter::SingleUseForwardLabel DoneInternal {}; // Early exit if zero count. cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal); @@ -2058,8 +1665,7 @@ DEF_OP(MemCpy) { Bind(&AgainInternal); if (Op->IsAtomic) { MemCpyTSO(OpSize, SizeDirection); - } - else { + } else { MemCpy(OpSize, SizeDirection); } sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 1); @@ -2074,48 +1680,43 @@ DEF_OP(MemCpy) { if (SizeDirection >= 0) { switch (OpSize) { - case 1: - add(Dst.first.X(), TMP1, TMP3); - add(Dst.second.X(), TMP2, TMP3); - break; - case 2: - add(Dst.first.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, 1); - add(Dst.second.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, 1); - break; - case 4: - add(Dst.first.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, 2); - add(Dst.second.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, 2); - break; - case 8: - add(Dst.first.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, 3); - add(Dst.second.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, 3); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); - break; + case 1: + add(Dst.first.X(), TMP1, TMP3); + add(Dst.second.X(), TMP2, TMP3); + break; + case 2: + add(Dst.first.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, 1); + add(Dst.second.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, 
1); + break; + case 4: + add(Dst.first.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, 2); + add(Dst.second.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, 2); + break; + case 8: + add(Dst.first.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, 3); + add(Dst.second.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, 3); + break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); break; } - } - else { + } else { switch (OpSize) { - case 1: - sub(Dst.first.X(), TMP1, TMP3); - sub(Dst.second.X(), TMP2, TMP3); - break; - case 2: - sub(Dst.first.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, 1); - sub(Dst.second.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, 1); - break; - case 4: - sub(Dst.first.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, 2); - sub(Dst.second.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, 2); - break; - case 8: - sub(Dst.first.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, 3); - sub(Dst.second.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, 3); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); - break; + case 1: + sub(Dst.first.X(), TMP1, TMP3); + sub(Dst.second.X(), TMP2, TMP3); + break; + case 2: + sub(Dst.first.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, 1); + sub(Dst.second.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, 1); + break; + case 4: + sub(Dst.first.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, 2); + sub(Dst.second.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, 2); + break; + case 8: + sub(Dst.first.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, 3); + sub(Dst.second.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, 3); + break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); break; } } }; @@ -2123,10 +1724,9 @@ DEF_OP(MemCpy) { if (DirectionIsInline) { // If the direction constant is set then the direction is negative. EmitMemcpy(DirectionConstant ? -1 : 1); - } - else { + } else { // Emit forward direction memset then backward direction memset. 
- for (int32_t Direction : { 1, -1 }) { + for (int32_t Direction : {1, -1}) { EmitMemcpy(Direction); if (Direction == 1) { b(&Done); @@ -2155,100 +1755,67 @@ DEF_OP(ParanoidLoadMemTSO) { // 8bit load is always aligned to natural alignment const auto Dst = GetReg(Node); ldapurb(Dst, MemReg, Offset); - } - else { + } else { switch (OpSize) { - case 2: - ldapurh(Dst, MemReg, Offset); - break; - case 4: - ldapur(Dst.W(), MemReg, Offset); - break; - case 8: - ldapur(Dst.X(), MemReg, Offset); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled ParanoidLoadMemTSO size: {}", OpSize); - break; + case 2: ldapurh(Dst, MemReg, Offset); break; + case 4: ldapur(Dst.W(), MemReg, Offset); break; + case 8: ldapur(Dst.X(), MemReg, Offset); break; + default: LOGMAN_MSG_A_FMT("Unhandled ParanoidLoadMemTSO size: {}", OpSize); break; } } - } - else if (CTX->HostFeatures.SupportsRCPC && Op->Class == FEXCore::IR::GPRClass) { + } else if (CTX->HostFeatures.SupportsRCPC && Op->Class == FEXCore::IR::GPRClass) { const auto Dst = GetReg(Node); if (OpSize == 1) { // 8bit load is always aligned to natural alignment ldaprb(Dst.W(), MemReg); - } - else { + } else { switch (OpSize) { - case 2: - ldaprh(Dst.W(), MemReg); - break; - case 4: - ldapr(Dst.W(), MemReg); - break; - case 8: - ldapr(Dst.X(), MemReg); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled ParanoidLoadMemTSO size: {}", OpSize); - break; + case 2: ldaprh(Dst.W(), MemReg); break; + case 4: ldapr(Dst.W(), MemReg); break; + case 8: ldapr(Dst.X(), MemReg); break; + default: LOGMAN_MSG_A_FMT("Unhandled ParanoidLoadMemTSO size: {}", OpSize); break; } } - } - else if (Op->Class == FEXCore::IR::GPRClass) { + } else if (Op->Class == FEXCore::IR::GPRClass) { const auto Dst = GetReg(Node); switch (OpSize) { - case 1: - ldarb(Dst, MemReg); - break; - case 2: - ldarh(Dst, MemReg); - break; - case 4: - ldar(Dst.W(), MemReg); - break; - case 8: - ldar(Dst.X(), MemReg); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled ParanoidLoadMemTSO size: {}", 
OpSize); - break; + case 1: ldarb(Dst, MemReg); break; + case 2: ldarh(Dst, MemReg); break; + case 4: ldar(Dst.W(), MemReg); break; + case 8: ldar(Dst.X(), MemReg); break; + default: LOGMAN_MSG_A_FMT("Unhandled ParanoidLoadMemTSO size: {}", OpSize); break; } - } - else { + } else { const auto Dst = GetVReg(Node); switch (OpSize) { - case 1: - ldarb(TMP1, MemReg); - fmov(ARMEmitter::Size::i32Bit, Dst.S(), TMP1.W()); - break; - case 2: - ldarh(TMP1, MemReg); - fmov(ARMEmitter::Size::i32Bit, Dst.S(), TMP1.W()); - break; - case 4: - ldar(TMP1.W(), MemReg); - fmov(ARMEmitter::Size::i32Bit, Dst.S(), TMP1.W()); - break; - case 8: - ldar(TMP1, MemReg); - fmov(ARMEmitter::Size::i64Bit, Dst.D(), TMP1); - break; - case 16: - ldaxp(ARMEmitter::Size::i64Bit, TMP1, TMP2, MemReg); - clrex(); - ins(ARMEmitter::SubRegSize::i64Bit, Dst, 0, TMP1); - ins(ARMEmitter::SubRegSize::i64Bit, Dst, 1, TMP2); - break; - case 32: - dmb(FEXCore::ARMEmitter::BarrierScope::ISH); - ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), MemReg); - dmb(FEXCore::ARMEmitter::BarrierScope::ISH); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled ParanoidLoadMemTSO size: {}", OpSize); - break; + case 1: + ldarb(TMP1, MemReg); + fmov(ARMEmitter::Size::i32Bit, Dst.S(), TMP1.W()); + break; + case 2: + ldarh(TMP1, MemReg); + fmov(ARMEmitter::Size::i32Bit, Dst.S(), TMP1.W()); + break; + case 4: + ldar(TMP1.W(), MemReg); + fmov(ARMEmitter::Size::i32Bit, Dst.S(), TMP1.W()); + break; + case 8: + ldar(TMP1, MemReg); + fmov(ARMEmitter::Size::i64Bit, Dst.D(), TMP1); + break; + case 16: + ldaxp(ARMEmitter::Size::i64Bit, TMP1, TMP2, MemReg); + clrex(); + ins(ARMEmitter::SubRegSize::i64Bit, Dst, 0, TMP1); + ins(ARMEmitter::SubRegSize::i64Bit, Dst, 1, TMP2); + break; + case 32: + dmb(FEXCore::ARMEmitter::BarrierScope::ISH); + ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), MemReg); + dmb(FEXCore::ARMEmitter::BarrierScope::ISH); + break; + default: LOGMAN_MSG_A_FMT("Unhandled ParanoidLoadMemTSO size: {}", OpSize); break; } } } @@ -2269,86 +1836,63 
@@ DEF_OP(ParanoidStoreMemTSO) { if (OpSize == 1) { // 8bit load is always aligned to natural alignment stlurb(Src, MemReg, Offset); - } - else { + } else { switch (OpSize) { - case 2: - stlurh(Src, MemReg, Offset); - break; - case 4: - stlur(Src.W(), MemReg, Offset); - break; - case 8: - stlur(Src.X(), MemReg, Offset); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled ParanoidStoreMemTSO size: {}", OpSize); - break; + case 2: stlurh(Src, MemReg, Offset); break; + case 4: stlur(Src.W(), MemReg, Offset); break; + case 8: stlur(Src.X(), MemReg, Offset); break; + default: LOGMAN_MSG_A_FMT("Unhandled ParanoidStoreMemTSO size: {}", OpSize); break; } } - } - else if (Op->Class == FEXCore::IR::GPRClass) { + } else if (Op->Class == FEXCore::IR::GPRClass) { const auto Src = GetReg(Op->Value.ID()); switch (OpSize) { - case 1: - stlrb(Src, MemReg); - break; - case 2: - stlrh(Src, MemReg); - break; - case 4: - stlr(Src.W(), MemReg); - break; - case 8: - stlr(Src.X(), MemReg); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled ParanoidStoreMemTSO size: {}", OpSize); - break; + case 1: stlrb(Src, MemReg); break; + case 2: stlrh(Src, MemReg); break; + case 4: stlr(Src.W(), MemReg); break; + case 8: stlr(Src.X(), MemReg); break; + default: LOGMAN_MSG_A_FMT("Unhandled ParanoidStoreMemTSO size: {}", OpSize); break; } - } - else { + } else { const auto Src = GetVReg(Op->Value.ID()); switch (OpSize) { - case 1: - umov(TMP1, Src, 0); - stlrb(TMP1, MemReg); - break; - case 2: - umov(TMP1, Src, 0); - stlrh(TMP1, MemReg); - break; - case 4: - umov(TMP1, Src, 0); - stlr(TMP1.W(), MemReg); - break; - case 8: - umov(TMP1, Src, 0); - stlr(TMP1, MemReg); - break; - case 16: { - // Move vector to GPRs - umov(TMP1, Src, 0); - umov(TMP2, Src, 1); - ARMEmitter::BackwardLabel B; - Bind(&B); - - // ldaxp must not have both the destination registers be the same - ldaxp(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::zr, TMP3, MemReg); // <- Can hit SIGBUS. 
Overwritten with DMB - stlxp(ARMEmitter::Size::i64Bit, TMP3, TMP1, TMP2, MemReg); // <- Can also hit SIGBUS - cbnz(ARMEmitter::Size::i64Bit, TMP3, &B); // < Overwritten with DMB - break; - } - case 32: { - dmb(FEXCore::ARMEmitter::BarrierScope::ISH); - st1b(Src.Z(), PRED_TMP_32B, MemReg, 0); - dmb(FEXCore::ARMEmitter::BarrierScope::ISH); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled ParanoidStoreMemTSO size: {}", OpSize); - break; + case 1: + umov(TMP1, Src, 0); + stlrb(TMP1, MemReg); + break; + case 2: + umov(TMP1, Src, 0); + stlrh(TMP1, MemReg); + break; + case 4: + umov(TMP1, Src, 0); + stlr(TMP1.W(), MemReg); + break; + case 8: + umov(TMP1, Src, 0); + stlr(TMP1, MemReg); + break; + case 16: { + // Move vector to GPRs + umov(TMP1, Src, 0); + umov(TMP2, Src, 1); + ARMEmitter::BackwardLabel B; + Bind(&B); + + // ldaxp must not have both the destination registers be the same + ldaxp(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::zr, TMP3, MemReg); // <- Can hit SIGBUS. Overwritten with DMB + stlxp(ARMEmitter::Size::i64Bit, TMP3, TMP1, TMP2, MemReg); // <- Can also hit SIGBUS + cbnz(ARMEmitter::Size::i64Bit, TMP3, &B); // < Overwritten with DMB + break; + } + case 32: { + dmb(FEXCore::ARMEmitter::BarrierScope::ISH); + st1b(Src.Z(), PRED_TMP_32B, MemReg, 0); + dmb(FEXCore::ARMEmitter::BarrierScope::ISH); + break; + } + default: LOGMAN_MSG_A_FMT("Unhandled ParanoidStoreMemTSO size: {}", OpSize); break; } } } @@ -2362,8 +1906,7 @@ DEF_OP(CacheLineClear) { // icache doesn't matter here since the guest application shouldn't be calling clflush on JIT code. 
if (CTX->HostFeatures.DCacheLineSize >= 64U) { dc(ARMEmitter::DataCacheOperation::CIVAC, MemReg); - } - else { + } else { auto CurrentWorkingReg = MemReg.X(); for (size_t i = 0; i < std::max(1U, CTX->HostFeatures.DCacheLineSize / 64U); ++i) { dc(ARMEmitter::DataCacheOperation::CIVAC, TMP1); @@ -2386,8 +1929,7 @@ DEF_OP(CacheLineClean) { // Clean dcache only if (CTX->HostFeatures.DCacheLineSize >= 64U) { dc(ARMEmitter::DataCacheOperation::CVAC, MemReg); - } - else { + } else { auto CurrentWorkingReg = MemReg.X(); for (size_t i = 0; i < std::max(1U, CTX->HostFeatures.DCacheLineSize / 64U); ++i) { dc(ARMEmitter::DataCacheOperation::CVAC, TMP1); @@ -2405,8 +1947,7 @@ DEF_OP(CacheLineZero) { if (CTX->HostFeatures.SupportsCLZERO) { // We can use this instruction directly dc(ARMEmitter::DataCacheOperation::ZVA, MemReg); - } - else { + } else { // We must walk the cacheline ourselves // Force cacheline alignment and_(ARMEmitter::Size::i64Bit, TMP1, MemReg, ~(CPUIDEmu::CACHELINE_SIZE - 1)); @@ -2420,5 +1961,4 @@ DEF_OP(CacheLineZero) { } #undef DEF_OP -} - +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/MiscOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/MiscOps.cpp index 0991349796..137258dc62 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/MiscOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/MiscOps.cpp @@ -17,7 +17,7 @@ tags: backend|arm64 #include namespace FEXCore::CPU { -#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const *IROp, IR::NodeID Node) +#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node) DEF_OP(GuestOpcode) { auto Op = IROp->C(); @@ -28,16 +28,10 @@ DEF_OP(GuestOpcode) { DEF_OP(Fence) { auto Op = IROp->C(); switch (Op->Fence) { - case IR::Fence_Load.Val: - dmb(FEXCore::ARMEmitter::BarrierScope::LD); - break; - case IR::Fence_LoadStore.Val: - dmb(FEXCore::ARMEmitter::BarrierScope::SY); - break; - case IR::Fence_Store.Val: - 
dmb(FEXCore::ARMEmitter::BarrierScope::ST); - break; - default: LOGMAN_MSG_A_FMT("Unknown Fence: {}", Op->Fence); break; + case IR::Fence_Load.Val: dmb(FEXCore::ARMEmitter::BarrierScope::LD); break; + case IR::Fence_LoadStore.Val: dmb(FEXCore::ARMEmitter::BarrierScope::SY); break; + case IR::Fence_Store.Val: dmb(FEXCore::ARMEmitter::BarrierScope::ST); break; + default: LOGMAN_MSG_A_FMT("Unknown Fence: {}", Op->Fence); break; } } @@ -55,7 +49,7 @@ DEF_OP(Break) { .err_code = Op->Reason.ErrorRegister, }; - uint64_t Constant{}; + uint64_t Constant {}; memcpy(&Constant, &State, sizeof(State)); LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, Constant); @@ -136,8 +130,7 @@ DEF_OP(Print) { if (IsGPR(Op->Value.ID())) { mov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, GetReg(Op->Value.ID())); ldr(ARMEmitter::XReg::x3, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.PrintValue)); - } - else { + } else { fmov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, GetVReg(Op->Value.ID()), false); fmov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, GetVReg(Op->Value.ID()), true); ldr(ARMEmitter::XReg::x3, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.PrintVectorValue)); @@ -146,12 +139,10 @@ DEF_OP(Print) { if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] { if (IsGPR(Op->Value.ID())) { GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); - } - else { + } else { GenerateIndirectRuntimeCall(ARMEmitter::Reg::r3); } - } - else { + } else { blr(ARMEmitter::Reg::r3); } @@ -231,8 +222,7 @@ DEF_OP(RDRAND) { if (Op->GetReseeded) { mrs(Dst.first, ARMEmitter::SystemRegister::RNDRRS); - } - else { + } else { mrs(Dst.first, ARMEmitter::SystemRegister::RNDR); } @@ -245,5 +235,4 @@ DEF_OP(Yield) { } #undef DEF_OP -} - +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/MoveOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/MoveOps.cpp index 118be0ce02..001bf8869d 100644 --- 
a/FEXCore/Source/Interface/Core/JIT/Arm64/MoveOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/MoveOps.cpp @@ -8,7 +8,7 @@ tags: backend|arm64 #include "Interface/Core/JIT/Arm64/JITClass.h" namespace FEXCore::CPU { -#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const *IROp, IR::NodeID Node) +#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node) DEF_OP(ExtractElementPair) { auto Op = IROp->C(); LOGMAN_THROW_AA_FMT(Op->Header.Size == 4 || Op->Header.Size == 8, "Invalid size"); @@ -43,5 +43,4 @@ DEF_OP(CreateElementPair) { } #undef DEF_OP -} - +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp index 4c6f14ca26..e3eb9cbf6a 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp @@ -13,9 +13,10 @@ tags: backend|arm64 #include namespace FEXCore::CPU { -#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const *IROp, IR::NodeID Node) +#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node) -void Arm64JITCore::VFScalarOperation(uint8_t OpSize, uint8_t ElementSize, bool ZeroUpperBits, ScalarBinaryOpCaller ScalarEmit, ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2) { +void Arm64JITCore::VFScalarOperation(uint8_t OpSize, uint8_t ElementSize, bool ZeroUpperBits, ScalarBinaryOpCaller ScalarEmit, + ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2) { const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; if (!Is256Bit) { LOGMAN_THROW_A_FMT(ZeroUpperBits == false, "128-bit operation doesn't support ZeroUpperBits in {}", __func__); @@ -25,10 +26,9 @@ void Arm64JITCore::VFScalarOperation(uint8_t OpSize, uint8_t ElementSize, bool Z // The upper bits of the destination comes from the first source. 
LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); constexpr auto Predicate = ARMEmitter::PReg::p0; @@ -41,65 +41,58 @@ void Arm64JITCore::VFScalarOperation(uint8_t OpSize, uint8_t ElementSize, bool Z if (HostSupportsAFP) { // If the host CPU supports AFP then scalar does an insert without modifying upper bits. ScalarEmit(Dst, Vector1, Vector2); - } - else { + } else { // If AFP is unsupported then the operation result goes in to a temporary. // and then it gets inserted. ScalarEmit(VTMP1, Vector1, Vector2); if (!ZeroUpperBits && Is256Bit) { ptrue(SubRegSize.Vector, Predicate, ARMEmitter::PredicatePattern::SVE_VL1); mov(SubRegSize.Vector, Dst.Z(), Predicate.Merging(), VTMP1.Z()); - } - else { + } else { ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0); } } - } - else if (Dst != Vector2) { + } else if (Dst != Vector2) { if (!ZeroUpperBits && Is256Bit) { mov(Dst.Z(), Vector1.Z()); - } - else { + } else { mov(Dst.Q(), Vector1.Q()); } if (HostSupportsAFP) { ScalarEmit(Dst, Vector1, Vector2); - } - else { + } else { ScalarEmit(VTMP1, Vector1, Vector2); if (!ZeroUpperBits && Is256Bit) { ptrue(SubRegSize.Vector, Predicate, ARMEmitter::PredicatePattern::SVE_VL1); mov(SubRegSize.Vector, Dst.Z(), Predicate.Merging(), VTMP1.Z()); - } - else { + } else { ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0); } } - } - else { + } else { // Destination intersects Vector2, can't do anything optimal in this case. // Do the scalar operation first and then move and insert. 
ScalarEmit(VTMP1, Vector1, Vector2); if (!ZeroUpperBits && Is256Bit) { mov(Dst.Z(), Vector1.Z()); - } - else { + } else { mov(Dst.Q(), Vector1.Q()); } if (!ZeroUpperBits && Is256Bit) { ptrue(SubRegSize.Vector, Predicate, ARMEmitter::PredicatePattern::SVE_VL1); mov(SubRegSize.Vector, Dst.Z(), Predicate.Merging(), VTMP1.Z()); - } - else { + } else { ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0); } } } -void Arm64JITCore::VFScalarUnaryOperation(uint8_t OpSize, uint8_t ElementSize, bool ZeroUpperBits, ScalarUnaryOpCaller ScalarEmit, ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1, std::variant Vector2) { +void Arm64JITCore::VFScalarUnaryOperation(uint8_t OpSize, uint8_t ElementSize, bool ZeroUpperBits, ScalarUnaryOpCaller ScalarEmit, + ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1, + std::variant Vector2) { const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; if (!Is256Bit) { LOGMAN_THROW_A_FMT(ZeroUpperBits == false, "128-bit operation doesn't support ZeroUpperBits in {}", __func__); @@ -109,10 +102,9 @@ void Arm64JITCore::VFScalarUnaryOperation(uint8_t OpSize, uint8_t ElementSize, b // The upper bits of the destination comes from the first source. LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); constexpr auto Predicate = ARMEmitter::PReg::p0; bool DstOverlapsVector2 = false; @@ -129,59 +121,50 @@ void Arm64JITCore::VFScalarUnaryOperation(uint8_t OpSize, uint8_t ElementSize, b if (HostSupportsAFP) { // If the host CPU supports AFP then scalar does an insert without modifying upper bits. 
ScalarEmit(Dst, Vector2); - } - else { + } else { // If AFP is unsupported then the operation result goes in to a temporary. // and then it gets inserted. ScalarEmit(VTMP1, Vector2); if (!ZeroUpperBits && Is256Bit) { ptrue(SubRegSize.Vector, Predicate, ARMEmitter::PredicatePattern::SVE_VL1); mov(SubRegSize.Vector, Dst.Z(), Predicate.Merging(), VTMP1.Z()); - } - else { + } else { ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0); } } - } - else if (!DstOverlapsVector2) { + } else if (!DstOverlapsVector2) { if (!ZeroUpperBits && Is256Bit) { mov(Dst.Z(), Vector1.Z()); - } - else { + } else { mov(Dst.Q(), Vector1.Q()); } if (HostSupportsAFP) { ScalarEmit(Dst, Vector2); - } - else { + } else { ScalarEmit(VTMP1, Vector2); if (!ZeroUpperBits && Is256Bit) { ptrue(SubRegSize.Vector, Predicate, ARMEmitter::PredicatePattern::SVE_VL1); mov(SubRegSize.Vector, Dst.Z(), Predicate.Merging(), VTMP1.Z()); - } - else { + } else { ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0); } } - } - else { + } else { // Destination intersects Vector2, can't do anything optimal in this case. // Do the scalar operation first and then move and insert. ScalarEmit(VTMP1, Vector2); if (!ZeroUpperBits && Is256Bit) { mov(Dst.Z(), Vector1.Z()); - } - else { + } else { mov(Dst.Q(), Vector1.Q()); } if (!ZeroUpperBits && Is256Bit) { ptrue(SubRegSize.Vector, Predicate, ARMEmitter::PredicatePattern::SVE_VL1); mov(SubRegSize.Vector, Dst.Z(), Predicate.Merging(), VTMP1.Z()); - } - else { + } else { ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0); } } @@ -192,10 +175,9 @@ DEF_OP(VFAddScalarInsert) { const auto ElementSize = Op->Header.ElementSize; LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? 
ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); auto ScalarEmit = [this, SubRegSize](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { fadd(SubRegSize.Scalar, Dst, Src1, Src2); @@ -215,10 +197,9 @@ DEF_OP(VFSubScalarInsert) { const auto ElementSize = Op->Header.ElementSize; LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); auto ScalarEmit = [this, SubRegSize](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { fsub(SubRegSize.Scalar, Dst, Src1, Src2); @@ -238,10 +219,9 @@ DEF_OP(VFMulScalarInsert) { const auto ElementSize = Op->Header.ElementSize; LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); auto ScalarEmit = [this, SubRegSize](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { fmul(SubRegSize.Scalar, Dst, Src1, Src2); @@ -261,10 +241,9 @@ DEF_OP(VFDivScalarInsert) { const auto ElementSize = Op->Header.ElementSize; LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); auto ScalarEmit = [this, SubRegSize](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { fdiv(SubRegSize.Scalar, Dst, Src1, Src2); @@ -284,17 +263,15 @@ DEF_OP(VFMinScalarInsert) { const auto ElementSize = Op->Header.ElementSize; LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); auto ScalarEmit = [this, SubRegSize](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { if (HostSupportsAFP) { // AFP.AH lets fmin behave like x86 min fmin(SubRegSize.Scalar, Dst, Src1, Src2); - } - else { + } else { fcmp(SubRegSize.Scalar, Src1, Src2); fcsel(SubRegSize.Scalar, Dst, Src1, Src2, ARMEmitter::Condition::CC_MI); } @@ -314,18 +291,16 @@ DEF_OP(VFMaxScalarInsert) { const auto ElementSize = Op->Header.ElementSize; LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); // AFP can make this more optimal. auto ScalarEmit = [this, SubRegSize](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { if (HostSupportsAFP) { // AFP.AH lets fmax behave like x86 max fmax(SubRegSize.Scalar, Dst, Src1, Src2); - } - else { + } else { fcmp(SubRegSize.Scalar, Src1, Src2); fcsel(SubRegSize.Scalar, Dst, Src2, Src1, ARMEmitter::Condition::CC_MI); } @@ -345,10 +320,9 @@ DEF_OP(VFSqrtScalarInsert) { const auto ElementSize = Op->Header.ElementSize; LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); auto ScalarEmit = [this, SubRegSize](ARMEmitter::VRegister Dst, std::variant SrcVar) { auto Src = *std::get_if(&SrcVar); @@ -369,10 +343,9 @@ DEF_OP(VFRSqrtScalarInsert) { const auto ElementSize = Op->Header.ElementSize; LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); auto ScalarEmit = [this, SubRegSize](ARMEmitter::VRegister Dst, std::variant SrcVar) { auto Src = *std::get_if(&SrcVar); @@ -407,10 +380,9 @@ DEF_OP(VFRecpScalarInsert) { const auto ElementSize = Op->Header.ElementSize; LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); auto ScalarEmit = [this, SubRegSize](ARMEmitter::VRegister Dst, std::variant SrcVar) { auto Src = *std::get_if(&SrcVar); @@ -448,31 +420,31 @@ DEF_OP(VFToFScalarInsert) { auto Src = *std::get_if(&SrcVar); switch (Conv) { - case 0x0204: { // Half <- Float - fcvt(Dst.H(), Src.S()); - break; - } - case 0x0208: { // Half <- Double - fcvt(Dst.H(), Src.D()); - break; - } - case 0x0402: { // Float <- Half - fcvt(Dst.S(), Src.H()); - break; - } - case 0x0802: { // Double <- Half - fcvt(Dst.D(), Src.H()); - break; - } - case 0x0804: { // Double <- Float - fcvt(Dst.D(), Src.S()); - break; - } - case 0x0408: { // Float <- Double - fcvt(Dst.S(), Src.D()); - break; - } - default: LOGMAN_MSG_A_FMT("Unknown FCVT sizes: 0x{:x}", Conv); + case 0x0204: { // Half <- Float + fcvt(Dst.H(), Src.S()); + break; + } + case 0x0208: { // Half <- Double + fcvt(Dst.H(), Src.D()); + break; + } + case 0x0402: { // Float <- Half + fcvt(Dst.S(), Src.H()); + break; + } + case 0x0802: { // Double <- Half + fcvt(Dst.D(), Src.H()); + break; + } + case 0x0804: { // Double <- Float + fcvt(Dst.D(), Src.S()); + break; + } + case 0x0408: { // Float <- Double + fcvt(Dst.S(), Src.D()); + break; + } + default: LOGMAN_MSG_A_FMT("Unknown FCVT sizes: 0x{:x}", Conv); } }; @@ -500,12 +472,10 @@ DEF_OP(VSToFVectorInsert) { if (ElementSize == 4) { if (HasTwoElements) { scvtf(ARMEmitter::SubRegSize::i32Bit, Dst.D(), Src.D()); - } - else { + } else { scvtf(ARMEmitter::ScalarRegSize::i32Bit, Dst.S(), Src.S()); } - } - else { + } else { scvtf(ARMEmitter::ScalarRegSize::i64Bit, Dst.D(), Src.D()); } }; @@ -532,34 +502,31 @@ DEF_OP(VSToFGPRInsert) { auto Src = *std::get_if(&SrcVar); switch (Conv) { - case 0x0204: { // Half <- int32_t - scvtf(ARMEmitter::Size::i32Bit, Dst.H(), Src); - break; - } - case 0x0208: { // Half <- int64_t - scvtf(ARMEmitter::Size::i64Bit, Dst.H(), Src); - break; - } - case 0x0404: { // Float <- int32_t - 
scvtf(ARMEmitter::Size::i32Bit, Dst.S(), Src); - break; - } - case 0x0408: { // Float <- int64_t - scvtf(ARMEmitter::Size::i64Bit, Dst.S(), Src); - break; - } - case 0x0804: { // Double <- int32_t - scvtf(ARMEmitter::Size::i32Bit, Dst.D(), Src); - break; - } - case 0x0808: { // Double <- int64_t - scvtf(ARMEmitter::Size::i64Bit, Dst.D(), Src); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled conversion mask: Mask=0x{:04x}", - Conv); - break; + case 0x0204: { // Half <- int32_t + scvtf(ARMEmitter::Size::i32Bit, Dst.H(), Src); + break; + } + case 0x0208: { // Half <- int64_t + scvtf(ARMEmitter::Size::i64Bit, Dst.H(), Src); + break; + } + case 0x0404: { // Float <- int32_t + scvtf(ARMEmitter::Size::i32Bit, Dst.S(), Src); + break; + } + case 0x0408: { // Float <- int64_t + scvtf(ARMEmitter::Size::i64Bit, Dst.S(), Src); + break; + } + case 0x0804: { // Double <- int32_t + scvtf(ARMEmitter::Size::i32Bit, Dst.D(), Src); + break; + } + case 0x0808: { // Double <- int64_t + scvtf(ARMEmitter::Size::i64Bit, Dst.D(), Src); + break; + } + default: LOGMAN_MSG_A_FMT("Unhandled conversion mask: Mask=0x{:04x}", Conv); break; } }; @@ -578,10 +545,9 @@ DEF_OP(VFToIScalarInsert) { const auto ElementSize = Op->Header.ElementSize; LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); const auto RoundMode = Op->Round; @@ -589,21 +555,11 @@ DEF_OP(VFToIScalarInsert) { auto Src = *std::get_if(&SrcVar); switch (RoundMode) { - case IR::Round_Nearest: - frintn(SubRegSize.Scalar, Dst, Src); - break; - case IR::Round_Negative_Infinity: - frintm(SubRegSize.Scalar, Dst, Src); - break; - case IR::Round_Positive_Infinity: - frintp(SubRegSize.Scalar, Dst, Src); - break; - case IR::Round_Towards_Zero: - frintz(SubRegSize.Scalar, Dst, Src); - break; - case IR::Round_Host: - frinti(SubRegSize.Scalar, Dst, Src); - break; + case IR::Round_Nearest: frintn(SubRegSize.Scalar, Dst, Src); break; + case IR::Round_Negative_Infinity: frintm(SubRegSize.Scalar, Dst, Src); break; + case IR::Round_Positive_Infinity: frintp(SubRegSize.Scalar, Dst, Src); break; + case IR::Round_Towards_Zero: frintz(SubRegSize.Scalar, Dst, Src); break; + case IR::Round_Host: frinti(SubRegSize.Scalar, Dst, Src); break; } }; @@ -621,70 +577,60 @@ DEF_OP(VFCMPScalarInsert) { const auto ElementSize = Op->Header.ElementSize; LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); const auto ZeroUpperBits = Op->ZeroUpperBits; const auto Is256Bit = IROp->Size == Core::CPUState::XMM_AVX_REG_SIZE; auto ScalarEmitEQ = [this, SubRegSize](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { switch (SubRegSize.Scalar) { - case ARMEmitter::ScalarRegSize::i16Bit: { - fcmeq(Dst.H(), Src1.H(), Src2.H()); - break; - } - case ARMEmitter::ScalarRegSize::i32Bit: - case ARMEmitter::ScalarRegSize::i64Bit: - fcmeq(SubRegSize.Scalar, Dst, Src1, Src2); - break; - default: - break; + case ARMEmitter::ScalarRegSize::i16Bit: { + fcmeq(Dst.H(), Src1.H(), Src2.H()); + break; + } + case ARMEmitter::ScalarRegSize::i32Bit: + case ARMEmitter::ScalarRegSize::i64Bit: fcmeq(SubRegSize.Scalar, Dst, Src1, Src2); break; + default: break; } }; auto ScalarEmitLT = [this, SubRegSize](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { switch (SubRegSize.Scalar) { - case ARMEmitter::ScalarRegSize::i16Bit: { - fcmgt(Dst.H(), Src2.H(), Src1.H()); - break; - } - case ARMEmitter::ScalarRegSize::i32Bit: - case ARMEmitter::ScalarRegSize::i64Bit: - fcmgt(SubRegSize.Scalar, Dst, Src2, Src1); - break; - default: - break; + case ARMEmitter::ScalarRegSize::i16Bit: { + fcmgt(Dst.H(), Src2.H(), Src1.H()); + break; + } + case ARMEmitter::ScalarRegSize::i32Bit: + case ARMEmitter::ScalarRegSize::i64Bit: fcmgt(SubRegSize.Scalar, Dst, Src2, Src1); break; + default: break; } }; auto ScalarEmitLE = [this, SubRegSize](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { switch (SubRegSize.Scalar) { - case ARMEmitter::ScalarRegSize::i16Bit: { - fcmge(Dst.H(), Src2.H(), Src1.H()); - break; - } - case ARMEmitter::ScalarRegSize::i32Bit: - case ARMEmitter::ScalarRegSize::i64Bit: - fcmge(SubRegSize.Scalar, Dst, Src2, Src1); - break; - default: - break; + case ARMEmitter::ScalarRegSize::i16Bit: { + fcmge(Dst.H(), Src2.H(), Src1.H()); + break; + 
} + case ARMEmitter::ScalarRegSize::i32Bit: + case ARMEmitter::ScalarRegSize::i64Bit: fcmge(SubRegSize.Scalar, Dst, Src2, Src1); break; + default: break; } }; - auto ScalarEmitUNO = [this, SubRegSize, ZeroUpperBits, Is256Bit](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { + auto ScalarEmitUNO = [this, SubRegSize, ZeroUpperBits, Is256Bit](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, + ARMEmitter::VRegister Src2) { switch (SubRegSize.Scalar) { - case ARMEmitter::ScalarRegSize::i16Bit: { - fcmge(VTMP1.H(), Src1.H(), Src2.H()); - fcmgt(VTMP2.H(), Src2.H(), Src1.H()); - break; - } - case ARMEmitter::ScalarRegSize::i32Bit: - case ARMEmitter::ScalarRegSize::i64Bit: - fcmge(SubRegSize.Scalar, VTMP1, Src1, Src2); - fcmgt(SubRegSize.Scalar, VTMP2, Src2, Src1); - break; - default: - break; + case ARMEmitter::ScalarRegSize::i16Bit: { + fcmge(VTMP1.H(), Src1.H(), Src2.H()); + fcmgt(VTMP2.H(), Src2.H(), Src1.H()); + break; + } + case ARMEmitter::ScalarRegSize::i32Bit: + case ARMEmitter::ScalarRegSize::i64Bit: + fcmge(SubRegSize.Scalar, VTMP1, Src1, Src2); + fcmgt(SubRegSize.Scalar, VTMP2, Src2, Src1); + break; + default: break; } // If the destination is a temporary then it is going to do an insert after the operation. // This means this operation can avoid a redundant insert in this case. 
@@ -701,24 +647,21 @@ DEF_OP(VFCMPScalarInsert) { constexpr auto Predicate = ARMEmitter::PReg::p0; ptrue(SubRegSize.Vector, Predicate, ARMEmitter::PredicatePattern::SVE_VL1); mov(SubRegSize.Vector, Dst.Z(), Predicate.Merging(), VTMP1.Z()); - } - else { + } else { ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0); } } }; - auto ScalarEmitNEQ = [this, SubRegSize, ZeroUpperBits, Is256Bit](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { + auto ScalarEmitNEQ = [this, SubRegSize, ZeroUpperBits, Is256Bit](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, + ARMEmitter::VRegister Src2) { switch (SubRegSize.Scalar) { - case ARMEmitter::ScalarRegSize::i16Bit: { - fcmeq(VTMP1.H(), Src1.H(), Src2.H()); - break; - } - case ARMEmitter::ScalarRegSize::i32Bit: - case ARMEmitter::ScalarRegSize::i64Bit: - fcmeq(SubRegSize.Scalar, VTMP1, Src1, Src2); - break; - default: - break; + case ARMEmitter::ScalarRegSize::i16Bit: { + fcmeq(VTMP1.H(), Src1.H(), Src2.H()); + break; + } + case ARMEmitter::ScalarRegSize::i32Bit: + case ARMEmitter::ScalarRegSize::i64Bit: fcmeq(SubRegSize.Scalar, VTMP1, Src1, Src2); break; + default: break; } // If the destination is a temporary then it is going to do an insert after the operation. // This means this operation can avoid a redundant insert in this case. 
@@ -734,26 +677,25 @@ DEF_OP(VFCMPScalarInsert) { constexpr auto Predicate = ARMEmitter::PReg::p0; ptrue(SubRegSize.Vector, Predicate, ARMEmitter::PredicatePattern::SVE_VL1); mov(SubRegSize.Vector, Dst.Z(), Predicate.Merging(), VTMP1.Z()); - } - else { + } else { ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0); } } }; - auto ScalarEmitORD = [this, SubRegSize, ZeroUpperBits, Is256Bit](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) { + auto ScalarEmitORD = [this, SubRegSize, ZeroUpperBits, Is256Bit](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, + ARMEmitter::VRegister Src2) { switch (SubRegSize.Scalar) { - case ARMEmitter::ScalarRegSize::i16Bit: { - fcmge(VTMP1.H(), Src1.H(), Src2.H()); - fcmgt(VTMP2.H(), Src2.H(), Src1.H()); - break; - } - case ARMEmitter::ScalarRegSize::i32Bit: - case ARMEmitter::ScalarRegSize::i64Bit: - fcmge(SubRegSize.Scalar, VTMP1, Src1, Src2); - fcmgt(SubRegSize.Scalar, VTMP2, Src2, Src1); - break; - default: - break; + case ARMEmitter::ScalarRegSize::i16Bit: { + fcmge(VTMP1.H(), Src1.H(), Src2.H()); + fcmgt(VTMP2.H(), Src2.H(), Src1.H()); + break; + } + case ARMEmitter::ScalarRegSize::i32Bit: + case ARMEmitter::ScalarRegSize::i64Bit: + fcmge(SubRegSize.Scalar, VTMP1, Src1, Src2); + fcmgt(SubRegSize.Scalar, VTMP2, Src2, Src1); + break; + default: break; } // If the destination is a temporary then it is going to do an insert after the operation. // This means this operation can avoid a redundant insert in this case. 
@@ -769,8 +711,7 @@ DEF_OP(VFCMPScalarInsert) { constexpr auto Predicate = ARMEmitter::PReg::p0; ptrue(SubRegSize.Vector, Predicate, ARMEmitter::PredicatePattern::SVE_VL1); mov(SubRegSize.Vector, Dst.Z(), Predicate.Merging(), VTMP1.Z()); - } - else { + } else { ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0); } } @@ -804,17 +745,15 @@ DEF_OP(VectorZero) { mov_imm(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), 0); } else { switch (OpSize) { - case 8: { - movi(ARMEmitter::SubRegSize::i64Bit, Dst.D(), 0); - break; - } - case 16: { - movi(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), 0); - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown Op Size: {}", OpSize); - break; + case 8: { + movi(ARMEmitter::SubRegSize::i64Bit, Dst.D(), 0); + break; + } + case 16: { + movi(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), 0); + break; + } + default: LOGMAN_MSG_A_FMT("Unknown Op Size: {}", OpSize); break; } } } @@ -827,11 +766,11 @@ DEF_OP(VectorImm) { const auto ElementSize = Op->Header.ElementSize; LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; const auto Dst = GetVReg(Node); @@ -841,8 +780,7 @@ DEF_OP(VectorImm) { // SVE dup uses sign extension where VectorImm wants zext LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Op->Immediate); dup(SubRegSize, Dst.Z(), TMP1); - } - else { + } else { dup_imm(SubRegSize, Dst.Z(), static_cast(Op->Immediate)); } } else { @@ -850,8 +788,7 @@ DEF_OP(VectorImm) { // movi with 64bit element size doesn't do what we want here LoadConstant(ARMEmitter::Size::i64Bit, TMP1, static_cast(Op->Immediate) << Op->ShiftAmount); dup(SubRegSize, Dst.Q(), TMP1.R()); - } - else { + } else { movi(SubRegSize, Dst.Q(), Op->Immediate, Op->ShiftAmount); } } @@ -863,22 +800,18 @@ DEF_OP(LoadNamedVectorConstant) { const auto Dst = GetVReg(Node); switch (Op->Constant) { - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO: - movi(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), 0); - return; - default: - // Intentionally doing nothing. - break; + case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO: movi(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), 0); return; + default: + // Intentionally doing nothing. + break; } if (HostSupportsSVE128) { switch (Op->Constant) { - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MOVMSKPS_SHIFT: - index(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), 0, 1); - return; - default: - // Intentionally doing nothing. - break; + case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MOVMSKPS_SHIFT: index(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), 0, 1); return; + default: + // Intentionally doing nothing. + break; } } // Load the pointer. 
@@ -903,24 +836,12 @@ DEF_OP(LoadNamedVectorConstant) { auto MemOperand = GenerateMemOperand(OpSize, Op->Constant, STATE); switch (OpSize) { - case 1: - ldrb(Dst, MemOperand); - break; - case 2: - ldrh(Dst, MemOperand); - break; - case 4: - ldr(Dst.S(), MemOperand); - break; - case 8: - ldr(Dst.D(), MemOperand); - break; - case 16: - ldr(Dst.Q(), MemOperand); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); - break; + case 1: ldrb(Dst, MemOperand); break; + case 2: ldrh(Dst, MemOperand); break; + case 4: ldr(Dst.S(), MemOperand); break; + case 8: ldr(Dst.D(), MemOperand); break; + case 16: ldr(Dst.Q(), MemOperand); break; + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); break; } } DEF_OP(LoadNamedVectorIndexedConstant) { @@ -933,29 +854,17 @@ DEF_OP(LoadNamedVectorIndexedConstant) { ldr(TMP1, STATE_PTR(CpuStateFrame, Pointers.Common.IndexedNamedVectorConstantPointers[Op->Constant])); switch (OpSize) { - case 1: - ldrb(Dst, TMP1, Op->Index); - break; - case 2: - ldrh(Dst, TMP1, Op->Index); - break; - case 4: - ldr(Dst.S(), TMP1, Op->Index); - break; - case 8: - ldr(Dst.D(), TMP1, Op->Index); - break; - case 16: - ldr(Dst.Q(), TMP1, Op->Index); - break; - case 32: { - add(ARMEmitter::Size::i64Bit, TMP1, TMP1, Op->Index); - ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), TMP1, 0); - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); - break; + case 1: ldrb(Dst, TMP1, Op->Index); break; + case 2: ldrh(Dst, TMP1, Op->Index); break; + case 4: ldr(Dst.S(), TMP1, Op->Index); break; + case 8: ldr(Dst.D(), TMP1, Op->Index); break; + case 16: ldr(Dst.Q(), TMP1, Op->Index); break; + case 32: { + add(ARMEmitter::Size::i64Bit, TMP1, TMP1, Op->Index); + ld1b(Dst.Z(), PRED_TMP_32B.Zeroing(), TMP1, 0); + break; + } + default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); break; } } @@ -967,47 +876,45 @@ DEF_OP(VMov) { const auto Source = GetVReg(Op->Source.ID()); switch (OpSize) { - case 
1: { - movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0); - ins(ARMEmitter::SubRegSize::i8Bit, VTMP1, 0, Source, 0); - mov(Dst.Q(), VTMP1.Q()); - break; - } - case 2: { - movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0); - ins(ARMEmitter::SubRegSize::i16Bit, VTMP1, 0, Source, 0); - mov(Dst.Q(), VTMP1.Q()); - break; - } - case 4: { - movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0); - ins(ARMEmitter::SubRegSize::i32Bit, VTMP1, 0, Source, 0); - mov(Dst.Q(), VTMP1.Q()); - break; - } - case 8: { - mov(Dst.D(), Source.D()); - break; - } - case 16: { - if (HostSupportsSVE256 || Dst.Idx() != Source.Idx()) { - mov(Dst.Q(), Source.Q()); - } - break; + case 1: { + movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0); + ins(ARMEmitter::SubRegSize::i8Bit, VTMP1, 0, Source, 0); + mov(Dst.Q(), VTMP1.Q()); + break; + } + case 2: { + movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0); + ins(ARMEmitter::SubRegSize::i16Bit, VTMP1, 0, Source, 0); + mov(Dst.Q(), VTMP1.Q()); + break; + } + case 4: { + movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0); + ins(ARMEmitter::SubRegSize::i32Bit, VTMP1, 0, Source, 0); + mov(Dst.Q(), VTMP1.Q()); + break; + } + case 8: { + mov(Dst.D(), Source.D()); + break; + } + case 16: { + if (HostSupportsSVE256 || Dst.Idx() != Source.Idx()) { + mov(Dst.Q(), Source.Q()); } - case 32: { - // NOTE: If, in the distant future we support larger moves, or registers - // (*cough* AVX-512 *cough*) make sure to change this to treat - // 256-bit moves with zero extending behavior instead of doing only - // a regular SVE move into a 512-bit register. - if (Dst.Idx() != Source.Idx()) { - mov(Dst.Z(), Source.Z()); - } - break; + break; + } + case 32: { + // NOTE: If, in the distant future we support larger moves, or registers + // (*cough* AVX-512 *cough*) make sure to change this to treat + // 256-bit moves with zero extending behavior instead of doing only + // a regular SVE move into a 512-bit register. 
+ if (Dst.Idx() != Source.Idx()) { + mov(Dst.Z(), Source.Z()); } - default: - LOGMAN_MSG_A_FMT("Unknown Op Size: {}", OpSize); - break; + break; + } + default: LOGMAN_MSG_A_FMT("Unknown Op Size: {}", OpSize); break; } } @@ -1087,11 +994,11 @@ DEF_OP(VAdd) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { add(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); @@ -1112,11 +1019,11 @@ DEF_OP(VSub) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { sub(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); @@ -1137,11 +1044,11 @@ DEF_OP(VUQAdd) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { uqadd(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); @@ -1162,11 +1069,11 @@ DEF_OP(VUQSub) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { uqsub(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); @@ -1187,11 +1094,11 @@ DEF_OP(VSQAdd) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { sqadd(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); @@ -1212,11 +1119,11 @@ DEF_OP(VSQSub) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { sqsub(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); @@ -1238,11 +1145,11 @@ DEF_OP(VAddP) { const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -1279,11 +1186,12 @@ DEF_OP(VFAddV) { const auto Dst = GetVReg(Node); const auto Vector = GetVReg(Op->Vector.ID()); - LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE || OpSize == Core::CPUState::XMM_AVX_REG_SIZE, "Only AVX and SSE size supported"); + LOGMAN_THROW_AA_FMT(OpSize == Core::CPUState::XMM_SSE_REG_SIZE || OpSize == Core::CPUState::XMM_AVX_REG_SIZE, "Only AVX and SSE size " + "supported"); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i64Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -1297,8 +1205,7 @@ DEF_OP(VFAddV) { if (ElementSize == 4) { faddp(SubRegSize.Vector, Dst.Q(), Vector.Q(), Vector.Q()); faddp(SubRegSize.Scalar, Dst, Vector); - } - else { + } else { faddp(SubRegSize.Scalar, Dst, Vector); } } @@ -1315,11 +1222,11 @@ DEF_OP(VAddV) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit); if (HostSupportsSVE256 && Is256Bit) { // SVE doesn't have an equivalent ADDV instruction, so we make do @@ -1339,8 +1246,7 @@ DEF_OP(VAddV) { } else { if (ElementSize == 8) { addp(SubRegSize.Scalar, Dst, Vector); - } - else { + } else { addv(SubRegSize.Vector, Dst.Q(), Vector.Q()); } } @@ -1357,11 +1263,11 @@ DEF_OP(VUMinV) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? 
ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B; @@ -1383,11 +1289,11 @@ DEF_OP(VUMaxV) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B; @@ -1410,11 +1316,11 @@ DEF_OP(VURAvg) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -1448,11 +1354,11 @@ DEF_OP(VAbs) { const auto Src = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { abs(SubRegSize, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z()); @@ -1478,30 +1384,29 @@ DEF_OP(VFAbs) { const auto Src = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { fabs(SubRegSize, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z()); } else { if (ElementSize == OpSize) { switch (ElementSize) { - case 2: { - fabs(Dst.H(), Src.H()); - break; - } - case 4: { - fabs(Dst.S(), Src.S()); - break; - } - case 8: { - fabs(Dst.D(), Src.D()); - break; - } - default: - break; + case 2: { + fabs(Dst.H(), Src.H()); + break; + } + case 4: { + fabs(Dst.S(), Src.S()); + break; + } + case 8: { + fabs(Dst.D(), Src.D()); + break; + } + default: break; } } else { // Vector @@ -1523,11 +1428,11 @@ DEF_OP(VPopcount) { const auto Src = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -1554,30 +1459,29 @@ DEF_OP(VFAdd) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { fadd(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); } else { if (IsScalar) { switch (ElementSize) { - case 2: { - fadd(Dst.H(), Vector1.H(), Vector2.H()); - break; - } - case 4: { - fadd(Dst.S(), Vector1.S(), Vector2.S()); - break; - } - case 8: { - fadd(Dst.D(), Vector1.D(), Vector2.D()); - break; - } - default: - break; + case 2: { + fadd(Dst.H(), Vector1.H(), Vector2.H()); + break; + } + case 4: { + fadd(Dst.S(), Vector1.S(), Vector2.S()); + break; + } + case 8: { + fadd(Dst.D(), Vector1.D(), Vector2.D()); + break; + } + default: break; } } else { fadd(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); @@ -1597,10 +1501,10 @@ DEF_OP(VFAddP) { const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -1636,30 +1540,29 @@ DEF_OP(VFSub) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { fsub(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); } else { if (IsScalar) { switch (ElementSize) { - case 2: { - fsub(Dst.H(), Vector1.H(), Vector2.H()); - break; - } - case 4: { - fsub(Dst.S(), Vector1.S(), Vector2.S()); - break; - } - case 8: { - fsub(Dst.D(), Vector1.D(), Vector2.D()); - break; - } - default: - break; + case 2: { + fsub(Dst.H(), Vector1.H(), Vector2.H()); + break; + } + case 4: { + fsub(Dst.S(), Vector1.S(), Vector2.S()); + break; + } + case 8: { + fsub(Dst.D(), Vector1.D(), Vector2.D()); + break; + } + default: break; } } else { fsub(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); @@ -1680,30 +1583,29 @@ DEF_OP(VFMul) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { fmul(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); } else { if (IsScalar) { switch (ElementSize) { - case 2: { - fmul(Dst.H(), Vector1.H(), Vector2.H()); - break; - } - case 4: { - fmul(Dst.S(), Vector1.S(), Vector2.S()); - break; - } - case 8: { - fmul(Dst.D(), Vector1.D(), Vector2.D()); - break; - } - default: - break; + case 2: { + fmul(Dst.H(), Vector1.H(), Vector2.H()); + break; + } + case 4: { + fmul(Dst.S(), Vector1.S(), Vector2.S()); + break; + } + case 8: { + fmul(Dst.D(), Vector1.D(), Vector2.D()); + break; + } + default: break; } } else { fmul(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); @@ -1724,10 +1626,10 @@ DEF_OP(VFDiv) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -1751,20 +1653,19 @@ DEF_OP(VFDiv) { } else { if (IsScalar) { switch (ElementSize) { - case 2: { - fdiv(Dst.H(), Vector1.H(), Vector2.H()); - break; - } - case 4: { - fdiv(Dst.S(), Vector1.S(), Vector2.S()); - break; - } - case 8: { - fdiv(Dst.D(), Vector1.D(), Vector2.D()); - break; - } - default: - break; + case 2: { + fdiv(Dst.H(), Vector1.H(), Vector2.H()); + break; + } + case 4: { + fdiv(Dst.S(), Vector1.S(), Vector2.S()); + break; + } + case 8: { + fdiv(Dst.D(), Vector1.D(), Vector2.D()); + break; + } + default: break; } } else { fdiv(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); @@ -1785,10 +1686,10 @@ DEF_OP(VFMin) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; // NOTE: We don't directly use FMIN here for any of the implementations, // because it has undesirable NaN handling behavior (it sets @@ -1810,10 +1711,9 @@ DEF_OP(VFMin) { // predicate bits from the second vector into the // same temporary. // 5. Move temporary into the destination register and we're done. - fcmgt(SubRegSize, ComparePred, Mask.Zeroing(), - Vector2.Z(), Vector1.Z()); + fcmgt(SubRegSize, ComparePred, Mask.Zeroing(), Vector2.Z(), Vector1.Z()); not_(ComparePred, Mask.Zeroing(), ComparePred); - + if (Dst == Vector1) { // Trivial case where Vector1 is also the destination. 
// We don't need to move any data around in this case (aside from the merge). @@ -1829,23 +1729,22 @@ DEF_OP(VFMin) { mrs(TMP1, ARMEmitter::SystemRegister::NZCV); switch (ElementSize) { - case 2: { - fcmp(Vector1.H(), Vector2.H()); - fcsel(Dst.H(), Vector1.H(), Vector2.H(), ARMEmitter::Condition::CC_MI); - break; - } - case 4: { - fcmp(Vector1.S(), Vector2.S()); - fcsel(Dst.S(), Vector1.S(), Vector2.S(), ARMEmitter::Condition::CC_MI); - break; - } - case 8: { - fcmp(Vector1.D(), Vector2.D()); - fcsel(Dst.D(), Vector1.D(), Vector2.D(), ARMEmitter::Condition::CC_MI); - break; - } - default: - break; + case 2: { + fcmp(Vector1.H(), Vector2.H()); + fcsel(Dst.H(), Vector1.H(), Vector2.H(), ARMEmitter::Condition::CC_MI); + break; + } + case 4: { + fcmp(Vector1.S(), Vector2.S()); + fcsel(Dst.S(), Vector1.S(), Vector2.S(), ARMEmitter::Condition::CC_MI); + break; + } + case 8: { + fcmp(Vector1.D(), Vector2.D()); + fcsel(Dst.D(), Vector1.D(), Vector2.D(), ARMEmitter::Condition::CC_MI); + break; + } + default: break; } // Restore NZCV @@ -1855,13 +1754,11 @@ DEF_OP(VFMin) { // Destination is already Vector1, need to insert Vector2 on false. fcmgt(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q()); bif(Dst.Q(), Vector2.Q(), VTMP1.Q()); - } - else if (Dst == Vector2) { + } else if (Dst == Vector2) { // Destination is already Vector2, Invert arguments and insert Vector1 on false. fcmgt(SubRegSize, VTMP1.Q(), Vector1.Q(), Vector2.Q()); bif(Dst.Q(), Vector1.Q(), VTMP1.Q()); - } - else { + } else { // Dst is not either source, need a move. fcmgt(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q()); mov(Dst.Q(), Vector1.Q()); @@ -1884,10 +1781,10 @@ DEF_OP(VFMax) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; // NOTE: See VFMin implementation for reasons why we // don't just use FMAX/FMIN for these implementations. @@ -1896,8 +1793,7 @@ DEF_OP(VFMax) { const auto Mask = PRED_TMP_32B; const auto ComparePred = ARMEmitter::PReg::p0; - fcmgt(SubRegSize, ComparePred, Mask.Zeroing(), - Vector2.Z(), Vector1.Z()); + fcmgt(SubRegSize, ComparePred, Mask.Zeroing(), Vector2.Z(), Vector1.Z()); if (Dst == Vector1) { // Trivial case where Vector1 is also the destination. @@ -1914,23 +1810,22 @@ DEF_OP(VFMax) { mrs(TMP1, ARMEmitter::SystemRegister::NZCV); switch (ElementSize) { - case 2: { - fcmp(Vector1.H(), Vector2.H()); - fcsel(Dst.H(), Vector2.H(), Vector1.H(), ARMEmitter::Condition::CC_MI); - break; - } - case 4: { - fcmp(Vector1.S(), Vector2.S()); - fcsel(Dst.S(), Vector2.S(), Vector1.S(), ARMEmitter::Condition::CC_MI); - break; - } - case 8: { - fcmp(Vector1.D(), Vector2.D()); - fcsel(Dst.D(), Vector2.D(), Vector1.D(), ARMEmitter::Condition::CC_MI); - break; - } - default: - break; + case 2: { + fcmp(Vector1.H(), Vector2.H()); + fcsel(Dst.H(), Vector2.H(), Vector1.H(), ARMEmitter::Condition::CC_MI); + break; + } + case 4: { + fcmp(Vector1.S(), Vector2.S()); + fcsel(Dst.S(), Vector2.S(), Vector1.S(), ARMEmitter::Condition::CC_MI); + break; + } + case 8: { + fcmp(Vector1.D(), Vector2.D()); + fcsel(Dst.D(), Vector2.D(), Vector1.D(), ARMEmitter::Condition::CC_MI); + break; + } + default: break; } // Restore NZCV @@ -1940,13 +1835,11 @@ DEF_OP(VFMax) { // Destination is already Vector1, need to insert Vector2 on true. 
fcmgt(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q()); bit(Dst.Q(), Vector2.Q(), VTMP1.Q()); - } - else if (Dst == Vector2) { + } else if (Dst == Vector2) { // Destination is already Vector2, Invert arguments and insert Vector1 on true. fcmgt(SubRegSize, VTMP1.Q(), Vector1.Q(), Vector2.Q()); bit(Dst.Q(), Vector1.Q(), VTMP1.Q()); - } - else { + } else { // Dst is not either source, need a move. fcmgt(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q()); mov(Dst.Q(), Vector1.Q()); @@ -1968,10 +1861,10 @@ DEF_OP(VFRecp) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -1995,28 +1888,26 @@ DEF_OP(VFRecp) { fmov(SubRegSize.Scalar, VTMP1.Q(), 1.0f); switch (ElementSize) { - case 2: { - fdiv(Dst.H(), VTMP1.H(), Vector.H()); - break; - } - case 4: { - fdiv(Dst.S(), VTMP1.S(), Vector.S()); - break; - } - case 8: { - fdiv(Dst.D(), VTMP1.D(), Vector.D()); - break; - } - default: - break; + case 2: { + fdiv(Dst.H(), VTMP1.H(), Vector.H()); + break; + } + case 4: { + fdiv(Dst.S(), VTMP1.S(), Vector.S()); + break; + } + case 8: { + fdiv(Dst.D(), VTMP1.D(), Vector.D()); + break; + } + default: break; } } else { if (ElementSize == 4 && HostSupportsRPRES) { // RPRES gives enough precision for this. 
if (OpSize == 8) { frecpe(SubRegSize.Vector, Dst.D(), Vector.D()); - } - else { + } else { frecpe(SubRegSize.Vector, Dst.Q(), Vector.Q()); } return; @@ -2040,10 +1931,10 @@ DEF_OP(VFSqrt) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -2052,20 +1943,19 @@ DEF_OP(VFSqrt) { } else { if (IsScalar) { switch (ElementSize) { - case 2: { - fsqrt(Dst.H(), Vector.H()); - break; - } - case 4: { - fsqrt(Dst.S(), Vector.S()); - break; - } - case 8: { - fsqrt(Dst.D(), Vector.D()); - break; - } - default: - break; + case 2: { + fsqrt(Dst.H(), Vector.H()); + break; + } + case 4: { + fsqrt(Dst.S(), Vector.S()); + break; + } + case 8: { + fsqrt(Dst.D(), Vector.D()); + break; + } + default: break; } } else { fsqrt(SubRegSize, Dst.Q(), Vector.Q()); @@ -2085,10 +1975,10 @@ DEF_OP(VFRSqrt) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -2111,31 +2001,29 @@ DEF_OP(VFRSqrt) { fmov(SubRegSize.Scalar, VTMP1.Q(), 1.0); switch (ElementSize) { - case 2: { - fsqrt(VTMP2.H(), Vector.H()); - fdiv(Dst.H(), VTMP1.H(), VTMP2.H()); - break; - } - case 4: { - fsqrt(VTMP2.S(), Vector.S()); - fdiv(Dst.S(), VTMP1.S(), VTMP2.S()); - break; - } - case 8: { - fsqrt(VTMP2.D(), Vector.D()); - fdiv(Dst.D(), VTMP1.D(), VTMP2.D()); - break; - } - default: - break; + case 2: { + fsqrt(VTMP2.H(), Vector.H()); + fdiv(Dst.H(), VTMP1.H(), VTMP2.H()); + break; + } + case 4: { + fsqrt(VTMP2.S(), Vector.S()); + fdiv(Dst.S(), VTMP1.S(), VTMP2.S()); + break; + } + case 8: { + fsqrt(VTMP2.D(), Vector.D()); + fdiv(Dst.D(), VTMP1.D(), VTMP2.D()); + break; + } + default: break; } } else { if (ElementSize == 4 && HostSupportsRPRES) { // RPRES gives enough precision for this. if (OpSize == 8) { frsqrte(SubRegSize.Vector, Dst.D(), Vector.D()); - } - else { + } else { frsqrte(SubRegSize.Vector, Dst.Q(), Vector.Q()); } return; @@ -2156,14 +2044,14 @@ DEF_OP(VNeg) { const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto Dst = GetVReg(Node); - const auto Vector= GetVReg(Op->Vector.ID()); + const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -2184,10 +2072,10 @@ DEF_OP(VFNeg) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -2225,11 +2113,11 @@ DEF_OP(VUMin) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -2247,23 +2135,22 @@ DEF_OP(VUMin) { movprfx(Dst.Z(), Vector1.Z()); umin(SubRegSize, Dst.Z(), Pred, Dst.Z(), Vector2.Z()); } - } else { - switch (ElementSize) { - case 1: - case 2: - case 4: { - umin(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); - break; - } - case 8: { - cmhi(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q()); - mov(VTMP2.Q(), Vector1.Q()); - bif(VTMP2.Q(), Vector2.Q(), VTMP1.Q()); - mov(Dst.Q(), VTMP2.Q()); - break; - } - default: - break; + } else { + switch (ElementSize) { + case 1: + case 2: + case 4: { + umin(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); + break; + } + case 8: { + cmhi(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q()); + mov(VTMP2.Q(), Vector1.Q()); + bif(VTMP2.Q(), Vector2.Q(), VTMP1.Q()); + mov(Dst.Q(), VTMP2.Q()); + break; + } + default: break; } } } @@ -2280,11 +2167,11 @@ DEF_OP(VSMin) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -2304,21 +2191,20 @@ DEF_OP(VSMin) { } } else { switch (ElementSize) { - case 1: - case 2: - case 4: { - smin(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); - break; - } - case 8: { - cmgt(SubRegSize, VTMP1.Q(), Vector1.Q(), Vector2.Q()); - mov(VTMP2.Q(), Vector1.Q()); - bif(VTMP2.Q(), Vector2.Q(), VTMP1.Q()); - mov(Dst.Q(), VTMP2.Q()); - break; - } - default: - break; + case 1: + case 2: + case 4: { + smin(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); + break; + } + case 8: { + cmgt(SubRegSize, VTMP1.Q(), Vector1.Q(), Vector2.Q()); + mov(VTMP2.Q(), Vector1.Q()); + bif(VTMP2.Q(), Vector2.Q(), VTMP1.Q()); + mov(Dst.Q(), VTMP2.Q()); + break; + } + default: break; } } } @@ -2335,11 +2221,11 @@ DEF_OP(VUMax) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -2359,21 +2245,20 @@ DEF_OP(VUMax) { } } else { switch (ElementSize) { - case 1: - case 2: - case 4: { - umax(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); - break; - } - case 8: { - cmhi(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q()); - mov(VTMP2.Q(), Vector1.Q()); - bif(VTMP2.Q(), Vector2.Q(), VTMP1.Q()); - mov(Dst.Q(), VTMP2.Q()); - break; - } - default: - break; + case 1: + case 2: + case 4: { + umax(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); + break; + } + case 8: { + cmhi(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q()); + mov(VTMP2.Q(), Vector1.Q()); + bif(VTMP2.Q(), Vector2.Q(), VTMP1.Q()); + mov(Dst.Q(), VTMP2.Q()); + break; + } + default: break; } } } @@ -2390,11 +2275,11 @@ DEF_OP(VSMax) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Pred = PRED_TMP_32B.Merging(); @@ -2414,21 +2299,20 @@ DEF_OP(VSMax) { } } else { switch (ElementSize) { - case 1: - case 2: - case 4: { - smax(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); - break; - } - case 8: { - cmgt(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q()); - mov(VTMP2.Q(), Vector1.Q()); - bif(VTMP2.Q(), Vector2.Q(), VTMP1.Q()); - mov(Dst.Q(), VTMP2.Q()); - break; - } - default: - break; + case 1: + case 2: + case 4: { + smax(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q()); + break; + } + case 8: { + cmgt(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q()); + mov(VTMP2.Q(), Vector1.Q()); + bif(VTMP2.Q(), Vector2.Q(), VTMP1.Q()); + mov(Dst.Q(), VTMP2.Q()); + break; + } + default: break; } } } @@ -2445,11 +2329,11 @@ DEF_OP(VZip) { const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { zip1(SubRegSize, Dst.Z(), VectorLower.Z(), VectorUpper.Z()); @@ -2474,11 +2358,11 @@ DEF_OP(VZip2) { const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? 
ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { zip2(SubRegSize, Dst.Z(), VectorLower.Z(), VectorUpper.Z()); @@ -2503,11 +2387,11 @@ DEF_OP(VUnZip) { const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { uzp1(SubRegSize, Dst.Z(), VectorLower.Z(), VectorUpper.Z()); @@ -2532,11 +2416,11 @@ DEF_OP(VUnZip2) { const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? 
ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { uzp2(SubRegSize, Dst.Z(), VectorLower.Z(), VectorUpper.Z()); @@ -2561,11 +2445,11 @@ DEF_OP(VTrn) { const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { trn1(SubRegSize, Dst.Z(), VectorLower.Z(), VectorUpper.Z()); @@ -2590,11 +2474,11 @@ DEF_OP(VTrn2) { const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { trn2(SubRegSize, Dst.Z(), VectorLower.Z(), VectorUpper.Z()); @@ -2636,30 +2520,24 @@ DEF_OP(VBSL) { // Can use BSL without any moves. if (OpSize == 8) { bsl(Dst.D(), VectorTrue.D(), VectorFalse.D()); - } - else { + } else { bsl(Dst.Q(), VectorTrue.Q(), VectorFalse.Q()); } - } - else if (VectorTrue == Dst) { + } else if (VectorTrue == Dst) { // Can use BIF without any moves. if (OpSize == 8) { bif(Dst.D(), VectorFalse.D(), VectorMask.D()); - } - else { + } else { bif(Dst.Q(), VectorFalse.Q(), VectorMask.Q()); } - } - else if (VectorFalse == Dst) { + } else if (VectorFalse == Dst) { // Can use BIT without any moves. if (OpSize == 8) { bit(Dst.D(), VectorTrue.D(), VectorMask.D()); - } - else { + } else { bit(Dst.Q(), VectorTrue.Q(), VectorMask.Q()); } - } - else { + } else { // Needs moves. if (OpSize == 8) { mov(Dst.D(), VectorMask.D()); @@ -2685,11 +2563,11 @@ DEF_OP(VCMPEQ) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit); if (HostSupportsSVE256 && Is256Bit) { // FIXME: We should rework this op to avoid the NZCV spill/fill dance. 
@@ -2729,11 +2607,11 @@ DEF_OP(VCMPEQZ) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Zeroing(); @@ -2777,11 +2655,11 @@ DEF_OP(VCMPGT) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Zeroing(); @@ -2821,11 +2699,11 @@ DEF_OP(VCMPGTZ) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Zeroing(); @@ -2865,11 +2743,11 @@ DEF_OP(VCMPLTZ) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Zeroing(); @@ -2910,10 +2788,10 @@ DEF_OP(VFCMPEQ) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Zeroing(); @@ -2926,16 +2804,13 @@ DEF_OP(VFCMPEQ) { } else { if (IsScalar) { switch (ElementSize) { - case 2: { - fcmeq(Dst.H(), Vector1.H(), Vector2.H()); - break; - } - case 4: - case 8: - fcmeq(SubRegSize.Scalar, Dst, Vector1, Vector2); - break; - default: - break; + case 2: { + fcmeq(Dst.H(), Vector1.H(), Vector2.H()); + break; + } + case 4: + case 8: fcmeq(SubRegSize.Scalar, Dst, Vector1, Vector2); break; + default: break; } } else { fcmeq(SubRegSize.Vector, Dst.Q(), Vector1.Q(), Vector2.Q()); @@ -2956,10 +2831,10 @@ DEF_OP(VFCMPNEQ) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Zeroing(); @@ -2972,16 +2847,13 @@ DEF_OP(VFCMPNEQ) { } else { if (IsScalar) { switch (ElementSize) { - case 2: { - fcmeq(Dst.H(), Vector1.H(), Vector2.H()); - break; - } - case 4: - case 8: - fcmeq(SubRegSize.Scalar, Dst, Vector1, Vector2); - break; - default: - break; + case 2: { + fcmeq(Dst.H(), Vector1.H(), Vector2.H()); + break; + } + case 4: + case 8: fcmeq(SubRegSize.Scalar, Dst, Vector1, Vector2); break; + default: break; } mvn(ARMEmitter::SubRegSize::i8Bit, Dst.D(), Dst.D()); } else { @@ -2993,7 +2865,7 @@ DEF_OP(VFCMPNEQ) { DEF_OP(VFCMPLT) { const auto Op = IROp->C(); - const auto OpSize = IROp->Size; + const auto OpSize = IROp->Size; const auto ElementSize = Op->Header.ElementSize; const auto IsScalar = ElementSize == OpSize; @@ -3004,10 +2876,10 @@ DEF_OP(VFCMPLT) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Zeroing(); @@ -3020,16 +2892,13 @@ DEF_OP(VFCMPLT) { } else { if (IsScalar) { switch (ElementSize) { - case 2: { - fcmgt(Dst.H(), Vector2.H(), Vector1.H()); - break; - } - case 4: - case 8: - fcmgt(SubRegSize.Scalar, Dst, Vector2, Vector1); - break; - default: - break; + case 2: { + fcmgt(Dst.H(), Vector2.H(), Vector1.H()); + break; + } + case 4: + case 8: fcmgt(SubRegSize.Scalar, Dst, Vector2, Vector1); break; + default: break; } } else { fcmgt(SubRegSize.Vector, Dst.Q(), Vector2.Q(), Vector1.Q()); @@ -3050,10 +2919,10 @@ DEF_OP(VFCMPGT) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Zeroing(); @@ -3066,16 +2935,13 @@ DEF_OP(VFCMPGT) { } else { if (IsScalar) { switch (ElementSize) { - case 2: { - fcmgt(Dst.H(), Vector1.H(), Vector2.H()); - break; - } - case 4: - case 8: - fcmgt(SubRegSize.Scalar, Dst, Vector1, Vector2); - break; - default: - break; + case 2: { + fcmgt(Dst.H(), Vector1.H(), Vector2.H()); + break; + } + case 4: + case 8: fcmgt(SubRegSize.Scalar, Dst, Vector1, Vector2); break; + default: break; } } else { fcmgt(SubRegSize.Vector, Dst.Q(), Vector1.Q(), Vector2.Q()); @@ -3096,10 +2962,10 @@ DEF_OP(VFCMPLE) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Zeroing(); @@ -3112,16 +2978,13 @@ DEF_OP(VFCMPLE) { } else { if (IsScalar) { switch (ElementSize) { - case 2: { - fcmge(Dst.H(), Vector2.H(), Vector1.H()); - break; - } - case 4: - case 8: - fcmge(SubRegSize.Scalar, Dst, Vector2, Vector1); - break; - default: - break; + case 2: { + fcmge(Dst.H(), Vector2.H(), Vector1.H()); + break; + } + case 4: + case 8: fcmge(SubRegSize.Scalar, Dst, Vector2, Vector1); break; + default: break; } } else { fcmge(SubRegSize.Vector, Dst.Q(), Vector2.Q(), Vector1.Q()); @@ -3143,10 +3006,10 @@ DEF_OP(VFCMPORD) { LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Incorrect size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Zeroing(); @@ -3163,20 +3026,19 @@ DEF_OP(VFCMPORD) { } else { if (IsScalar) { switch (ElementSize) { - case 2: { - fcmge(VTMP1.H(), Vector1.H(), Vector2.H()); - fcmgt(VTMP2.H(), Vector2.H(), Vector1.H()); - orr(Dst.D(), VTMP1.D(), VTMP2.D()); - break; - } - case 4: - case 8: - fcmge(SubRegSize.Scalar, VTMP1, Vector1, Vector2); - fcmgt(SubRegSize.Scalar, VTMP2, Vector2, Vector1); - orr(Dst.D(), VTMP1.D(), VTMP2.D()); - break; - default: - break; + case 2: { + fcmge(VTMP1.H(), Vector1.H(), Vector2.H()); + fcmgt(VTMP2.H(), Vector2.H(), Vector1.H()); + orr(Dst.D(), VTMP1.D(), VTMP2.D()); + break; + } + case 4: + case 8: + fcmge(SubRegSize.Scalar, VTMP1, Vector1, Vector2); + fcmgt(SubRegSize.Scalar, VTMP2, Vector2, Vector1); + orr(Dst.D(), VTMP1.D(), VTMP2.D()); + break; + default: break; } } else { fcmge(SubRegSize.Vector, VTMP1.Q(), Vector1.Q(), Vector2.Q()); @@ -3200,10 +3062,10 @@ DEF_OP(VFCMPUNO) { LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Incorrect size"); - const auto SubRegSize = ARMEmitter::ToVectorSizePair( - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit); + const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit); if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Zeroing(); @@ -3216,22 +3078,21 @@ DEF_OP(VFCMPUNO) { } else { if (IsScalar) { switch (ElementSize) { - case 2: { - fcmge(VTMP1.H(), Vector1.H(), Vector2.H()); - fcmgt(VTMP2.H(), Vector2.H(), Vector1.H()); - orr(Dst.D(), VTMP1.D(), VTMP2.D()); - mvn(ARMEmitter::SubRegSize::i8Bit, Dst.D(), Dst.D()); - break; - } - case 4: - case 8: - fcmge(SubRegSize.Scalar, VTMP1, Vector1, Vector2); - fcmgt(SubRegSize.Scalar, VTMP2, Vector2, Vector1); - orr(Dst.D(), VTMP1.D(), VTMP2.D()); - mvn(ARMEmitter::SubRegSize::i8Bit, Dst.D(), Dst.D()); - break; - default: - break; + case 2: { + fcmge(VTMP1.H(), Vector1.H(), Vector2.H()); + fcmgt(VTMP2.H(), Vector2.H(), Vector1.H()); + orr(Dst.D(), VTMP1.D(), VTMP2.D()); + mvn(ARMEmitter::SubRegSize::i8Bit, Dst.D(), Dst.D()); + break; + } + case 4: + case 8: + fcmge(SubRegSize.Scalar, VTMP1, Vector1, Vector2); + fcmgt(SubRegSize.Scalar, VTMP2, Vector2, Vector1); + orr(Dst.D(), VTMP1.D(), VTMP2.D()); + mvn(ARMEmitter::SubRegSize::i8Bit, Dst.D(), Dst.D()); + break; + default: break; } } else { fcmge(SubRegSize.Vector, VTMP1.Q(), Vector1.Q(), Vector2.Q()); @@ -3256,11 +3117,11 @@ DEF_OP(VUShl) { const auto RangeCheck = Op->RangeCheck; LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -3316,11 +3177,11 @@ DEF_OP(VUShr) { const auto RangeCheck = Op->RangeCheck; LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -3379,11 +3240,11 @@ DEF_OP(VSShr) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -3440,11 +3301,11 @@ DEF_OP(VUShlS) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -3475,11 +3336,11 @@ DEF_OP(VUShrS) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -3511,10 +3372,10 @@ DEF_OP(VUShrSWide) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i64Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -3526,12 +3387,10 @@ DEF_OP(VUShrSWide) { } if (ElementSize == 8) { lsr(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP1.Z()); - } - else { + } else { lsr_wide(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP1.Z()); } - } - else if (HostSupportsSVE128) { + } else if (HostSupportsSVE128) { const auto Mask = PRED_TMP_16B.Merging(); auto ShiftRegister = ShiftScalar; @@ -3554,8 +3413,7 @@ DEF_OP(VUShrSWide) { } if (ElementSize == 8) { lsr(SubRegSize, Dst.Z(), Mask, Dst.Z(), ShiftRegister.Z()); - } - else { + } else { lsr_wide(SubRegSize, Dst.Z(), Mask, Dst.Z(), ShiftRegister.Z()); } } else { @@ -3584,10 +3442,10 @@ DEF_OP(VSShrSWide) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i64Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? 
ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -3599,12 +3457,10 @@ DEF_OP(VSShrSWide) { } if (ElementSize == 8) { asr(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP1.Z()); - } - else { + } else { asr_wide(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP1.Z()); } - } - else if (HostSupportsSVE128) { + } else if (HostSupportsSVE128) { const auto Mask = PRED_TMP_16B.Merging(); auto ShiftRegister = ShiftScalar; @@ -3627,8 +3483,7 @@ DEF_OP(VSShrSWide) { } if (ElementSize == 8) { asr(SubRegSize, Dst.Z(), Mask, Dst.Z(), ShiftRegister.Z()); - } - else { + } else { asr_wide(SubRegSize, Dst.Z(), Mask, Dst.Z(), ShiftRegister.Z()); } } else { @@ -3657,10 +3512,10 @@ DEF_OP(VUShlSWide) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i64Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -3672,12 +3527,10 @@ DEF_OP(VUShlSWide) { } if (ElementSize == 8) { lsl(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP1.Z()); - } - else { + } else { lsl_wide(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP1.Z()); } - } - else if (HostSupportsSVE128) { + } else if (HostSupportsSVE128) { const auto Mask = PRED_TMP_16B.Merging(); auto ShiftRegister = ShiftScalar; @@ -3700,8 +3553,7 @@ DEF_OP(VUShlSWide) { } if (ElementSize == 8) { lsl(SubRegSize, Dst.Z(), Mask, Dst.Z(), ShiftRegister.Z()); - } - else { + } else { lsl_wide(SubRegSize, Dst.Z(), Mask, Dst.Z(), ShiftRegister.Z()); } } else { @@ -3729,13 +3581,13 @@ DEF_OP(VSShrS) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit; - if (HostSupportsSVE256 && Is256Bit) { + if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); // NOTE: SVE ASR is a destructive operation, so we need to @@ -3769,11 +3621,11 @@ DEF_OP(VInsElement) { if (HostSupportsSVE256 && Is256Bit) { LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? 
ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit; // Broadcast our source value across a temporary, // then combine with the destination. @@ -3809,14 +3661,13 @@ DEF_OP(VInsElement) { // Restore NZCV msr(ARMEmitter::SystemRegister::NZCV, TMP1); } - } - else { + } else { LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; // If nothing aliases the destination, then we can just // move the DestVector over and directly insert. @@ -3859,19 +3710,18 @@ DEF_OP(VDupElement) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i128Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i128Bit; if (HostSupportsSVE256 && Is256Bit) { dup(SubRegSize, Dst.Z(), Vector.Z(), Index); } else { if (Is128Bit) { dup(SubRegSize, Dst.Q(), Vector.Q(), Index); - } - else { + } else { dup(SubRegSize, Dst.D(), Vector.D(), Index); } } @@ -3936,11 +3786,11 @@ DEF_OP(VUShrI) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (BitShift >= (ElementSize * 8)) { movi(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), 0); @@ -3985,22 +3835,20 @@ DEF_OP(VUShraI) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? 
ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { if (Dst == DestVector) { usra(SubRegSize, Dst.Z(), Vector.Z(), BitShift); - } - else { + } else { if (Dst != Vector) { mov(Dst.Z(), DestVector.Z()); usra(SubRegSize, Dst.Z(), Vector.Z(), BitShift); - } - else { + } else { mov(VTMP1.Z(), DestVector.Z()); usra(SubRegSize, Dst.Z(), Vector.Z(), BitShift); mov(Dst.Z(), VTMP1.Z()); @@ -4009,13 +3857,11 @@ DEF_OP(VUShraI) { } else { if (Dst == DestVector) { usra(SubRegSize, Dst.Q(), Vector.Q(), BitShift); - } - else { + } else { if (Dst != Vector) { mov(Dst.Q(), DestVector.Q()); usra(SubRegSize, Dst.Q(), Vector.Q(), BitShift); - } - else { + } else { mov(VTMP1.Q(), DestVector.Q()); usra(SubRegSize, VTMP1.Q(), Vector.Q(), BitShift); mov(Dst.Q(), VTMP1.Q()); @@ -4036,11 +3882,11 @@ DEF_OP(VSShrI) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -4080,11 +3926,11 @@ DEF_OP(VShlI) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? 
ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (BitShift >= (ElementSize * 8)) { @@ -4129,10 +3975,10 @@ DEF_OP(VUShrNI) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4, "Incorrect size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { shrnb(SubRegSize, Dst.Z(), Vector.Z(), BitShift); @@ -4155,10 +4001,10 @@ DEF_OP(VUShrNI2) { const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_16B; @@ -4196,13 +4042,12 @@ DEF_OP(VSXTL) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Incorrect size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; - if ((HostSupportsSVE128 && !Is256Bit && !HostSupportsSVE256) || - (HostSupportsSVE256 && Is256Bit)) { + if ((HostSupportsSVE128 && !Is256Bit && !HostSupportsSVE256) || (HostSupportsSVE256 && Is256Bit)) { sunpklo(SubRegSize, Dst.Z(), Vector.Z()); } else { sxtl(SubRegSize, Dst.D(), Vector.D()); @@ -4220,13 +4065,12 @@ DEF_OP(VSXTL2) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Incorrect size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; - if ((HostSupportsSVE128 && !Is256Bit && !HostSupportsSVE256) || - (HostSupportsSVE256 && Is256Bit)) { + if ((HostSupportsSVE128 && !Is256Bit && !HostSupportsSVE256) || (HostSupportsSVE256 && Is256Bit)) { sunpkhi(SubRegSize, Dst.Z(), Vector.Z()); } else { sxtl2(SubRegSize, Dst.Q(), Vector.Q()); @@ -4244,13 +4088,12 @@ DEF_OP(VUXTL) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Incorrect size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; - if ((HostSupportsSVE128 && !Is256Bit && !HostSupportsSVE256) || - (HostSupportsSVE256 && Is256Bit)) { + if ((HostSupportsSVE128 && !Is256Bit && !HostSupportsSVE256) || (HostSupportsSVE256 && Is256Bit)) { uunpklo(SubRegSize, Dst.Z(), Vector.Z()); } else { uxtl(SubRegSize, Dst.D(), Vector.D()); @@ -4268,13 +4111,12 @@ DEF_OP(VUXTL2) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Incorrect size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; - if ((HostSupportsSVE128 && !Is256Bit && !HostSupportsSVE256) || - (HostSupportsSVE256 && Is256Bit)) { + if ((HostSupportsSVE128 && !Is256Bit && !HostSupportsSVE256) || (HostSupportsSVE256 && Is256Bit)) { uunpkhi(SubRegSize, Dst.Z(), Vector.Z()); } else { uxtl2(SubRegSize, Dst.Q(), Vector.Q()); @@ -4292,10 +4134,10 @@ DEF_OP(VSQXTN) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4, "Incorrect size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { // Note that SVE SQXTNB and SQXTNT are a tad different @@ -4350,10 +4192,10 @@ DEF_OP(VSQXTN2) { const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { // We use the 16 byte mask due to how SPLICE works. 
We only @@ -4399,10 +4241,10 @@ DEF_OP(VSQXTNPair) { auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4, "Incorrect size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { // This combines the SVE versions of VSQXTN/VSQXTN2. @@ -4424,8 +4266,7 @@ DEF_OP(VSQXTNPair) { if (OpSize == 8) { zip1(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), VectorLower.Q(), VectorUpper.Q()); sqxtn(SubRegSize, Dst, Dst); - } - else { + } else { if (Dst == VectorUpper) { // If the destination overlaps the upper then we need to move it temporarily. mov(VTMP1.Q(), VectorUpper.Q()); @@ -4448,10 +4289,10 @@ DEF_OP(VSQXTUN) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { sqxtunb(SubRegSize, Dst.Z(), Vector.Z()); @@ -4473,10 +4314,10 @@ DEF_OP(VSQXTUN2) { const auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { // NOTE: See VSQXTN2 implementation for an in-depth explanation @@ -4524,10 +4365,10 @@ DEF_OP(VSQXTUNPair) { auto VectorUpper = GetVReg(Op->VectorUpper.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4, "Incorrect size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { // This combines the SVE versions of VSQXTUN/VSQXTUN2. @@ -4549,8 +4390,7 @@ DEF_OP(VSQXTUNPair) { if (OpSize == 8) { zip1(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), VectorLower.Q(), VectorUpper.Q()); sqxtun(SubRegSize, Dst, Dst); - } - else { + } else { if (Dst == VectorUpper) { // If the destination overlaps the upper then we need to move it temporarily. 
mov(VTMP1.Q(), VectorUpper.Q()); @@ -4574,11 +4414,11 @@ DEF_OP(VSRSHR) { const auto BitShift = Op->BitShift; LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -4591,8 +4431,7 @@ DEF_OP(VSRSHR) { } else { if (OpSize == 8) { srshr(SubRegSize, Dst.D(), Vector.D(), BitShift); - } - else { + } else { srshr(SubRegSize, Dst.Q(), Vector.Q(), BitShift); } } @@ -4610,11 +4449,11 @@ DEF_OP(VSQSHL) { const auto BitShift = Op->BitShift; LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -4627,8 +4466,7 @@ DEF_OP(VSQSHL) { } else { if (OpSize == 8) { sqshl(SubRegSize, Dst.D(), Vector.D(), BitShift); - } - else { + } else { sqshl(SubRegSize, Dst.Q(), Vector.Q(), BitShift); } } @@ -4646,11 +4484,11 @@ DEF_OP(VMul) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { mul(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); @@ -4671,10 +4509,10 @@ DEF_OP(VUMull) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { umullb(SubRegSize, VTMP1.Z(), Vector1.Z(), Vector2.Z()); @@ -4697,10 +4535,10 @@ DEF_OP(VSMull) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { smullb(SubRegSize, VTMP1.Z(), Vector1.Z(), Vector2.Z()); @@ -4723,10 +4561,10 @@ DEF_OP(VUMull2) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { umullb(SubRegSize, VTMP1.Z(), Vector1.Z(), Vector2.Z()); @@ -4749,10 +4587,10 @@ DEF_OP(VSMull2) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { smullb(SubRegSize, VTMP1.Z(), Vector1.Z(), Vector2.Z()); @@ -4776,46 +4614,40 @@ DEF_OP(VUMulH) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i64Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit; - const auto SubRegSizeLarger = - ElementSize == 1 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSizeLarger = ElementSize == 1 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { umulh(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); - } - else if (HostSupportsSVE128 && Is128Bit) { + } else if (HostSupportsSVE128 && Is128Bit) { if (HostSupportsSVE256) { // Do predicated to ensure upper-bits get zero as expected const auto Mask = PRED_TMP_16B.Merging(); if (Dst == Vector1) { umulh(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector2.Z()); - } - else if (Dst == Vector2) { + } else if (Dst == Vector2) { umulh(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector1.Z()); - } - else { + } else { // Destination register doesn't overlap either source. // NOTE: SVE umulh (predicated) is a destructive operation. movprfx(Dst.Z(), Vector1.Z()); umulh(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector2.Z()); } - } - else { + } else { umulh(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); } - } - else if (OpSize == 8) { + } else if (OpSize == 8) { umull(SubRegSizeLarger, Dst.D(), Vector1.D(), Vector2.D()); shrn(SubRegSize, Dst.D(), Dst.D(), ElementSize * 8); - } - else { + } else { // ASIMD doesn't have a umulh. Need to emulate. umull2(SubRegSizeLarger, VTMP1.Q(), Vector1.Q(), Vector2.Q()); umull(SubRegSizeLarger, Dst.D(), Vector1.D(), Vector2.D()); @@ -4836,46 +4668,40 @@ DEF_OP(VSMulH) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i64Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit; - const auto SubRegSizeLarger = - ElementSize == 1 ? 
ARMEmitter::SubRegSize::i16Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSizeLarger = ElementSize == 1 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { smulh(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); - } - else if (HostSupportsSVE128 && Is128Bit) { + } else if (HostSupportsSVE128 && Is128Bit) { if (HostSupportsSVE256) { // Do predicated to ensure upper-bits get zero as expected const auto Mask = PRED_TMP_16B.Merging(); if (Dst == Vector1) { smulh(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector2.Z()); - } - else if (Dst == Vector2) { + } else if (Dst == Vector2) { smulh(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector1.Z()); - } - else { + } else { // Destination register doesn't overlap either source. // NOTE: SVE umulh (predicated) is a destructive operation. movprfx(Dst.Z(), Vector1.Z()); smulh(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector2.Z()); } - } - else { + } else { smulh(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z()); } - } - else if (OpSize == 8) { + } else if (OpSize == 8) { smull(SubRegSizeLarger, Dst.D(), Vector1.D(), Vector2.D()); shrn(SubRegSize, Dst.D(), Dst.D(), ElementSize * 8); - } - else { + } else { // ASIMD doesn't have a umulh. Need to emulate. smull2(SubRegSizeLarger, VTMP1.Q(), Vector1.Q(), Vector2.Q()); smull(SubRegSizeLarger, Dst.D(), Vector1.D(), Vector2.D()); @@ -4895,10 +4721,10 @@ DEF_OP(VUABDL) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? 
ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { // To mimic the behavior of AdvSIMD UABDL, we need to get the @@ -4926,10 +4752,10 @@ DEF_OP(VUABDL2) { const auto Vector2 = GetVReg(Op->Vector2.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : - ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { // To mimic the behavior of AdvSIMD UABDL, we need to get the @@ -4954,24 +4780,21 @@ DEF_OP(VTBL1) { const auto VectorTable = GetVReg(Op->VectorTable.ID()); switch (OpSize) { - case 8: { - tbl(Dst.D(), VectorTable.Q(), VectorIndices.D()); - break; - } - case 16: { - tbl(Dst.Q(), VectorTable.Q(), VectorIndices.Q()); - break; - } - case 32: { - LOGMAN_THROW_AA_FMT(HostSupportsSVE256, - "Host does not support SVE. Cannot perform 256-bit table lookup"); + case 8: { + tbl(Dst.D(), VectorTable.Q(), VectorIndices.D()); + break; + } + case 16: { + tbl(Dst.Q(), VectorTable.Q(), VectorIndices.Q()); + break; + } + case 32: { + LOGMAN_THROW_AA_FMT(HostSupportsSVE256, "Host does not support SVE. 
Cannot perform 256-bit table lookup"); - tbl(ARMEmitter::SubRegSize::i8Bit, Dst.Z(), VectorTable.Z(), VectorIndices.Z()); - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown OpSize: {}", OpSize); - break; + tbl(ARMEmitter::SubRegSize::i8Bit, Dst.Z(), VectorTable.Z(), VectorIndices.Z()); + break; + } + default: LOGMAN_MSG_A_FMT("Unknown OpSize: {}", OpSize); break; } } @@ -4989,37 +4812,32 @@ DEF_OP(VTBL2) { if (OpSize == 32) { mov(VTMP1.Z(), VectorTable1.Z()); mov(VTMP2.Z(), VectorTable2.Z()); - } - else { + } else { mov(VTMP1.Q(), VectorTable1.Q()); mov(VTMP2.Q(), VectorTable2.Q()); } - static_assert(ARMEmitter::AreVectorsSequential(VTMP1, VTMP2), - "VTMP1 and VTMP2 must be sequential in order to use double-table TBL"); + static_assert(ARMEmitter::AreVectorsSequential(VTMP1, VTMP2), "VTMP1 and VTMP2 must be sequential in order to use double-table TBL"); VectorTable1 = VTMP1; VectorTable2 = VTMP2; } switch (OpSize) { - case 8: { - tbl(Dst.D(), VectorTable1.Q(), VectorTable2.Q(), VectorIndices.D()); - break; - } - case 16: { - tbl(Dst.Q(), VectorTable1.Q(), VectorTable2.Q(), VectorIndices.Q()); - break; - } - case 32: { - LOGMAN_THROW_AA_FMT(HostSupportsSVE256, - "Host does not support SVE. Cannot perform 256-bit table lookup"); + case 8: { + tbl(Dst.D(), VectorTable1.Q(), VectorTable2.Q(), VectorIndices.D()); + break; + } + case 16: { + tbl(Dst.Q(), VectorTable1.Q(), VectorTable2.Q(), VectorIndices.Q()); + break; + } + case 32: { + LOGMAN_THROW_AA_FMT(HostSupportsSVE256, "Host does not support SVE. 
Cannot perform 256-bit table lookup"); - tbl(ARMEmitter::SubRegSize::i8Bit, Dst.Z(), VectorTable1.Z(), VectorTable2.Z(), VectorIndices.Z()); - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown OpSize: {}", OpSize); - break; + tbl(ARMEmitter::SubRegSize::i8Bit, Dst.Z(), VectorTable1.Z(), VectorTable2.Z(), VectorIndices.Z()); + break; + } + default: LOGMAN_MSG_A_FMT("Unknown OpSize: {}", OpSize); break; } } @@ -5034,50 +4852,44 @@ DEF_OP(VTBX1) { if (Dst != VectorSrcDst) { switch (OpSize) { - case 8: { - mov(VTMP1.D(), VectorSrcDst.D()); - tbx(VTMP1.D(), VectorTable.Q(), VectorIndices.D()); - mov(Dst.D(), VTMP1.D()); - break; - } - case 16: { - mov(VTMP1.Q(), VectorSrcDst.Q()); - tbx(VTMP1.Q(), VectorTable.Q(), VectorIndices.Q()); - mov(Dst.Q(), VTMP1.Q()); - break; - } - case 32: { - LOGMAN_THROW_AA_FMT(HostSupportsSVE256, - "Host does not support SVE. Cannot perform 256-bit table lookup"); - mov(VTMP1.Z(), VectorSrcDst.Z()); - tbx(ARMEmitter::SubRegSize::i8Bit, VTMP1.Z(), VectorTable.Z(), VectorIndices.Z()); - mov(Dst.Z(), VTMP1.Z()); - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown OpSize: {}", OpSize); - break; + case 8: { + mov(VTMP1.D(), VectorSrcDst.D()); + tbx(VTMP1.D(), VectorTable.Q(), VectorIndices.D()); + mov(Dst.D(), VTMP1.D()); + break; + } + case 16: { + mov(VTMP1.Q(), VectorSrcDst.Q()); + tbx(VTMP1.Q(), VectorTable.Q(), VectorIndices.Q()); + mov(Dst.Q(), VTMP1.Q()); + break; + } + case 32: { + LOGMAN_THROW_AA_FMT(HostSupportsSVE256, "Host does not support SVE. 
Cannot perform 256-bit table lookup"); + mov(VTMP1.Z(), VectorSrcDst.Z()); + tbx(ARMEmitter::SubRegSize::i8Bit, VTMP1.Z(), VectorTable.Z(), VectorIndices.Z()); + mov(Dst.Z(), VTMP1.Z()); + break; + } + default: LOGMAN_MSG_A_FMT("Unknown OpSize: {}", OpSize); break; } } else { switch (OpSize) { - case 8: { - tbx(VectorSrcDst.D(), VectorTable.Q(), VectorIndices.D()); - break; - } - case 16: { - tbx(VectorSrcDst.Q(), VectorTable.Q(), VectorIndices.Q()); - break; - } - case 32: { - LOGMAN_THROW_AA_FMT(HostSupportsSVE256, - "Host does not support SVE. Cannot perform 256-bit table lookup"); + case 8: { + tbx(VectorSrcDst.D(), VectorTable.Q(), VectorIndices.D()); + break; + } + case 16: { + tbx(VectorSrcDst.Q(), VectorTable.Q(), VectorIndices.Q()); + break; + } + case 32: { + LOGMAN_THROW_AA_FMT(HostSupportsSVE256, "Host does not support SVE. Cannot perform 256-bit table lookup"); - tbx(ARMEmitter::SubRegSize::i8Bit, VectorSrcDst.Z(), VectorTable.Z(), VectorIndices.Z()); - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown OpSize: {}", OpSize); - break; + tbx(ARMEmitter::SubRegSize::i8Bit, VectorSrcDst.Z(), VectorTable.Z(), VectorIndices.Z()); + break; + } + default: LOGMAN_MSG_A_FMT("Unknown OpSize: {}", OpSize); break; } } } @@ -5093,30 +4905,26 @@ DEF_OP(VRev32) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i16Bit; + const auto SubRegSize = ElementSize == 1 ? 
ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i16Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); switch (ElementSize) { - case 1: { - revb(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Mask, Vector.Z()); - break; - } - case 2: { - revh(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Mask, Vector.Z()); - break; - } - default: - LOGMAN_MSG_A_FMT("Invalid Element Size: {}", ElementSize); - break; + case 1: { + revb(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Mask, Vector.Z()); + break; + } + case 2: { + revh(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Mask, Vector.Z()); + break; + } + default: LOGMAN_MSG_A_FMT("Invalid Element Size: {}", ElementSize); break; } } else { if (OpSize == 8) { rev32(SubRegSize, Dst.D(), Vector.D()); - } - else { + } else { rev32(SubRegSize, Dst.Q(), Vector.Q()); } } @@ -5134,36 +4942,33 @@ DEF_OP(VRev64) { const auto Vector = GetVReg(Op->Vector.ID()); LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4, "Invalid size"); - const auto SubRegSize = - ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i8Bit; + const auto SubRegSize = ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit : + ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? 
ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i8Bit; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); switch (ElementSize) { - case 1: { - revb(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Vector.Z()); - break; - } - case 2: { - revh(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Vector.Z()); - break; - } - case 4: { - revw(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Vector.Z()); - break; - } - default: - LOGMAN_MSG_A_FMT("Invalid Element Size: {}", ElementSize); - break; + case 1: { + revb(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Vector.Z()); + break; + } + case 2: { + revh(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Vector.Z()); + break; + } + case 4: { + revw(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Vector.Z()); + break; + } + default: LOGMAN_MSG_A_FMT("Invalid Element Size: {}", ElementSize); break; } } else { if (OpSize == 8) { rev64(SubRegSize, Dst.D(), Vector.D()); - } - else { + } else { rev64(SubRegSize, Dst.Q(), Vector.Q()); } } @@ -5182,11 +4987,10 @@ DEF_OP(VFCADD) { LOGMAN_THROW_AA_FMT(ElementSize == 2 || ElementSize == 4 || ElementSize == 8, "Invalid size"); LOGMAN_THROW_A_FMT(Op->Rotate == 90 || Op->Rotate == 270, "Invalidate Rotate"); - const auto SubRegSize = - ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : - ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : ARMEmitter::SubRegSize::i64Bit; - const auto Rotate = - Op->Rotate == 90 ? ARMEmitter::Rotation::ROTATE_90 : ARMEmitter::Rotation::ROTATE_270; + const auto SubRegSize = ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit : + ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit : + ARMEmitter::SubRegSize::i64Bit; + const auto Rotate = Op->Rotate == 90 ? 
ARMEmitter::Rotation::ROTATE_90 : ARMEmitter::Rotation::ROTATE_270; if (HostSupportsSVE256 && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); @@ -5209,13 +5013,11 @@ DEF_OP(VFCADD) { } else { if (OpSize == 8) { fcadd(SubRegSize, Dst.D(), Vector1.D(), Vector2.D(), Rotate); - } - else { + } else { fcadd(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q(), Rotate); } } } #undef DEF_OP -} - +} // namespace FEXCore::CPU diff --git a/FEXCore/Source/Interface/Core/LookupCache.cpp b/FEXCore/Source/Interface/Core/LookupCache.cpp index 03c3123c9d..1deefdb7dc 100644 --- a/FEXCore/Source/Interface/Core/LookupCache.cpp +++ b/FEXCore/Source/Interface/Core/LookupCache.cpp @@ -13,8 +13,8 @@ desc: Stores information about blocks, and provides C++ implementations to looku #include "Interface/Core/LookupCache.h" namespace FEXCore { -LookupCache::LookupCache(FEXCore::Context::ContextImpl *CTX) - : BlockLinks_mbr { fextl::pmr::get_default_resource() } +LookupCache::LookupCache(FEXCore::Context::ContextImpl* CTX) + : BlockLinks_mbr {fextl::pmr::get_default_resource()} , ctx {CTX} { TotalCacheSize = ctx->Config.VirtualMemSize / 4096 * 8 + CODE_SIZE + L1_SIZE; @@ -78,5 +78,4 @@ void LookupCache::ClearCache() { BlockList.clear(); } -} - +} // namespace FEXCore diff --git a/FEXCore/Source/Interface/Core/ObjectCache/JobHandling.cpp b/FEXCore/Source/Interface/Core/ObjectCache/JobHandling.cpp index 9f5ea8eb0f..7b81cc1442 100644 --- a/FEXCore/Source/Interface/Core/ObjectCache/JobHandling.cpp +++ b/FEXCore/Source/Interface/Core/ObjectCache/JobHandling.cpp @@ -11,120 +11,112 @@ #include namespace FEXCore::CodeSerialize { - void AsyncJobHandler::AsyncAddNamedRegionJob(uintptr_t Base, uintptr_t Size, uintptr_t Offset, const fextl::string &filename) { +void AsyncJobHandler::AsyncAddNamedRegionJob(uintptr_t Base, uintptr_t Size, uintptr_t Offset, const fextl::string& filename) { #ifndef _WIN32 - // This function adds a named region *JOB* to our named region handler - // This needs to be as fast as 
possible to keep out of the way of the JIT + // This function adds a named region *JOB* to our named region handler + // This needs to be as fast as possible to keep out of the way of the JIT - const fextl::string BaseFilename = FHU::Filesystem::GetFilename(filename); + const fextl::string BaseFilename = FHU::Filesystem::GetFilename(filename); - if (!BaseFilename.empty()) { - // Create a new entry that once set up will be put in to our section object map - auto Entry = fextl::make_unique( - Base, - Size, - Offset, - filename, - NamedRegionHandler->DefaultCodeHeader(Base, Offset) - ); + if (!BaseFilename.empty()) { + // Create a new entry that once set up will be put in to our section object map + auto Entry = fextl::make_unique(Base, Size, Offset, filename, NamedRegionHandler->DefaultCodeHeader(Base, Offset)); - // Lock the job ref counter so we can block anything attempting to use the entry before it is loaded - Entry->NamedJobRefCountMutex.lock(); + // Lock the job ref counter so we can block anything attempting to use the entry before it is loaded + Entry->NamedJobRefCountMutex.lock(); - CodeRegionMapType::iterator EntryIterator; - { - std::unique_lock lk {CodeObjectCacheService->GetEntryMapMutex()}; - - auto &EntryMap = CodeObjectCacheService->GetEntryMap(); + CodeRegionMapType::iterator EntryIterator; + { + std::unique_lock lk {CodeObjectCacheService->GetEntryMapMutex()}; - auto it = EntryMap.emplace(Base, std::move(Entry)); - if (!it.second) { - // This happens when an application overwrites a previous region without unmapping what was there + auto& EntryMap = CodeObjectCacheService->GetEntryMap(); - // Lock this entry's Named job reference counter. - // Once this passes then we know that this section has been loaded. 
- it.first->second->NamedJobRefCountMutex.lock(); + auto it = EntryMap.emplace(Base, std::move(Entry)); + if (!it.second) { + // This happens when an application overwrites a previous region without unmapping what was there - // Finalize anything the region needs to do first. - CodeObjectCacheService->DoCodeRegionClosure(it.first->second->Base, it.first->second.get()); + // Lock this entry's Named job reference counter. + // Once this passes then we know that this section has been loaded. + it.first->second->NamedJobRefCountMutex.lock(); - // munmap the file that was mapped - FEXCore::Allocator::munmap(it.first->second->CodeData, it.first->second->FileSize); + // Finalize anything the region needs to do first. + CodeObjectCacheService->DoCodeRegionClosure(it.first->second->Base, it.first->second.get()); - // Remove this entry from the unrelocated map as well - { - std::unique_lock lk2 {CodeObjectCacheService->GetUnrelocatedEntryMapMutex()}; - CodeObjectCacheService->GetUnrelocatedEntryMap().erase(it.first->second->EntryHeader.OriginalBase); - } + // munmap the file that was mapped + FEXCore::Allocator::munmap(it.first->second->CodeData, it.first->second->FileSize); - // Now overwrite the entry in the map - it = EntryMap.insert_or_assign(Base, std::move(Entry)); - EntryIterator = it.first; - } - else { - // No overwrite, just insert - EntryIterator = it.first; + // Remove this entry from the unrelocated map as well + { + std::unique_lock lk2 {CodeObjectCacheService->GetUnrelocatedEntryMapMutex()}; + CodeObjectCacheService->GetUnrelocatedEntryMap().erase(it.first->second->EntryHeader.OriginalBase); } + + // Now overwrite the entry in the map + it = EntryMap.insert_or_assign(Base, std::move(Entry)); + EntryIterator = it.first; + } else { + // No overwrite, just insert + EntryIterator = it.first; } + } - // Now that this entry has been added to the map, we can insert a load job using the entry iterator. 
- // This allows us to quickly unblock the JIT thread when it is loading multiple regions and have the async thread - // do the loading for us. - // - // Create the async work queue job now so it can load - NamedRegionHandler->AsyncAddNamedRegionWorkItem(BaseFilename, filename, true, EntryIterator); + // Now that this entry has been added to the map, we can insert a load job using the entry iterator. + // This allows us to quickly unblock the JIT thread when it is loading multiple regions and have the async thread + // do the loading for us. + // + // Create the async work queue job now so it can load + NamedRegionHandler->AsyncAddNamedRegionWorkItem(BaseFilename, filename, true, EntryIterator); - // Tell the async thread that it has work to do - CodeObjectCacheService->NotifyWork(); - } -#endif + // Tell the async thread that it has work to do + CodeObjectCacheService->NotifyWork(); } +#endif +} - void AsyncJobHandler::AsyncRemoveNamedRegionJob(uintptr_t Base, uintptr_t Size) { +void AsyncJobHandler::AsyncRemoveNamedRegionJob(uintptr_t Base, uintptr_t Size) { #ifndef _WIN32 - // Removing a named region through the job system - // We need to find the entry that we are deleting first - fextl::unique_ptr EntryPointer; - { - std::unique_lock lk {CodeObjectCacheService->GetEntryMapMutex()}; + // Removing a named region through the job system + // We need to find the entry that we are deleting first + fextl::unique_ptr EntryPointer; + { + std::unique_lock lk {CodeObjectCacheService->GetEntryMapMutex()}; - auto &EntryMap = CodeObjectCacheService->GetEntryMap(); - auto it = EntryMap.find(Base); - if (it != EntryMap.end()) { - // Lock the job ref counter since we are erasing it - // Once this passes it will have been loaded - it->second->NamedJobRefCountMutex.lock(); + auto& EntryMap = CodeObjectCacheService->GetEntryMap(); + auto it = EntryMap.find(Base); + if (it != EntryMap.end()) { + // Lock the job ref counter since we are erasing it + // Once this passes it will have 
been loaded + it->second->NamedJobRefCountMutex.lock(); - // Take the pointer from the map - EntryPointer = std::move(it->second); + // Take the pointer from the map + EntryPointer = std::move(it->second); - // We can now unmap the file data - FEXCore::Allocator::munmap(EntryPointer->CodeData, EntryPointer->FileSize); + // We can now unmap the file data + FEXCore::Allocator::munmap(EntryPointer->CodeData, EntryPointer->FileSize); - // Remove this from the entry map - EntryMap.erase(it); + // Remove this from the entry map + EntryMap.erase(it); - // Remove this entry from the unrelocated map as well - { - std::unique_lock lk2 {CodeObjectCacheService->GetUnrelocatedEntryMapMutex()}; - CodeObjectCacheService->GetUnrelocatedEntryMap().erase(EntryPointer->EntryHeader.OriginalBase); - } - } - else { - // Tried to remove something that wasn't in our code object tracking - return; + // Remove this entry from the unrelocated map as well + { + std::unique_lock lk2 {CodeObjectCacheService->GetUnrelocatedEntryMapMutex()}; + CodeObjectCacheService->GetUnrelocatedEntryMap().erase(EntryPointer->EntryHeader.OriginalBase); } + } else { + // Tried to remove something that wasn't in our code object tracking + return; + } - // Create the async work queue job now so it can finalize what it needs to do - NamedRegionHandler->AsyncRemoveNamedRegionWorkItem(Base, Size, std::move(EntryPointer)); + // Create the async work queue job now so it can finalize what it needs to do + NamedRegionHandler->AsyncRemoveNamedRegionWorkItem(Base, Size, std::move(EntryPointer)); - // Tell the async thread that it has work to do - CodeObjectCacheService->NotifyWork(); - } -#endif + // Tell the async thread that it has work to do + CodeObjectCacheService->NotifyWork(); } +#endif +} - void AsyncJobHandler::AsyncAddSerializationJob(fextl::unique_ptr Data) { - // XXX: Actually add serialization job - } +void AsyncJobHandler::AsyncAddSerializationJob(fextl::unique_ptr Data) { + // XXX: Actually add serialization 
job } +} // namespace FEXCore::CodeSerialize diff --git a/FEXCore/Source/Interface/Core/ObjectCache/NamedRegionObjectHandler.cpp b/FEXCore/Source/Interface/Core/ObjectCache/NamedRegionObjectHandler.cpp index 3a9c116971..f2beb7d594 100644 --- a/FEXCore/Source/Interface/Core/ObjectCache/NamedRegionObjectHandler.cpp +++ b/FEXCore/Source/Interface/Core/ObjectCache/NamedRegionObjectHandler.cpp @@ -7,66 +7,67 @@ #include namespace FEXCore::CodeSerialize { - NamedRegionObjectHandler::NamedRegionObjectHandler(FEXCore::Context::ContextImpl *ctx) { - DefaultSerializationConfig.Cookie = CODE_COOKIE; +NamedRegionObjectHandler::NamedRegionObjectHandler(FEXCore::Context::ContextImpl* ctx) { + DefaultSerializationConfig.Cookie = CODE_COOKIE; - // Initialize the Arch from CPUID - uint32_t Arch = ctx->CPUID.RunFunction(0x4000'0001, 0).eax & 0xF; - DefaultSerializationConfig.Arch = Arch; + // Initialize the Arch from CPUID + uint32_t Arch = ctx->CPUID.RunFunction(0x4000'0001, 0).eax & 0xF; + DefaultSerializationConfig.Arch = Arch; - DefaultSerializationConfig.MaxInstPerBlock = ctx->Config.MaxInstPerBlock; - DefaultSerializationConfig.MultiBlock = ctx->Config.Multiblock; - DefaultSerializationConfig.TSOEnabled = ctx->Config.TSOEnabled; - DefaultSerializationConfig.ABILocalFlags = ctx->Config.ABILocalFlags; - DefaultSerializationConfig.ParanoidTSO = ctx->Config.ParanoidTSO; - DefaultSerializationConfig.Is64BitMode = ctx->Config.Is64BitMode; - DefaultSerializationConfig.SMCChecks = ctx->Config.SMCChecks; - DefaultSerializationConfig.x87ReducedPrecision = ctx->Config.x87ReducedPrecision; - } + DefaultSerializationConfig.MaxInstPerBlock = ctx->Config.MaxInstPerBlock; + DefaultSerializationConfig.MultiBlock = ctx->Config.Multiblock; + DefaultSerializationConfig.TSOEnabled = ctx->Config.TSOEnabled; + DefaultSerializationConfig.ABILocalFlags = ctx->Config.ABILocalFlags; + DefaultSerializationConfig.ParanoidTSO = ctx->Config.ParanoidTSO; + DefaultSerializationConfig.Is64BitMode = 
ctx->Config.Is64BitMode; + DefaultSerializationConfig.SMCChecks = ctx->Config.SMCChecks; + DefaultSerializationConfig.x87ReducedPrecision = ctx->Config.x87ReducedPrecision; +} - void NamedRegionObjectHandler::AddNamedRegionObject(CodeRegionMapType::iterator Entry, const fextl::string &base_filename, const fextl::string &filename, bool Executable) { - // XXX: Add named region objects +void NamedRegionObjectHandler::AddNamedRegionObject(CodeRegionMapType::iterator Entry, const fextl::string& base_filename, + const fextl::string& filename, bool Executable) { + // XXX: Add named region objects - // XXX: Until entry loading is complete just claim it is loaded - Entry->second->NamedJobRefCountMutex.unlock(); - } - - void NamedRegionObjectHandler::RemoveNamedRegionObject(uintptr_t Base, uintptr_t Size, fextl::unique_ptr Entry) { - // XXX: Remove named region objects + // XXX: Until entry loading is complete just claim it is loaded + Entry->second->NamedJobRefCountMutex.unlock(); +} - // XXX: Until entry loading is complete just claim it is loaded - Entry->NamedJobRefCountMutex.unlock(); - } +void NamedRegionObjectHandler::RemoveNamedRegionObject(uintptr_t Base, uintptr_t Size, fextl::unique_ptr Entry) { + // XXX: Remove named region objects - void NamedRegionObjectHandler::HandleNamedRegionObjectJobs() { - // Walk through all of our jobs sequentially until the work queue is empty - while (NamedWorkQueueJobs.load()) { - fextl::unique_ptr WorkItem; + // XXX: Until entry loading is complete just claim it is loaded + Entry->NamedJobRefCountMutex.unlock(); +} - { - // Lock the work queue mutex for a short moment and grab an item from the list - std::unique_lock lk {NamedWorkQueueMutex}; - size_t WorkItems = WorkQueue.size(); - if (WorkItems != 0) { - WorkItem = std::move(WorkQueue.front()); - WorkQueue.pop(); - } +void NamedRegionObjectHandler::HandleNamedRegionObjectJobs() { + // Walk through all of our jobs sequentially until the work queue is empty + while 
(NamedWorkQueueJobs.load()) { + fextl::unique_ptr WorkItem; - // Atomically update the number of jobs - --NamedWorkQueueJobs; + { + // Lock the work queue mutex for a short moment and grab an item from the list + std::unique_lock lk {NamedWorkQueueMutex}; + size_t WorkItems = WorkQueue.size(); + if (WorkItems != 0) { + WorkItem = std::move(WorkQueue.front()); + WorkQueue.pop(); } - if (WorkItem) { - if (WorkItem->GetType() == AsyncJobHandler::NamedRegionJobType::JOB_ADD_NAMED_REGION) { - auto WorkAdd = static_cast(WorkItem.get()); - AddNamedRegionObject(WorkAdd->Entry, WorkAdd->BaseFilename, WorkAdd->Filename, WorkAdd->Executable); - } + // Atomically update the number of jobs + --NamedWorkQueueJobs; + } + + if (WorkItem) { + if (WorkItem->GetType() == AsyncJobHandler::NamedRegionJobType::JOB_ADD_NAMED_REGION) { + auto WorkAdd = static_cast(WorkItem.get()); + AddNamedRegionObject(WorkAdd->Entry, WorkAdd->BaseFilename, WorkAdd->Filename, WorkAdd->Executable); + } - if (WorkItem->GetType() == AsyncJobHandler::NamedRegionJobType::JOB_REMOVE_NAMED_REGION) { - auto WorkRemove = static_cast(WorkItem.get()); - RemoveNamedRegionObject(WorkRemove->Base, WorkRemove->Size, std::move(WorkRemove->Entry)); - } + if (WorkItem->GetType() == AsyncJobHandler::NamedRegionJobType::JOB_REMOVE_NAMED_REGION) { + auto WorkRemove = static_cast(WorkItem.get()); + RemoveNamedRegionObject(WorkRemove->Base, WorkRemove->Size, std::move(WorkRemove->Entry)); } } } } +} // namespace FEXCore::CodeSerialize diff --git a/FEXCore/Source/Interface/Core/ObjectCache/ObjectCacheService.cpp b/FEXCore/Source/Interface/Core/ObjectCache/ObjectCacheService.cpp index 5cea7b7abd..e4aaef1272 100644 --- a/FEXCore/Source/Interface/Core/ObjectCache/ObjectCacheService.cpp +++ b/FEXCore/Source/Interface/Core/ObjectCache/ObjectCacheService.cpp @@ -6,80 +6,80 @@ #include namespace { - static void* ThreadHandler(void *Arg) { - FEXCore::CodeSerialize::CodeObjectSerializeService *This = reinterpret_cast(Arg); - 
This->ExecutionThread(); - return nullptr; - } +static void* ThreadHandler(void* Arg) { + FEXCore::CodeSerialize::CodeObjectSerializeService* This = reinterpret_cast(Arg); + This->ExecutionThread(); + return nullptr; } +} // namespace namespace FEXCore::CodeSerialize { - CodeObjectSerializeService::CodeObjectSerializeService(FEXCore::Context::ContextImpl *ctx) - : CTX {ctx} - , AsyncHandler { &NamedRegionHandler , this } - , NamedRegionHandler { ctx } { - Initialize(); - } +CodeObjectSerializeService::CodeObjectSerializeService(FEXCore::Context::ContextImpl* ctx) + : CTX {ctx} + , AsyncHandler {&NamedRegionHandler, this} + , NamedRegionHandler {ctx} { + Initialize(); +} - void CodeObjectSerializeService::Shutdown() { - if (CTX->Config.CacheObjectCodeCompilation() == FEXCore::Config::ConfigObjectCodeHandler::CONFIG_NONE) { - return; - } +void CodeObjectSerializeService::Shutdown() { + if (CTX->Config.CacheObjectCodeCompilation() == FEXCore::Config::ConfigObjectCodeHandler::CONFIG_NONE) { + return; + } - WorkerThreadShuttingDown = true; + WorkerThreadShuttingDown = true; - // Kick the working thread - WorkAvailable.NotifyAll(); + // Kick the working thread + WorkAvailable.NotifyAll(); - if (WorkerThread->joinable()) { - // Wait for worker thread to close down - WorkerThread->join(nullptr); - } + if (WorkerThread->joinable()) { + // Wait for worker thread to close down + WorkerThread->join(nullptr); } +} - void CodeObjectSerializeService::Initialize() { - // Add a canary so we don't crash on empty map iterator handling - auto it = AddressToEntryMap.insert_or_assign(~0ULL, fextl::make_unique()); - UnrelocatedAddressToEntryMap.insert_or_assign(~0ULL, it.first->second.get()); - - uint64_t OldMask = FEXCore::Threads::SetSignalMask(~0ULL); - WorkerThread = FEXCore::Threads::Thread::Create(ThreadHandler, this); - FEXCore::Threads::SetSignalMask(OldMask); - } +void CodeObjectSerializeService::Initialize() { + // Add a canary so we don't crash on empty map iterator handling + 
auto it = AddressToEntryMap.insert_or_assign(~0ULL, fextl::make_unique()); + UnrelocatedAddressToEntryMap.insert_or_assign(~0ULL, it.first->second.get()); - void CodeObjectSerializeService::DoCodeRegionClosure(uint64_t Base, CodeRegionEntry *it) { - if (Base == ~0ULL) { - // Don't do closure on canary - return; - } - // XXX: Do code region closure - } + uint64_t OldMask = FEXCore::Threads::SetSignalMask(~0ULL); + WorkerThread = FEXCore::Threads::Thread::Create(ThreadHandler, this); + FEXCore::Threads::SetSignalMask(OldMask); +} - CodeObjectFileSection const *CodeObjectSerializeService::FetchCodeObjectFromCache(uint64_t GuestRIP) { - // XXX: Actually fetch code objects from cache - return nullptr; +void CodeObjectSerializeService::DoCodeRegionClosure(uint64_t Base, CodeRegionEntry* it) { + if (Base == ~0ULL) { + // Don't do closure on canary + return; } + // XXX: Do code region closure +} - void CodeObjectSerializeService::ExecutionThread() { - // Set our thread name so we can see its relation - FEXCore::Threads::SetThreadName("ObjectCodeSeri\0"); - while (WorkerThreadShuttingDown.load() != true) { - // Wait for work - WorkAvailable.Wait(); +const CodeObjectFileSection* CodeObjectSerializeService::FetchCodeObjectFromCache(uint64_t GuestRIP) { + // XXX: Actually fetch code objects from cache + return nullptr; +} - // Handle named region async jobs first. Highest priority - NamedRegionHandler.HandleNamedRegionObjectJobs(); +void CodeObjectSerializeService::ExecutionThread() { + // Set our thread name so we can see its relation + FEXCore::Threads::SetThreadName("ObjectCodeSeri\0"); + while (WorkerThreadShuttingDown.load() != true) { + // Wait for work + WorkAvailable.Wait(); - // XXX: Handle code serialization jobs second. - } + // Handle named region async jobs first. 
Highest priority + NamedRegionHandler.HandleNamedRegionObjectJobs(); - // Do final code region closures on thread shutdown - for (auto &it : AddressToEntryMap) { - DoCodeRegionClosure(it.first, it.second.get()); - } + // XXX: Handle code serialization jobs second. + } - // Safely clear our maps now - AddressToEntryMap.clear(); - UnrelocatedAddressToEntryMap.clear(); + // Do final code region closures on thread shutdown + for (auto& it : AddressToEntryMap) { + DoCodeRegionClosure(it.first, it.second.get()); } + + // Safely clear our maps now + AddressToEntryMap.clear(); + UnrelocatedAddressToEntryMap.clear(); } +} // namespace FEXCore::CodeSerialize diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 400973acea..f0bfdceaad 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -41,25 +41,15 @@ void OpDispatchBuilder::SyscallOp(OpcodeArgs) { constexpr size_t SyscallArgs = 7; using SyscallArray = std::array; - size_t NumArguments{}; - const SyscallArray *GPRIndexes {}; + size_t NumArguments {}; + const SyscallArray* GPRIndexes {}; static constexpr SyscallArray GPRIndexes_64 = { - FEXCore::X86State::REG_RAX, - FEXCore::X86State::REG_RDI, - FEXCore::X86State::REG_RSI, - FEXCore::X86State::REG_RDX, - FEXCore::X86State::REG_R10, - FEXCore::X86State::REG_R8, - FEXCore::X86State::REG_R9, + FEXCore::X86State::REG_RAX, FEXCore::X86State::REG_RDI, FEXCore::X86State::REG_RSI, FEXCore::X86State::REG_RDX, + FEXCore::X86State::REG_R10, FEXCore::X86State::REG_R8, FEXCore::X86State::REG_R9, }; static constexpr SyscallArray GPRIndexes_32 = { - FEXCore::X86State::REG_RAX, - FEXCore::X86State::REG_RBX, - FEXCore::X86State::REG_RCX, - FEXCore::X86State::REG_RDX, - FEXCore::X86State::REG_RSI, - FEXCore::X86State::REG_RDI, - FEXCore::X86State::REG_RBP, + FEXCore::X86State::REG_RAX, FEXCore::X86State::REG_RBX, FEXCore::X86State::REG_RCX, 
FEXCore::X86State::REG_RDX, + FEXCore::X86State::REG_RSI, FEXCore::X86State::REG_RDI, FEXCore::X86State::REG_RBP, }; static constexpr SyscallArray GPRIndexes_Hangover = { @@ -67,12 +57,8 @@ void OpDispatchBuilder::SyscallOp(OpcodeArgs) { }; static constexpr SyscallArray GPRIndexes_Win64 = { - FEXCore::X86State::REG_RAX, - FEXCore::X86State::REG_R10, - FEXCore::X86State::REG_RDX, - FEXCore::X86State::REG_R8, - FEXCore::X86State::REG_R9, - FEXCore::X86State::REG_RSP, + FEXCore::X86State::REG_RAX, FEXCore::X86State::REG_R10, FEXCore::X86State::REG_RDX, + FEXCore::X86State::REG_R8, FEXCore::X86State::REG_R9, FEXCore::X86State::REG_RSP, }; SyscallFlags DefaultSyscallFlags = FEXCore::IR::SyscallFlags::DEFAULT; @@ -81,27 +67,22 @@ void OpDispatchBuilder::SyscallOp(OpcodeArgs) { if (OSABI == FEXCore::HLE::SyscallOSABI::OS_LINUX64) { NumArguments = GPRIndexes_64.size(); GPRIndexes = &GPRIndexes_64; - } - else if (OSABI == FEXCore::HLE::SyscallOSABI::OS_LINUX32) { + } else if (OSABI == FEXCore::HLE::SyscallOSABI::OS_LINUX32) { NumArguments = GPRIndexes_32.size(); GPRIndexes = &GPRIndexes_32; - } - else if (OSABI == FEXCore::HLE::SyscallOSABI::OS_WIN64) { + } else if (OSABI == FEXCore::HLE::SyscallOSABI::OS_WIN64) { NumArguments = 6; GPRIndexes = &GPRIndexes_Win64; DefaultSyscallFlags = FEXCore::IR::SyscallFlags::NORETURNEDRESULT; - } - else if (OSABI == FEXCore::HLE::SyscallOSABI::OS_WIN32) { + } else if (OSABI == FEXCore::HLE::SyscallOSABI::OS_WIN32) { // Since the whole context is going to be saved at entry anyway, theres no need to do additional work to pass in args NumArguments = 0; GPRIndexes = nullptr; DefaultSyscallFlags = FEXCore::IR::SyscallFlags::NORETURNEDRESULT; - } - else if (OSABI == FEXCore::HLE::SyscallOSABI::OS_HANGOVER) { + } else if (OSABI == FEXCore::HLE::SyscallOSABI::OS_HANGOVER) { NumArguments = 1; GPRIndexes = &GPRIndexes_Hangover; - } - else { + } else { LogMan::Msg::DFmt("Unhandled OSABI syscall"); } @@ -112,14 +93,8 @@ void 
OpDispatchBuilder::SyscallOp(OpcodeArgs) { auto NewRIP = GetRelocatedPC(Op, -Op->InstSize); _StoreContext(GPRSize, GPRClass, NewRIP, offsetof(FEXCore::Core::CPUState, rip)); - OrderedNode *Arguments[SyscallArgs] { - InvalidNode, - InvalidNode, - InvalidNode, - InvalidNode, - InvalidNode, - InvalidNode, - InvalidNode, + OrderedNode* Arguments[SyscallArgs] { + InvalidNode, InvalidNode, InvalidNode, InvalidNode, InvalidNode, InvalidNode, InvalidNode, }; for (size_t i = 0; i < NumArguments; ++i) { Arguments[i] = LoadGPRRegister(GPRIndexes->at(i)); @@ -139,15 +114,7 @@ void OpDispatchBuilder::SyscallOp(OpcodeArgs) { StoreGPRRegister(X86State::REG_RCX, RIPAfterInst, 8); } - auto SyscallOp = _Syscall( - Arguments[0], - Arguments[1], - Arguments[2], - Arguments[3], - Arguments[4], - Arguments[5], - Arguments[6], - DefaultSyscallFlags); + auto SyscallOp = _Syscall(Arguments[0], Arguments[1], Arguments[2], Arguments[3], Arguments[4], Arguments[5], Arguments[6], DefaultSyscallFlags); if (OSABI != FEXCore::HLE::SyscallOSABI::OS_HANGOVER && (DefaultSyscallFlags & FEXCore::IR::SyscallFlags::NORETURNEDRESULT) != FEXCore::IR::SyscallFlags::NORETURNEDRESULT) { @@ -169,27 +136,20 @@ void OpDispatchBuilder::ThunkOp(OpcodeArgs) { CalculateDeferredFlags(); const uint8_t GPRSize = CTX->GetGPRSize(); - uint8_t *sha256 = (uint8_t *)(Op->PC + 2); + uint8_t* sha256 = (uint8_t*)(Op->PC + 2); if (CTX->Config.Is64BitMode) { // x86-64 ABI puts the function argument in RDI - _Thunk( - LoadGPRRegister(X86State::REG_RDI), - *reinterpret_cast(sha256) - ); - } - else { + _Thunk(LoadGPRRegister(X86State::REG_RDI), *reinterpret_cast(sha256)); + } else { // x86 fastcall ABI puts the function argument in ECX - _Thunk( - LoadGPRRegister(X86State::REG_RCX), - *reinterpret_cast(sha256) - ); + _Thunk(LoadGPRRegister(X86State::REG_RCX), *reinterpret_cast(sha256)); } auto Constant = _Constant(GPRSize); auto OldSP = LoadGPRRegister(X86State::REG_RSP); auto NewRIP = _LoadMem(GPRClass, GPRSize, OldSP, GPRSize); 
- OrderedNode *NewSP = _Add(IR::SizeToOpSize(GPRSize), OldSP, Constant); + OrderedNode* NewSP = _Add(IR::SizeToOpSize(GPRSize), OldSP, Constant); // Store the new stack pointer StoreGPRRegister(X86State::REG_RSP, NewSP); @@ -205,8 +165,9 @@ void OpDispatchBuilder::LEAOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); if (CTX->Config.Is64BitMode) { - const uint32_t DstSize = X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST ? 2 : - X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_WIDENING_SIZE_LAST ? 8 : 4; + const uint32_t DstSize = X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST ? 2 : + X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_WIDENING_SIZE_LAST ? 8 : + 4; auto Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], SrcSize, Op->Flags, {.LoadData = false}); if (DstSize != SrcSize) { @@ -215,8 +176,7 @@ void OpDispatchBuilder::LEAOp(OpcodeArgs) { Src = _Bfe(IR::SizeToOpSize(GPRSize), SrcSize * 8, 0, Src); } StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, DstSize, -1); - } - else { + } else { uint32_t DstSize = X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST ? 2 : 4; auto Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], SrcSize, Op->Flags, {.LoadData = false}); @@ -224,8 +184,7 @@ void OpDispatchBuilder::LEAOp(OpcodeArgs) { } } -void OpDispatchBuilder::NOPOp(OpcodeArgs) { -} +void OpDispatchBuilder::NOPOp(OpcodeArgs) {} void OpDispatchBuilder::RETOp(OpcodeArgs) { const uint8_t GPRSize = CTX->GetGPRSize(); @@ -235,8 +194,7 @@ void OpDispatchBuilder::RETOp(OpcodeArgs) { _InvalidateFlags(~0UL); // all flags // Deferred flags are invalidated now InvalidateDeferredFlags(); - } - else { + } else { // Calculate flags early. 
CalculateDeferredFlags(); } @@ -245,12 +203,11 @@ void OpDispatchBuilder::RETOp(OpcodeArgs) { auto OldSP = LoadGPRRegister(X86State::REG_RSP); auto NewRIP = _LoadMem(GPRClass, GPRSize, OldSP, GPRSize); - OrderedNode *NewSP; + OrderedNode* NewSP; if (Op->OP == 0xC2) { auto Offset = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); NewSP = _Add(IR::SizeToOpSize(GPRSize), _Add(IR::SizeToOpSize(GPRSize), OldSP, Constant), Offset); - } - else { + } else { NewSP = _Add(IR::SizeToOpSize(GPRSize), OldSP, Constant); } @@ -298,7 +255,7 @@ void OpDispatchBuilder::IRETOp(OpcodeArgs) { UpdatePrefixFromSegment(NewSegmentCS, FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX); SP = _Add(IR::SizeToOpSize(GPRSize), SP, Constant); - //eflags (lower 16 used) + // eflags (lower 16 used) auto eflags = _LoadMem(GPRClass, GPRSize, SP, GPRSize); SetPackedRFLAG(false, eflags); SP = _Add(IR::SizeToOpSize(GPRSize), SP, Constant); @@ -315,8 +272,7 @@ void OpDispatchBuilder::IRETOp(OpcodeArgs) { UpdatePrefixFromSegment(NewSegmentSS, FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX); _Add(IR::SizeToOpSize(GPRSize), SP, Constant); - } - else { + } else { // Store the stack in 32-bit mode StoreGPRRegister(X86State::REG_RSP, SP); } @@ -343,81 +299,69 @@ void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) { switch (Op->OP) { case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 0): case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 0): - case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 0): - IROp = FEXCore::IR::IROps::OP_ADD; - break; + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 0): IROp = FEXCore::IR::IROps::OP_ADD; break; case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 1): case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 1): - case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 1): - IROp = FEXCore::IR::IROps::OP_OR; - break; + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 1): IROp = FEXCore::IR::IROps::OP_OR; 
break; case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 4): case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 4): - case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 4): - IROp = FEXCore::IR::IROps::OP_ANDWITHFLAGS; - break; + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 4): IROp = FEXCore::IR::IROps::OP_ANDWITHFLAGS; break; case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 5): case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 5): - case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 5): - IROp = FEXCore::IR::IROps::OP_SUB; - break; + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 5): IROp = FEXCore::IR::IROps::OP_SUB; break; case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 6): case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 6): - case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 6): - IROp = FEXCore::IR::IROps::OP_XOR; - break; + case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 6): IROp = FEXCore::IR::IROps::OP_XOR; break; default: IROp = FEXCore::IR::IROps::OP_LAST; LOGMAN_MSG_A_FMT("Unknown ALU Op: 0x{:x}", Op->OP); - break; + break; }; #undef OPD // X86 basic ALU ops just do the operation between the destination and a single source uint8_t Size = GetDstSize(Op); auto Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); - OrderedNode *Result{}; - OrderedNode *Dest{}; + OrderedNode* Result {}; + OrderedNode* Dest {}; if (DestIsLockedMem(Op)) { HandledLock = true; auto DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); DestMem = AppendSegmentOffset(DestMem, Op->Flags); switch (IROp) { - case FEXCore::IR::IROps::OP_ADD: { - Dest = _AtomicFetchAdd(IR::SizeToOpSize(Size), Src, DestMem); - Result = _Add(IR::SizeToOpSize(std::max(4u, std::max(GetOpSize(Dest), GetOpSize(Src)))), Dest, Src); - break; - } - case FEXCore::IR::IROps::OP_SUB: { - Dest = 
_AtomicFetchSub(IR::SizeToOpSize(Size), Src, DestMem); - Result = _Sub(IR::SizeToOpSize(std::max(4u, std::max(GetOpSize(Dest), GetOpSize(Src)))), Dest, Src); - break; - } - case FEXCore::IR::IROps::OP_OR: { - Dest = _AtomicFetchOr(IR::SizeToOpSize(Size), Src, DestMem); - Result = _Or(IR::SizeToOpSize(std::max(4u, std::max(GetOpSize(Dest), GetOpSize(Src)))), Dest, Src); - break; - } - case FEXCore::IR::IROps::OP_ANDWITHFLAGS: { - Dest = _AtomicFetchAnd(IR::SizeToOpSize(Size), Src, DestMem); - Result = _AndWithFlags(IR::SizeToOpSize(std::max(GetOpSize(Dest), GetOpSize(Src))), Dest, Src); - break; - } - case FEXCore::IR::IROps::OP_XOR: { - Dest = _AtomicFetchXor(IR::SizeToOpSize(Size), Src, DestMem); - Result = _Xor(IR::SizeToOpSize(std::max(4u, std::max(GetOpSize(Dest), GetOpSize(Src)))), Dest, Src); - break; - } - default: - LOGMAN_MSG_A_FMT("Unknown Atomic IR Op: {}", ToUnderlying(IROp)); - break; + case FEXCore::IR::IROps::OP_ADD: { + Dest = _AtomicFetchAdd(IR::SizeToOpSize(Size), Src, DestMem); + Result = _Add(IR::SizeToOpSize(std::max(4u, std::max(GetOpSize(Dest), GetOpSize(Src)))), Dest, Src); + break; } - } - else { + case FEXCore::IR::IROps::OP_SUB: { + Dest = _AtomicFetchSub(IR::SizeToOpSize(Size), Src, DestMem); + Result = _Sub(IR::SizeToOpSize(std::max(4u, std::max(GetOpSize(Dest), GetOpSize(Src)))), Dest, Src); + break; + } + case FEXCore::IR::IROps::OP_OR: { + Dest = _AtomicFetchOr(IR::SizeToOpSize(Size), Src, DestMem); + Result = _Or(IR::SizeToOpSize(std::max(4u, std::max(GetOpSize(Dest), GetOpSize(Src)))), Dest, Src); + break; + } + case FEXCore::IR::IROps::OP_ANDWITHFLAGS: { + Dest = _AtomicFetchAnd(IR::SizeToOpSize(Size), Src, DestMem); + Result = _AndWithFlags(IR::SizeToOpSize(std::max(GetOpSize(Dest), GetOpSize(Src))), Dest, Src); + break; + } + case FEXCore::IR::IROps::OP_XOR: { + Dest = _AtomicFetchXor(IR::SizeToOpSize(Size), Src, DestMem); + Result = _Xor(IR::SizeToOpSize(std::max(4u, std::max(GetOpSize(Dest), GetOpSize(Src)))), Dest, Src); + 
break; + } + default: LOGMAN_MSG_A_FMT("Unknown Atomic IR Op: {}", ToUnderlying(IROp)); break; + } + } else { Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); - if (IROp != FEXCore::IR::IROps::OP_ANDWITHFLAGS) + if (IROp != FEXCore::IR::IROps::OP_ANDWITHFLAGS) { Size = std::max(4u, Size); + } DeriveOp(ALUOp, IROp, _AndWithFlags(IR::SizeToOpSize(Size), Dest, Src)); @@ -429,22 +373,18 @@ void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) { // Flags set { switch (IROp) { - case FEXCore::IR::IROps::OP_ADD: - GenerateFlags_ADD(Op, Result, Dest, Src); - break; - case FEXCore::IR::IROps::OP_SUB: - GenerateFlags_SUB(Op, Result, Dest, Src); - break; + case FEXCore::IR::IROps::OP_ADD: GenerateFlags_ADD(Op, Result, Dest, Src); break; + case FEXCore::IR::IROps::OP_SUB: GenerateFlags_SUB(Op, Result, Dest, Src); break; case FEXCore::IR::IROps::OP_XOR: case FEXCore::IR::IROps::OP_OR: { GenerateFlags_Logical(Op, Result, Dest, Src); - break; + break; } case FEXCore::IR::IROps::OP_ANDWITHFLAGS: { HandleNZ00Write(); CalculatePF(Result); _InvalidateFlags(1 << X86State::RFLAG_AF_RAW_LOC); - break; + break; } default: break; } @@ -456,30 +396,30 @@ void OpDispatchBuilder::ADCOp(OpcodeArgs) { // Calculate flags early. 
CalculateDeferredFlags(); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags); uint8_t Size = GetDstSize(Op); const auto OpSize = IR::SizeToOpSize(std::max(4u, Size)); auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); auto ALUOp = _Add(OpSize, Src, CF); - OrderedNode *Result{}; - OrderedNode *Before{}; + OrderedNode* Result {}; + OrderedNode* Before {}; if (DestIsLockedMem(Op)) { HandledLock = true; - OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); DestMem = AppendSegmentOffset(DestMem, Op->Flags); Before = _AtomicFetchAdd(IR::SizeToOpSize(Size), ALUOp, DestMem); Result = _Add(OpSize, Before, ALUOp); - } - else { + } else { Before = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); Result = _Add(OpSize, Before, ALUOp); StoreResult(GPRClass, Op, Result, -1); } - if (Size < 4) + if (Size < 4) { Result = _Bfe(IR::SizeToOpSize(std::max(4u, Size)), Size * 8, 0, Result); + } GenerateFlags_ADC(Op, Result, Before, Src, CF); } @@ -488,23 +428,22 @@ void OpDispatchBuilder::SBBOp(OpcodeArgs) { // Calculate flags early. 
CalculateDeferredFlags(); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags); auto Size = GetDstSize(Op); const auto OpSize = IR::SizeToOpSize(std::max(4u, Size)); auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); auto ALUOp = _Add(OpSize, Src, CF); - OrderedNode *Result{}; - OrderedNode *Before{}; + OrderedNode* Result {}; + OrderedNode* Before {}; if (DestIsLockedMem(Op)) { HandledLock = true; - OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); DestMem = AppendSegmentOffset(DestMem, Op->Flags); Before = _AtomicFetchSub(IR::SizeToOpSize(Size), ALUOp, DestMem); Result = _Sub(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Before, ALUOp); - } - else { + } else { Before = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); Result = _Sub(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Before, ALUOp); StoreResult(GPRClass, Op, Result, -1); @@ -521,7 +460,7 @@ void OpDispatchBuilder::SBBOp(OpcodeArgs) { void OpDispatchBuilder::PUSHOp(OpcodeArgs) { const uint8_t Size = GetSrcSize(Op); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); auto OldSP = LoadGPRRegister(X86State::REG_RSP); @@ -535,8 +474,7 @@ void OpDispatchBuilder::PUSHOp(OpcodeArgs) { void OpDispatchBuilder::PUSHREGOp(OpcodeArgs) { const uint8_t Size = GetSrcSize(Op); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Dest , Op->Flags, - {.AllowUpperGarbage = true}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); auto OldSP = LoadGPRRegister(X86State::REG_RSP); const uint8_t GPRSize = CTX->GetGPRSize(); @@ -562,8 +500,8 @@ void OpDispatchBuilder::PUSHAOp(OpcodeArgs) { // push ESI // push EDI - OrderedNode *Src{}; - OrderedNode *NewSP = OldSP; + 
OrderedNode* Src {}; + OrderedNode* NewSP = OldSP; const uint8_t GPRSize = CTX->GetGPRSize(); Src = LoadGPRRegister(X86State::REG_RAX); @@ -601,51 +539,50 @@ void OpDispatchBuilder::PUSHSegmentOp(OpcodeArgs) { auto OldSP = LoadGPRRegister(X86State::REG_RSP); - OrderedNode *Src{}; + OrderedNode* Src {}; if (!CTX->Config.Is64BitMode()) { switch (SegmentReg) { - case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: - Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, es_idx)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: - Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, cs_idx)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: - Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, ss_idx)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: - Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, ds_idx)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: - Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, fs_idx)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: - Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, gs_idx)); - break; - default: break; // Do nothing + case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: + Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, es_idx)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: + Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, cs_idx)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: + Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, ss_idx)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: + Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, ds_idx)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: + Src = _LoadContext(SrcSize, GPRClass, 
offsetof(FEXCore::Core::CPUState, fs_idx)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: + Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, gs_idx)); + break; + default: break; // Do nothing } - } - else { + } else { switch (SegmentReg) { - case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: - Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, es_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: - Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, cs_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: - Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, ss_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: - Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, ds_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: - Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, fs_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: - Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, gs_cached)); - break; - default: break; // Do nothing + case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: + Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, es_cached)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: + Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, cs_cached)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: + Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, ss_cached)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: + Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, ds_cached)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: + Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, fs_cached)); + break; + case 
FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: + Src = _LoadContext(SrcSize, GPRClass, offsetof(FEXCore::Core::CPUState, gs_cached)); + break; + default: break; // Do nothing } } @@ -691,8 +628,8 @@ void OpDispatchBuilder::POPAOp(OpcodeArgs) { // pop ECX // pop EAX - OrderedNode *Src{}; - OrderedNode *NewSP = OldSP; + OrderedNode* Src {}; + OrderedNode* NewSP = OldSP; Src = _LoadMem(GPRClass, Size, NewSP, Size); StoreGPRRegister(X86State::REG_RDI, Src, Size); NewSP = _Add(OpSize::i64Bit, NewSP, Constant); @@ -740,25 +677,25 @@ void OpDispatchBuilder::POPSegmentOp(OpcodeArgs) { StoreGPRRegister(X86State::REG_RSP, NewSP); switch (SegmentReg) { - case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: - _StoreContext(DstSize, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, es_idx)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: - _StoreContext(DstSize, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, cs_idx)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: - _StoreContext(DstSize, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, ss_idx)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: - _StoreContext(DstSize, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, ds_idx)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: - _StoreContext(DstSize, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, fs_idx)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: - _StoreContext(DstSize, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, gs_idx)); - break; - default: break; // Do nothing + case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: + _StoreContext(DstSize, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, es_idx)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: + _StoreContext(DstSize, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, cs_idx)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: + 
_StoreContext(DstSize, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, ss_idx)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: + _StoreContext(DstSize, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, ds_idx)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: + _StoreContext(DstSize, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, fs_idx)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: + _StoreContext(DstSize, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, gs_idx)); + break; + default: break; // Do nothing } UpdatePrefixFromSegment(NewSegment, SegmentReg); @@ -791,17 +728,16 @@ void OpDispatchBuilder::CALLOp(OpcodeArgs) { _InvalidateFlags(~0UL); // all flags // Deferred flags are invalidated now InvalidateDeferredFlags(); - } - else { + } else { // Calculate flags early. CalculateDeferredFlags(); } auto ConstantPC = GetRelocatedPC(Op); - OrderedNode *JMPPCOffset = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* JMPPCOffset = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *NewRIP = _Add(IR::SizeToOpSize(GPRSize), ConstantPC, JMPPCOffset); + OrderedNode* NewRIP = _Add(IR::SizeToOpSize(GPRSize), ConstantPC, JMPPCOffset); // Push the return address. 
auto OldSP = LoadGPRRegister(X86State::REG_RSP); @@ -818,8 +754,7 @@ void OpDispatchBuilder::CALLOp(OpcodeArgs) { if (NextRIP != TargetRIP) { // Store the RIP _ExitFunction(NewRIP); // If we get here then leave the function now - } - else { + } else { NeedsBlockEnd = true; } } @@ -831,7 +766,7 @@ void OpDispatchBuilder::CALLAbsoluteOp(OpcodeArgs) { BlockSetRIP = true; const uint8_t Size = GetSrcSize(Op); - OrderedNode *JMPPCOffset = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* JMPPCOffset = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); auto ConstantPCReturn = GetRelocatedPC(Op); @@ -847,96 +782,92 @@ void OpDispatchBuilder::CALLAbsoluteOp(OpcodeArgs) { _ExitFunction(JMPPCOffset); // If we get here then leave the function now } -OrderedNode *OpDispatchBuilder::SelectBit(OrderedNode *Cmp, bool TrueIsNonzero, IR::OpSize ResultSize, OrderedNode *TrueValue, OrderedNode *FalseValue) { +OrderedNode* +OpDispatchBuilder::SelectBit(OrderedNode* Cmp, bool TrueIsNonzero, IR::OpSize ResultSize, OrderedNode* TrueValue, OrderedNode* FalseValue) { uint64_t TrueConst, FalseConst; - if (IsValueConstant(WrapNode(TrueValue), &TrueConst) && - IsValueConstant(WrapNode(FalseValue), &FalseConst) && - TrueConst == 1 && - FalseConst == 0) { + if (IsValueConstant(WrapNode(TrueValue), &TrueConst) && IsValueConstant(WrapNode(FalseValue), &FalseConst) && TrueConst == 1 && FalseConst == 0) { - if (!TrueIsNonzero) - Cmp = _Not(OpSize::i32Bit, Cmp); + if (!TrueIsNonzero) { + Cmp = _Not(OpSize::i32Bit, Cmp); + } - return _And(ResultSize, Cmp, _Constant(1)); + return _And(ResultSize, Cmp, _Constant(1)); } SaveNZCV(); _TestNZ(OpSize::i32Bit, Cmp, _Constant(1)); - return _NZCVSelect(ResultSize, - TrueIsNonzero ? CondClassType{COND_NEQ} : CondClassType{COND_EQ}, - TrueValue, FalseValue); + return _NZCVSelect(ResultSize, TrueIsNonzero ? 
CondClassType {COND_NEQ} : CondClassType {COND_EQ}, TrueValue, FalseValue); } std::pair OpDispatchBuilder::DecodeNZCVCondition(uint8_t OP) const { switch (OP) { - case 0x0: { // JO - Jump if OF == 1 - return {false, CondClassType{COND_FU}}; - } - case 0x1:{ // JNO - Jump if OF == 0 - return {false, CondClassType{COND_FNU}}; - } - case 0x2: { // JC - Jump if CF == 1 - return {false, CondClassType{COND_UGE}}; - } - case 0x3: { // JNC - Jump if CF == 0 - return {false, CondClassType{COND_ULT}}; - } - case 0x4: { // JE - Jump if ZF == 1 - return {false, CondClassType{COND_EQ}}; - } - case 0x5: { // JNE - Jump if ZF == 0 - return {false, CondClassType{COND_NEQ}}; - } - case 0x8: { // JS - Jump if SF == 1 - return {false, CondClassType{COND_MI}}; - } - case 0x9: { // JNS - Jump if SF == 0 - return {false, CondClassType{COND_PL}}; - } - case 0xC: { // SF <> OF - return {false, CondClassType{COND_SLT}}; - } - case 0xD: { // SF = OF - return {false, CondClassType{COND_SGE}}; - } - case 0xE: {// ZF = 1 || SF <> OF - return {false, CondClassType{COND_SLE}}; - } - case 0xF: {// ZF = 0 && SF = OF - return {false, CondClassType{COND_SGT}}; - } - default: - // Other conditions do not map directly, caller gets to deal with it. 
- return {true, CondClassType{0}}; + case 0x0: { // JO - Jump if OF == 1 + return {false, CondClassType {COND_FU}}; + } + case 0x1: { // JNO - Jump if OF == 0 + return {false, CondClassType {COND_FNU}}; + } + case 0x2: { // JC - Jump if CF == 1 + return {false, CondClassType {COND_UGE}}; + } + case 0x3: { // JNC - Jump if CF == 0 + return {false, CondClassType {COND_ULT}}; + } + case 0x4: { // JE - Jump if ZF == 1 + return {false, CondClassType {COND_EQ}}; + } + case 0x5: { // JNE - Jump if ZF == 0 + return {false, CondClassType {COND_NEQ}}; + } + case 0x8: { // JS - Jump if SF == 1 + return {false, CondClassType {COND_MI}}; + } + case 0x9: { // JNS - Jump if SF == 0 + return {false, CondClassType {COND_PL}}; + } + case 0xC: { // SF <> OF + return {false, CondClassType {COND_SLT}}; + } + case 0xD: { // SF = OF + return {false, CondClassType {COND_SGE}}; + } + case 0xE: { // ZF = 1 || SF <> OF + return {false, CondClassType {COND_SLE}}; + } + case 0xF: { // ZF = 0 && SF = OF + return {false, CondClassType {COND_SGT}}; + } + default: + // Other conditions do not map directly, caller gets to deal with it. + return {true, CondClassType {0}}; } } -OrderedNode *OpDispatchBuilder::SelectCC(uint8_t OP, IR::OpSize ResultSize, OrderedNode *TrueValue, OrderedNode *FalseValue) { +OrderedNode* OpDispatchBuilder::SelectCC(uint8_t OP, IR::OpSize ResultSize, OrderedNode* TrueValue, OrderedNode* FalseValue) { auto [Complex, Cond] = DecodeNZCVCondition(OP); - if (!Complex) - return _NZCVSelect(ResultSize, Cond, TrueValue, FalseValue); + if (!Complex) { + return _NZCVSelect(ResultSize, Cond, TrueValue, FalseValue); + } switch (OP) { - case 0x6: { // JNA - Jump if CF == 1 || ZC == 1 - // (A || B) ? C : D is equivalent to B ? C : (A ? C : D) - auto TMP = _NZCVSelect(ResultSize, CondClassType{COND_UGE}, TrueValue, FalseValue); - return _NZCVSelect(ResultSize, CondClassType{COND_EQ}, TrueValue, TMP); - } - case 0x7: { // JA - Jump if CF == 0 && ZF == 0 - // (A && B) ? 
C : D is equivalent to B ? (A ? C : D) : D - auto TMP = _NZCVSelect(ResultSize, CondClassType{COND_ULT}, TrueValue, FalseValue); - return _NZCVSelect(ResultSize, CondClassType{COND_NEQ}, TMP, FalseValue); - } - case 0xA: { // JP - Jump if PF == 1 - // Raw value contains inverted PF in bottom bit - return SelectBit(LoadPFRaw(), false, ResultSize, TrueValue, FalseValue); - } - case 0xB: { // JNP - Jump if PF == 0 - return SelectBit(LoadPFRaw(), true, ResultSize, TrueValue, FalseValue); - } - default: - LOGMAN_MSG_A_FMT("Unknown CC Op: 0x{:x}\n", OP); - return nullptr; + case 0x6: { // JNA - Jump if CF == 1 || ZC == 1 + // (A || B) ? C : D is equivalent to B ? C : (A ? C : D) + auto TMP = _NZCVSelect(ResultSize, CondClassType {COND_UGE}, TrueValue, FalseValue); + return _NZCVSelect(ResultSize, CondClassType {COND_EQ}, TrueValue, TMP); + } + case 0x7: { // JA - Jump if CF == 0 && ZF == 0 + // (A && B) ? C : D is equivalent to B ? (A ? C : D) : D + auto TMP = _NZCVSelect(ResultSize, CondClassType {COND_ULT}, TrueValue, FalseValue); + return _NZCVSelect(ResultSize, CondClassType {COND_NEQ}, TMP, FalseValue); + } + case 0xA: { // JP - Jump if PF == 1 + // Raw value contains inverted PF in bottom bit + return SelectBit(LoadPFRaw(), false, ResultSize, TrueValue, FalseValue); + } + case 0xB: { // JNP - Jump if PF == 0 + return SelectBit(LoadPFRaw(), true, ResultSize, TrueValue, FalseValue); + } + default: LOGMAN_MSG_A_FMT("Unknown CC Op: 0x{:x}\n", OP); return nullptr; } } @@ -959,12 +890,11 @@ void OpDispatchBuilder::CMOVOp(OpcodeArgs) { CalculateDeferredFlags(); // Destination is always a GPR. 
- OrderedNode *Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, GPRSize, Op->Flags); - OrderedNode *Src{}; + OrderedNode* Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, GPRSize, Op->Flags); + OrderedNode* Src {}; if (Op->Src[0].IsGPR()) { Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], GPRSize, Op->Flags); - } - else { + } else { Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); } @@ -990,8 +920,7 @@ void OpDispatchBuilder::CondJUMPOp(OpcodeArgs) { if (TargetOffset < 0 && -TargetOffset > InstRIP) { // Invert the signed value if we are underflowing TargetOffset = 0x1'0000'0000ULL + TargetOffset; - } - else if (TargetOffset >= 0 && Target >= 0x1'0000'0000ULL) { + } else if (TargetOffset >= 0 && Target >= 0x1'0000'0000ULL) { // We are overflowing, wrap around TargetOffset = TargetOffset - 0x1'0000'0000ULL; } @@ -1020,8 +949,7 @@ void OpDispatchBuilder::CondJUMPOp(OpcodeArgs) { // Taking branch block if (TrueBlock != JumpTargets.end()) { SetTrueJumpTarget(CondJump_, TrueBlock->second.BlockEntry); - } - else { + } else { // Make sure to start a new block after ending this one auto JumpTarget = CreateNewCodeBlockAtEnd(); SetTrueJumpTarget(CondJump_, JumpTarget); @@ -1037,8 +965,7 @@ void OpDispatchBuilder::CondJUMPOp(OpcodeArgs) { // Failure to take branch if (FalseBlock != JumpTargets.end()) { SetFalseJumpTarget(CondJump_, FalseBlock->second.BlockEntry); - } - else { + } else { // Make sure to start a new block after ending this one // Place it after this block for fallthrough optimization auto JumpTarget = CreateNewCodeBlockAfter(CurrentBlock); @@ -1072,7 +999,7 @@ void OpDispatchBuilder::CondJUMPRCXOp(OpcodeArgs) { uint64_t Target = Op->PC + Op->InstSize + Op->Src[0].Data.Literal.Value; - OrderedNode *CondReg = LoadGPRRegister(X86State::REG_RCX, JcxGPRSize); + OrderedNode* CondReg = LoadGPRRegister(X86State::REG_RCX, JcxGPRSize); auto TrueBlock = JumpTargets.find(Target); auto FalseBlock = JumpTargets.find(Op->PC + Op->InstSize); @@ -1085,8 
+1012,7 @@ void OpDispatchBuilder::CondJUMPRCXOp(OpcodeArgs) { // Taking branch block if (TrueBlock != JumpTargets.end()) { SetTrueJumpTarget(CondJump_, TrueBlock->second.BlockEntry); - } - else { + } else { // Make sure to start a new block after ending this one auto JumpTarget = CreateNewCodeBlockAtEnd(); SetTrueJumpTarget(CondJump_, JumpTarget); @@ -1102,8 +1028,7 @@ void OpDispatchBuilder::CondJUMPRCXOp(OpcodeArgs) { // Failure to take branch if (FalseBlock != JumpTargets.end()) { SetFalseJumpTarget(CondJump_, FalseBlock->second.BlockEntry); - } - else { + } else { // Make sure to start a new block after ending this one // Place it after the current block for fallthrough behavior auto JumpTarget = CreateNewCodeBlockAfter(CurrentBlock); @@ -1129,9 +1054,10 @@ void OpDispatchBuilder::LoopOp(OpcodeArgs) { // If LOOPE then jumps to target if RCX != 0 && ZF == 1 // If LOOPNE then jumps to target if RCX != 0 && ZF == 0 - OrderedNode *AndCondWith = nullptr; - if (CheckZF) + OrderedNode* AndCondWith = nullptr; + if (CheckZF) { AndCondWith = GetRFLAG(FEXCore::X86State::RFLAG_ZF_RAW_LOC, !ZFTrue); + } BlockSetRIP = true; auto ZeroConst = _Constant(0); @@ -1146,15 +1072,15 @@ void OpDispatchBuilder::LoopOp(OpcodeArgs) { uint64_t Target = Op->PC + Op->InstSize + Op->Src[1].Data.Literal.Value; - OrderedNode *CondReg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* CondReg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); CondReg = _Sub(SrcSize == 8 ? 
OpSize::i64Bit : OpSize::i32Bit, CondReg, _Constant(SrcSize * 8, 1)); StoreResult(GPRClass, Op, Op->Src[0], CondReg, -1); - SrcCond = _Select(FEXCore::IR::COND_NEQ, - CondReg, ZeroConst, TakeBranch, DoNotTakeBranch); + SrcCond = _Select(FEXCore::IR::COND_NEQ, CondReg, ZeroConst, TakeBranch, DoNotTakeBranch); - if (AndCondWith) + if (AndCondWith) { SrcCond = _And(OpSize::i64Bit, SrcCond, AndCondWith); + } CalculateDeferredFlags(); auto TrueBlock = JumpTargets.find(Target); @@ -1166,8 +1092,7 @@ void OpDispatchBuilder::LoopOp(OpcodeArgs) { // Taking branch block if (TrueBlock != JumpTargets.end()) { SetTrueJumpTarget(CondJump_, TrueBlock->second.BlockEntry); - } - else { + } else { // Make sure to start a new block after ending this one auto JumpTarget = CreateNewCodeBlockAtEnd(); SetTrueJumpTarget(CondJump_, JumpTarget); @@ -1183,8 +1108,7 @@ void OpDispatchBuilder::LoopOp(OpcodeArgs) { // Failure to take branch if (FalseBlock != JumpTargets.end()) { SetFalseJumpTarget(CondJump_, FalseBlock->second.BlockEntry); - } - else { + } else { // Make sure to start a new block after ending this one // Place after this block for fallthrough behavior auto JumpTarget = CreateNewCodeBlockAfter(GetCurrentBlock()); @@ -1218,8 +1142,7 @@ void OpDispatchBuilder::JUMPOp(OpcodeArgs) { if (TargetOffset < 0 && -TargetOffset > InstRIP) { // Invert the signed value if we are underflowing TargetOffset = 0x1'0000'0000ULL + TargetOffset; - } - else if (TargetOffset >= 0 && TargetRIP >= 0x1'0000'0000ULL) { + } else if (TargetOffset >= 0 && TargetRIP >= 0x1'0000'0000ULL) { // We are overflowing, wrap around TargetOffset = TargetOffset - 0x1'0000'0000ULL; } @@ -1233,8 +1156,7 @@ void OpDispatchBuilder::JUMPOp(OpcodeArgs) { auto JumpBlock = JumpTargets.find(TargetRIP); if (JumpBlock != JumpTargets.end()) { Jump(GetNewJumpBlock(TargetRIP)); - } - else { + } else { // If the block isn't a jump target then we need to create an exit block auto Jump_ = Jump(); @@ -1277,18 +1199,17 @@ template void 
OpDispatchBuilder::TESTOp(OpcodeArgs) { // TEST is an instruction that does an AND between the sources // Result isn't stored in result, only writes to flags - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, - {.AllowUpperGarbage = true}); - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, - {.AllowUpperGarbage = true}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); auto Size = GetDstSize(Op); // Optimize out masking constants uint64_t Const; if (IsValueConstant(WrapNode(Src), &Const)) { - if (Const == (Size == 8 ? ~0ULL : ((1ull << Size * 8) - 1))) + if (Const == (Size == 8 ? ~0ULL : ((1ull << Size * 8) - 1))) { Src = Dest; + } } HandleNZ00Write(); @@ -1308,17 +1229,15 @@ void OpDispatchBuilder::MOVSXDOp(OpcodeArgs) { // uint8_t Size = std::min(static_cast(4), GetSrcSize(Op)); - OrderedNode *Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], Size, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], Size, Op->Flags); if (Size == 2) { // This'll make sure to insert in to the lower 16bits without modifying upper bits StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, Size, -1); - } - else if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REX_WIDENING) { + } else if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REX_WIDENING) { // With REX.W then Sext Src = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src); StoreResult(GPRClass, Op, Src, -1); - } - else { + } else { // Without REX.W then Zext (store result implicitly zero extends) StoreResult(GPRClass, Op, Src, -1); } @@ -1328,13 +1247,13 @@ void OpDispatchBuilder::MOVSXOp(OpcodeArgs) { // This will ZExt the loaded size // We want to Sext it uint8_t Size = GetSrcSize(Op); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, 
Op->Src[0], Op->Flags); Src = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src); StoreResult(GPRClass, Op, Op->Dest, Src, -1); } void OpDispatchBuilder::MOVZXOp(OpcodeArgs) { - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); // Store result implicitly zero extends StoreResult(GPRClass, Op, Src, -1); } @@ -1344,28 +1263,26 @@ void OpDispatchBuilder::CMPOp(OpcodeArgs) { // CMP is an instruction that does a SUB between the sources // Result isn't stored in result, only writes to flags auto Size = GetDstSize(Op); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, {.AllowUpperGarbage = true}); - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); auto ALUOp = _Sub(Size == 8 ? 
OpSize::i64Bit : OpSize::i32Bit, Dest, Src); - OrderedNode *Result = ALUOp; + OrderedNode* Result = ALUOp; GenerateFlags_SUB(Op, Result, Dest, Src); } void OpDispatchBuilder::CQOOp(OpcodeArgs) { - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); auto Size = GetSrcSize(Op); - OrderedNode *Upper = _Sbfe(OpSize::i64Bit, 1, Size * 8 - 1, Src); + OrderedNode* Upper = _Sbfe(OpSize::i64Bit, 1, Size * 8 - 1, Src); StoreResult(GPRClass, Op, Upper, -1); } void OpDispatchBuilder::XCHGOp(OpcodeArgs) { // Load both the source and the destination - if (Op->OP == 0x90 && - GetSrcSize(Op) >= 4 && - Op->Src[0].IsGPR() && Op->Src[0].Data.GPR.GPR == FEXCore::X86State::REG_RAX && + if (Op->OP == 0x90 && GetSrcSize(Op) >= 4 && Op->Src[0].IsGPR() && Op->Src[0].Data.GPR.GPR == FEXCore::X86State::REG_RAX && Op->Dest.IsGPR() && Op->Dest.Data.GPR.GPR == FEXCore::X86State::REG_RAX) { // This is one heck of a sucky special case // If we are the 0x90 XCHG opcode (Meaning source is GPR RAX) @@ -1386,20 +1303,18 @@ void OpDispatchBuilder::XCHGOp(OpcodeArgs) { } // AllowUpperGarbage: OK to allow as it will be overwritten by StoreResult. - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, - {.AllowUpperGarbage = true}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true}); if (DestIsMem(Op)) { HandledLock = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK; - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); Dest = AppendSegmentOffset(Dest, Op->Flags); auto Result = _AtomicSwap(OpSizeFromSrc(Op), Src, Dest); StoreResult(GPRClass, Op, Op->Src[0], Result, -1); - } - else { + } else { // AllowUpperGarbage: OK to allow as it will be overwritten by StoreResult. 
- OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); // Swap the contents // Order matters here since we don't want to swap context contents for one that effects the other @@ -1411,17 +1326,16 @@ void OpDispatchBuilder::XCHGOp(OpcodeArgs) { void OpDispatchBuilder::CDQOp(OpcodeArgs) { uint8_t DstSize = GetDstSize(Op); uint8_t SrcSize = DstSize >> 1; - OrderedNode *Src = LoadGPRRegister(X86State::REG_RAX, SrcSize, 0, true); + OrderedNode* Src = LoadGPRRegister(X86State::REG_RAX, SrcSize, 0, true); - Src = _Sbfe(DstSize <= 4 ? OpSize::i32Bit : OpSize::i64Bit, SrcSize * 8, 0, - Src); + Src = _Sbfe(DstSize <= 4 ? OpSize::i32Bit : OpSize::i64Bit, SrcSize * 8, 0, Src); StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, DstSize, -1); } void OpDispatchBuilder::SAHFOp(OpcodeArgs) { // Extract AH - OrderedNode *Src = LoadGPRRegister(X86State::REG_RAX, 1, 8); + OrderedNode* Src = LoadGPRRegister(X86State::REG_RAX, 1, 8); // Clear bits that aren't supposed to be set Src = _Andn(OpSize::i64Bit, Src, _Constant(0b101000)); @@ -1447,19 +1361,19 @@ void OpDispatchBuilder::FLAGControlOp(OpcodeArgs) { switch (Op->OP) { case 0xF5: // CMC CarryInvert(); - break; + break; case 0xF8: // CLC SetRFLAG(_Constant(0), FEXCore::X86State::RFLAG_CF_RAW_LOC); - break; + break; case 0xF9: // STC SetRFLAG(_Constant(1), FEXCore::X86State::RFLAG_CF_RAW_LOC); - break; + break; case 0xFC: // CLD SetRFLAG(_Constant(0), FEXCore::X86State::RFLAG_DF_LOC); - break; + break; case 0xFD: // STD SetRFLAG(_Constant(1), FEXCore::X86State::RFLAG_DF_LOC); - break; + break; } } @@ -1482,108 +1396,104 @@ void OpDispatchBuilder::MOVSegOp(OpcodeArgs) { // The loads here also load the selector, NOT the base if constexpr (ToSeg) { - OrderedNode *Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], 2, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], 
2, Op->Flags); switch (Op->Dest.Data.GPR.GPR) { - case FEXCore::X86State::REG_RAX: // ES - case FEXCore::X86State::REG_R8: // ES - _StoreContext(2, GPRClass, Src, offsetof(FEXCore::Core::CPUState, es_idx)); - UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX); - break; - case FEXCore::X86State::REG_RBX: // DS - case FEXCore::X86State::REG_R11: // DS - _StoreContext(2, GPRClass, Src, offsetof(FEXCore::Core::CPUState, ds_idx)); - UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX); - break; - case FEXCore::X86State::REG_RCX: // CS - case FEXCore::X86State::REG_R9: // CS - // CPL3 can't write to this - _Break(FEXCore::IR::BreakDefinition { - .ErrorRegister = 0, - .Signal = SIGILL, - .TrapNumber = 0, - .si_code = 0, - }); - break; - case FEXCore::X86State::REG_RDX: // SS - case FEXCore::X86State::REG_R10: // SS - _StoreContext(2, GPRClass, Src, offsetof(FEXCore::Core::CPUState, ss_idx)); - UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX); - break; - case FEXCore::X86State::REG_RBP: // GS - case FEXCore::X86State::REG_R13: // GS - if (!CTX->Config.Is64BitMode) { - _StoreContext(2, GPRClass, Src, offsetof(FEXCore::Core::CPUState, gs_idx)); - UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX); - } else { - LogMan::Msg::EFmt("We don't support modifying GS selector in 64bit mode!"); - DecodeFailure = true; - } - break; - case FEXCore::X86State::REG_RSP: // FS - case FEXCore::X86State::REG_R12: // FS - if (!CTX->Config.Is64BitMode) { - _StoreContext(2, GPRClass, Src, offsetof(FEXCore::Core::CPUState, fs_idx)); - UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX); - } else { - LogMan::Msg::EFmt("We don't support modifying FS selector in 64bit mode!"); - DecodeFailure = true; - } - break; - default: - LogMan::Msg::EFmt("Unknown segment register: {}", Op->Dest.Data.GPR.GPR); + case FEXCore::X86State::REG_RAX: // ES + case 
FEXCore::X86State::REG_R8: // ES + _StoreContext(2, GPRClass, Src, offsetof(FEXCore::Core::CPUState, es_idx)); + UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX); + break; + case FEXCore::X86State::REG_RBX: // DS + case FEXCore::X86State::REG_R11: // DS + _StoreContext(2, GPRClass, Src, offsetof(FEXCore::Core::CPUState, ds_idx)); + UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX); + break; + case FEXCore::X86State::REG_RCX: // CS + case FEXCore::X86State::REG_R9: // CS + // CPL3 can't write to this + _Break(FEXCore::IR::BreakDefinition { + .ErrorRegister = 0, + .Signal = SIGILL, + .TrapNumber = 0, + .si_code = 0, + }); + break; + case FEXCore::X86State::REG_RDX: // SS + case FEXCore::X86State::REG_R10: // SS + _StoreContext(2, GPRClass, Src, offsetof(FEXCore::Core::CPUState, ss_idx)); + UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX); + break; + case FEXCore::X86State::REG_RBP: // GS + case FEXCore::X86State::REG_R13: // GS + if (!CTX->Config.Is64BitMode) { + _StoreContext(2, GPRClass, Src, offsetof(FEXCore::Core::CPUState, gs_idx)); + UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX); + } else { + LogMan::Msg::EFmt("We don't support modifying GS selector in 64bit mode!"); DecodeFailure = true; - break; + } + break; + case FEXCore::X86State::REG_RSP: // FS + case FEXCore::X86State::REG_R12: // FS + if (!CTX->Config.Is64BitMode) { + _StoreContext(2, GPRClass, Src, offsetof(FEXCore::Core::CPUState, fs_idx)); + UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX); + } else { + LogMan::Msg::EFmt("We don't support modifying FS selector in 64bit mode!"); + DecodeFailure = true; + } + break; + default: + LogMan::Msg::EFmt("Unknown segment register: {}", Op->Dest.Data.GPR.GPR); + DecodeFailure = true; + break; } - } - else { - OrderedNode *Segment{}; + } else { + OrderedNode* Segment {}; switch (Op->Src[0].Data.GPR.GPR) { - case 
FEXCore::X86State::REG_RAX: // ES - case FEXCore::X86State::REG_R8: // ES - Segment = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, es_idx)); - break; - case FEXCore::X86State::REG_RBX: // DS - case FEXCore::X86State::REG_R11: // DS - Segment = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, ds_idx)); - break; - case FEXCore::X86State::REG_RCX: // CS - case FEXCore::X86State::REG_R9: // CS - Segment = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, cs_idx)); - break; - case FEXCore::X86State::REG_RDX: // SS - case FEXCore::X86State::REG_R10: // SS - Segment = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, ss_idx)); - break; - case FEXCore::X86State::REG_RBP: // GS - case FEXCore::X86State::REG_R13: // GS - if (CTX->Config.Is64BitMode) { - Segment = _Constant(0); - } - else { - Segment = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, gs_idx)); - } - break; - case FEXCore::X86State::REG_RSP: // FS - case FEXCore::X86State::REG_R12: // FS - if (CTX->Config.Is64BitMode) { - Segment = _Constant(0); - } - else { - Segment = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, fs_idx)); - } - break; - default: - LogMan::Msg::EFmt("Unknown segment register: {}", Op->Dest.Data.GPR.GPR); - DecodeFailure = true; - return; + case FEXCore::X86State::REG_RAX: // ES + case FEXCore::X86State::REG_R8: // ES + Segment = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, es_idx)); + break; + case FEXCore::X86State::REG_RBX: // DS + case FEXCore::X86State::REG_R11: // DS + Segment = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, ds_idx)); + break; + case FEXCore::X86State::REG_RCX: // CS + case FEXCore::X86State::REG_R9: // CS + Segment = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, cs_idx)); + break; + case FEXCore::X86State::REG_RDX: // SS + case FEXCore::X86State::REG_R10: // SS + Segment = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, ss_idx)); + break; + case 
FEXCore::X86State::REG_RBP: // GS + case FEXCore::X86State::REG_R13: // GS + if (CTX->Config.Is64BitMode) { + Segment = _Constant(0); + } else { + Segment = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, gs_idx)); + } + break; + case FEXCore::X86State::REG_RSP: // FS + case FEXCore::X86State::REG_R12: // FS + if (CTX->Config.Is64BitMode) { + Segment = _Constant(0); + } else { + Segment = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, fs_idx)); + } + break; + default: + LogMan::Msg::EFmt("Unknown segment register: {}", Op->Dest.Data.GPR.GPR); + DecodeFailure = true; + return; } if (DestIsMem(Op)) { // If the destination is memory then we always store 16-bits only StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Segment, 2, -1); - } - else { + } else { // If the destination is a GPR then we follow register storing rules StoreResult(GPRClass, Op, Segment, -1); } @@ -1591,7 +1501,7 @@ void OpDispatchBuilder::MOVSegOp(OpcodeArgs) { } void OpDispatchBuilder::MOVOffsetOp(OpcodeArgs) { - OrderedNode *Src; + OrderedNode* Src; switch (Op->OP) { case 0xA0: @@ -1616,27 +1526,27 @@ void OpDispatchBuilder::MOVOffsetOp(OpcodeArgs) { void OpDispatchBuilder::CPUIDOp(OpcodeArgs) { const auto GPRSize = CTX->GetGPRSize(); - OrderedNode *Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], GPRSize, Op->Flags); - OrderedNode *Leaf = LoadGPRRegister(X86State::REG_RCX); + OrderedNode* Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], GPRSize, Op->Flags); + OrderedNode* Leaf = LoadGPRRegister(X86State::REG_RCX); auto Res = _CPUID(Src, Leaf); - OrderedNode *Result_Lower = _ExtractElementPair(OpSize::i64Bit, Res, 0); - OrderedNode *Result_Upper = _ExtractElementPair(OpSize::i64Bit, Res, 1); + OrderedNode* Result_Lower = _ExtractElementPair(OpSize::i64Bit, Res, 0); + OrderedNode* Result_Upper = _ExtractElementPair(OpSize::i64Bit, Res, 1); - StoreGPRRegister(X86State::REG_RAX, _Bfe(OpSize::i64Bit, 32, 0, Result_Lower)); + StoreGPRRegister(X86State::REG_RAX, 
_Bfe(OpSize::i64Bit, 32, 0, Result_Lower)); StoreGPRRegister(X86State::REG_RBX, _Bfe(OpSize::i64Bit, 32, 32, Result_Lower)); StoreGPRRegister(X86State::REG_RDX, _Bfe(OpSize::i64Bit, 32, 32, Result_Upper)); - StoreGPRRegister(X86State::REG_RCX, _Bfe(OpSize::i64Bit, 32, 0, Result_Upper)); + StoreGPRRegister(X86State::REG_RCX, _Bfe(OpSize::i64Bit, 32, 0, Result_Upper)); } void OpDispatchBuilder::XGetBVOp(OpcodeArgs) { - OrderedNode *Function = LoadGPRRegister(X86State::REG_RCX); + OrderedNode* Function = LoadGPRRegister(X86State::REG_RCX); auto Res = _XGetBV(Function); - OrderedNode *Result_Lower = _ExtractElementPair(OpSize::i32Bit, Res, 0); - OrderedNode *Result_Upper = _ExtractElementPair(OpSize::i32Bit, Res, 1); + OrderedNode* Result_Lower = _ExtractElementPair(OpSize::i32Bit, Res, 0); + OrderedNode* Result_Upper = _ExtractElementPair(OpSize::i32Bit, Res, 1); StoreGPRRegister(X86State::REG_RAX, Result_Lower); StoreGPRRegister(X86State::REG_RDX, Result_Upper); @@ -1644,18 +1554,17 @@ void OpDispatchBuilder::XGetBVOp(OpcodeArgs) { template void OpDispatchBuilder::SHLOp(OpcodeArgs) { - OrderedNode *Src{}; - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src {}; + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); if constexpr (SHL1Bit) { Src = _Constant(1); - } - else { + } else { Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags); } const auto Size = GetSrcBitSize(Op); - OrderedNode *Result = _Lshl(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src); + OrderedNode* Result = _Lshl(Size == 64 ? 
OpSize::i64Bit : OpSize::i32Bit, Dest, Src); StoreResult(GPRClass, Op, Result, -1); if (Size < 32) { @@ -1664,14 +1573,13 @@ void OpDispatchBuilder::SHLOp(OpcodeArgs) { if constexpr (SHL1Bit) { GenerateFlags_ShiftLeftImmediate(Op, Result, Dest, 1); - } - else { + } else { GenerateFlags_ShiftLeft(Op, Result, Dest, Src); } } void OpDispatchBuilder::SHLImmediateOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); @@ -1685,8 +1593,8 @@ void OpDispatchBuilder::SHLImmediateOp(OpcodeArgs) { Shift &= 0x1F; } - OrderedNode *Src = _Constant(Size, Shift); - OrderedNode *Result = _Lshl(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src); + OrderedNode* Src = _Constant(Size, Shift); + OrderedNode* Result = _Lshl(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src); StoreResult(GPRClass, Op, Result, -1); @@ -1695,13 +1603,12 @@ void OpDispatchBuilder::SHLImmediateOp(OpcodeArgs) { template void OpDispatchBuilder::SHROp(OpcodeArgs) { - OrderedNode *Src; + OrderedNode* Src; auto Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); if constexpr (SHR1Bit) { Src = _Constant(1); - } - else { + } else { Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags); } @@ -1710,8 +1617,7 @@ void OpDispatchBuilder::SHROp(OpcodeArgs) { if constexpr (SHR1Bit) { GenerateFlags_ShiftRightImmediate(Op, ALUOp, Dest, 1); - } - else { + } else { GenerateFlags_ShiftRight(Op, ALUOp, Dest, Src); } } @@ -1731,7 +1637,7 @@ void OpDispatchBuilder::SHRImmediateOp(OpcodeArgs) { Shift &= 0x1F; } - OrderedNode *Src = _Constant(Size, Shift); + OrderedNode* Src = _Constant(Size, Shift); auto ALUOp = _Lshr(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src); StoreResult(GPRClass, Op, ALUOp, -1); @@ -1742,11 +1648,11 @@ void OpDispatchBuilder::SHLDOp(OpcodeArgs) { // Calculate flags early. 
CalculateDeferredFlags(); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); // Allow garbage on the shift, we're masking it anyway. - OrderedNode *Shift = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Shift = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); const auto Size = GetSrcBitSize(Op); @@ -1765,13 +1671,12 @@ void OpDispatchBuilder::SHLDOp(OpcodeArgs) { // a64 masks the bottom bits, so if we're using a native 32/64-bit shift, we // can negate to do the subtract (it's congruent), which saves a constant. - auto ShiftRight = Size >= 32 ? _Neg(OpSize::i64Bit, Shift) : - _Sub(OpSize::i64Bit, _Constant(Size), Shift); + auto ShiftRight = Size >= 32 ? _Neg(OpSize::i64Bit, Shift) : _Sub(OpSize::i64Bit, _Constant(Size), Shift); auto Tmp1 = _Lshl(OpSize::i64Bit, Dest, Shift); auto Tmp2 = _Lshr(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Src, ShiftRight); - OrderedNode *Res = _Or(OpSize::i64Bit, Tmp1, Tmp2); + OrderedNode* Res = _Or(OpSize::i64Bit, Tmp1, Tmp2); // If shift count was zero then output doesn't change // Needs to be checked for the 32bit operand case @@ -1782,9 +1687,7 @@ void OpDispatchBuilder::SHLDOp(OpcodeArgs) { // // TODO: This whole function wants to be wrapped in the if. Maybe b/w pass is // a good idea after all. 
- Res = _Select(FEXCore::IR::COND_EQ, - Shift, _Constant(0), - Dest, Res); + Res = _Select(FEXCore::IR::COND_EQ, Shift, _Constant(0), Dest, Res); StoreResult(GPRClass, Op, Res, -1); @@ -1795,8 +1698,8 @@ void OpDispatchBuilder::SHLDOp(OpcodeArgs) { } void OpDispatchBuilder::SHLDImmediateOp(OpcodeArgs) { - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); @@ -1811,25 +1714,23 @@ void OpDispatchBuilder::SHLDImmediateOp(OpcodeArgs) { } if (Shift != 0) { - OrderedNode *Res{}; + OrderedNode* Res {}; if (Size < 32) { - OrderedNode *ShiftLeft = _Constant(Shift); + OrderedNode* ShiftLeft = _Constant(Shift); auto ShiftRight = _Constant(Size - Shift); auto Tmp1 = _Lshl(OpSize::i64Bit, Dest, ShiftLeft); auto Tmp2 = _Lshr(OpSize::i32Bit, Src, ShiftRight); Res = _Or(OpSize::i64Bit, Tmp1, Tmp2); - } - else { + } else { // 32-bit and 64-bit SHLD behaves like an EXTR where the lower bits are filled from the source. Res = _Extr(OpSizeFromSrc(Op), Dest, Src, Size - Shift); } StoreResult(GPRClass, Op, Res, -1); GenerateFlags_ShiftLeftImmediate(Op, Res, Dest, Shift); - } - else if (Shift == 0 && Size == 32) { + } else if (Shift == 0 && Size == 32) { // Ensure Zext still occurs StoreResult(GPRClass, Op, Dest, -1); } @@ -1840,10 +1741,10 @@ void OpDispatchBuilder::SHRDOp(OpcodeArgs) { // This instruction conditionally generates flags so we need to insure sane state going in. 
CalculateDeferredFlags(); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Shift = LoadGPRRegister(X86State::REG_RCX); + OrderedNode* Shift = LoadGPRRegister(X86State::REG_RCX); const auto Size = GetDstBitSize(Op); @@ -1859,14 +1760,12 @@ void OpDispatchBuilder::SHRDOp(OpcodeArgs) { auto Tmp1 = _Lshr(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Shift); auto Tmp2 = _Lshl(OpSize::i64Bit, Src, ShiftLeft); - OrderedNode *Res = _Or(OpSize::i64Bit, Tmp1, Tmp2); + OrderedNode* Res = _Or(OpSize::i64Bit, Tmp1, Tmp2); // If shift count was zero then output doesn't change // Needs to be checked for the 32bit operand case // where shift = 0 and the source register still gets Zext - Res = _Select(FEXCore::IR::COND_EQ, - Shift, _Constant(0), - Dest, Res); + Res = _Select(FEXCore::IR::COND_EQ, Shift, _Constant(0), Dest, Res); StoreResult(GPRClass, Op, Res, -1); @@ -1877,8 +1776,8 @@ void OpDispatchBuilder::SHRDOp(OpcodeArgs) { } void OpDispatchBuilder::SHRDImmediateOp(OpcodeArgs) { - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); @@ -1894,25 +1793,23 @@ void OpDispatchBuilder::SHRDImmediateOp(OpcodeArgs) { if (Shift != 0) { - OrderedNode *Res{}; + OrderedNode* Res {}; if (Size < 32) { - OrderedNode *ShiftRight = _Constant(Shift); + OrderedNode* ShiftRight = _Constant(Shift); auto ShiftLeft = _Constant(Size - Shift); auto Tmp1 = _Lshr(Size == 64 ? 
OpSize::i64Bit : OpSize::i32Bit, Dest, ShiftRight); auto Tmp2 = _Lshl(OpSize::i64Bit, Src, ShiftLeft); Res = _Or(OpSize::i64Bit, Tmp1, Tmp2); - } - else { + } else { // 32-bit and 64-bit SHRD behaves like an EXTR where the upper bits are filled from the source. Res = _Extr(OpSizeFromSrc(Op), Src, Dest, Shift); } StoreResult(GPRClass, Op, Res, -1); GenerateFlags_ShiftRightDoubleImmediate(Op, Res, Dest, Shift); - } - else if (Shift == 0 && Size == 32) { + } else if (Shift == 0 && Size == 32) { // Ensure Zext still occurs StoreResult(GPRClass, Op, Dest, -1); } @@ -1920,8 +1817,8 @@ void OpDispatchBuilder::SHRDImmediateOp(OpcodeArgs) { template void OpDispatchBuilder::ASHROp(OpcodeArgs) { - OrderedNode *Src; - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src; + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); const auto Size = GetSrcBitSize(Op); @@ -1935,7 +1832,7 @@ void OpDispatchBuilder::ASHROp(OpcodeArgs) { Dest = _Sbfe(OpSize::i64Bit, Size, 0, Dest); } - OrderedNode *Result = _Ashr(IR::SizeToOpSize(std::max(4, GetSrcSize(Op))), Dest, Src); + OrderedNode* Result = _Ashr(IR::SizeToOpSize(std::max(4, GetSrcSize(Op))), Dest, Src); StoreResult(GPRClass, Op, Result, -1); if constexpr (SHR1Bit) { @@ -1946,7 +1843,7 @@ void OpDispatchBuilder::ASHROp(OpcodeArgs) { } void OpDispatchBuilder::ASHRImmediateOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); @@ -1964,8 +1861,8 @@ void OpDispatchBuilder::ASHRImmediateOp(OpcodeArgs) { Dest = _Sbfe(OpSize::i64Bit, Size, 0, Dest); } - OrderedNode *Src = _Constant(Size, Shift); - OrderedNode *Result = _Ashr(IR::SizeToOpSize(std::max(4, GetOpSize(Dest))), Dest, Src); + OrderedNode* Src = _Constant(Size, Shift); + OrderedNode* Result = _Ashr(IR::SizeToOpSize(std::max(4, GetOpSize(Dest))), Dest, Src); 
StoreResult(GPRClass, Op, Result, -1); @@ -1974,8 +1871,8 @@ void OpDispatchBuilder::ASHRImmediateOp(OpcodeArgs) { template void OpDispatchBuilder::ROROp(OpcodeArgs) { - OrderedNode *Src; - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src; + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); const uint32_t Size = GetSrcBitSize(Op); if constexpr (Is1Bit) { @@ -2015,7 +1912,7 @@ void OpDispatchBuilder::ROROp(OpcodeArgs) { void OpDispatchBuilder::RORImmediateOp(OpcodeArgs) { // See ROLImmediateOp for masking explanation - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); @@ -2029,7 +1926,7 @@ void OpDispatchBuilder::RORImmediateOp(OpcodeArgs) { Shift &= 0x1F; } - OrderedNode *Src = _Constant(std::max(32U, Size), Shift); + OrderedNode* Src = _Constant(std::max(32U, Size), Shift); if (Size < 32) { // ARM doesn't support 8/16bit rotates. Emulate with an insert @@ -2051,8 +1948,8 @@ void OpDispatchBuilder::RORImmediateOp(OpcodeArgs) { template void OpDispatchBuilder::ROLOp(OpcodeArgs) { - OrderedNode *Src; - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src; + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); const uint32_t Size = GetSrcBitSize(Op); @@ -2081,9 +1978,8 @@ void OpDispatchBuilder::ROLOp(OpcodeArgs) { } } - auto ALUOp = _Ror(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, - Dest, - _Sub(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, _Constant(Size, std::max(32U, Size)), Src)); + auto ALUOp = _Ror(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, + _Sub(Size == 64 ? 
OpSize::i64Bit : OpSize::i32Bit, _Constant(Size, std::max(32U, Size)), Src)); StoreResult(GPRClass, Op, ALUOp, -1); @@ -2096,7 +1992,7 @@ void OpDispatchBuilder::ROLOp(OpcodeArgs) { void OpDispatchBuilder::ROLImmediateOp(OpcodeArgs) { // For 32-bit, garbage is ignored in hardware. For < 32, see Bfi comment. - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); @@ -2112,7 +2008,7 @@ void OpDispatchBuilder::ROLImmediateOp(OpcodeArgs) { // We also negate the shift so we can emulate Rol with Ror. const auto NegatedShift = std::max(32U, Size) - Shift; - OrderedNode *Src = _Constant(Size, NegatedShift); + OrderedNode* Src = _Constant(Size, NegatedShift); if (Size < 32) { // ARM doesn't support 8/16bit rotates. Emulate with an insert @@ -2161,9 +2057,7 @@ void OpDispatchBuilder::BEXTRBMIOp(OpcodeArgs) { auto Shifted = _Lshr(IR::SizeToOpSize(Size), Src1, Start); // Shifts larger than operand size need to be set to zero. - auto SanitizedShifted = _Select(IR::COND_ULE, - Start, MaxSrcBitOp, - Shifted, _Constant(SrcSize, 0)); + auto SanitizedShifted = _Select(IR::COND_ULE, Start, MaxSrcBitOp, Shifted, _Constant(SrcSize, 0)); // Now handle the length specifier. 
auto Length = _Bfe(OpSizeFromSrc(Op), 8, 8, Src2); @@ -2258,11 +2152,9 @@ void OpDispatchBuilder::BZHI(OpcodeArgs) { // In 32-bit mode we only look at bottom 32-bit, no 8 or 16-bit BZHI so no // need to zero-extend sources - auto* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, - {.AllowUpperGarbage = true}); + auto* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true}); - auto* Index = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, - {.AllowUpperGarbage = true}); + auto* Index = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); // Clear the high bits specified by the index. A64 only considers bottom bits // of the shift, so we don't need to mask bottom 8-bits ourselves. @@ -2275,13 +2167,12 @@ void OpDispatchBuilder::BZHI(OpcodeArgs) { // considers the bottom 8-bits, so we really want to know if the bottom 8-bits // have their top bits set. Test exactly that. _TestNZ(OpSize::i64Bit, Index, _Constant(0xFF & ~(OperandSize - 1))); - auto Result = _NZCVSelect(IR::SizeToOpSize(Size), CondClassType{COND_NEQ}, - Src, MaskResult); + auto Result = _NZCVSelect(IR::SizeToOpSize(Size), CondClassType {COND_NEQ}, Src, MaskResult); StoreResult(GPRClass, Op, Result, -1); auto Zero = _Constant(0); auto One = _Constant(1); - auto CF = _NZCVSelect(OpSize::i32Bit, CondClassType{COND_NEQ}, One, Zero); + auto CF = _NZCVSelect(OpSize::i32Bit, CondClassType {COND_NEQ}, One, Zero); GenerateFlags_BZHI(Op, Result, CF); } @@ -2293,8 +2184,7 @@ void OpDispatchBuilder::RORX(OpcodeArgs) { const auto GPRSize = CTX->GetGPRSize(); const auto DoRotation = Amount != 0 && Amount < SrcSizeBits; - const auto IsSameGPR = Op->Src[0].IsGPR() && Op->Dest.IsGPR() && - Op->Src[0].Data.GPR.GPR == Op->Dest.Data.GPR.GPR; + const auto IsSameGPR = Op->Src[0].IsGPR() && Op->Dest.IsGPR() && Op->Src[0].Data.GPR.GPR == Op->Dest.Data.GPR.GPR; const auto SrcSizeIsGPRSize = SrcSize == GPRSize; // If we don't need to rotate and our source is the same as the 
destination @@ -2406,10 +2296,10 @@ void OpDispatchBuilder::RCROp1Bit(OpcodeArgs) { CalculateDeferredFlags(); // We expliclty mask for <32-bit so allow garbage - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); const auto Size = GetSrcBitSize(Op); auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); - OrderedNode *Res; + OrderedNode* Res; // Our new CF will be bit 0 of the source. Set upfront to avoid a move. SetRFLAG(Dest, 0, true); @@ -2419,8 +2309,7 @@ void OpDispatchBuilder::RCROp1Bit(OpcodeArgs) { if (Size == 32 || Size == 64) { // Rotate and insert CF in the upper bit Res = _Extr(OpSizeFromSrc(Op), CF, Dest, Shift); - } - else { + } else { // Res = Src >> Shift Res = _Bfe(OpSize::i32Bit, Size - Shift, Shift, Dest); @@ -2439,7 +2328,7 @@ void OpDispatchBuilder::RCROp8x1Bit(OpcodeArgs) { // Calculate flags early. CalculateDeferredFlags(); - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); const auto SizeBit = GetSrcBitSize(Op); auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); @@ -2447,7 +2336,7 @@ void OpDispatchBuilder::RCROp8x1Bit(OpcodeArgs) { SetRFLAG(Dest, 0, true); // Rotate and insert CF in the upper bit - OrderedNode *Res = _Bfe(OpSize::i32Bit, 7, 1, Dest); + OrderedNode* Res = _Bfe(OpSize::i32Bit, 7, 1, Dest); Res = _Bfi(OpSize::i32Bit, 1, 7, Res, CF); StoreResult(GPRClass, Op, Res, -1); @@ -2469,25 +2358,27 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) { // Calculate flags early. 
CalculateDeferredFlags(); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); const auto OpSize = OpSizeFromSrc(Op); // Res = Src >> Shift - OrderedNode *Res = _Lshr(OpSize, Dest, Src); + OrderedNode* Res = _Lshr(OpSize, Dest, Src); auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); uint64_t Const; if (IsValueConstant(WrapNode(Src), &Const)) { Const &= Mask; - if (!Const) + if (!Const) { return; + } InvalidateDeferredFlags(); // Constant folded version of the above, with fused shifts. - if (Const > 1) + if (Const > 1) { Res = _Orlshl(OpSize, Res, Dest, Size + 1 - Const); + } // Our new CF will be bit (Shift - 1) of the source. SetRFLAG(Dest, Const - 1, true); @@ -2506,15 +2397,15 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) { return; } - OrderedNode *SrcMasked = _And(OpSize, Src, _Constant(Size, Mask)); - CalculateFlags_ShiftVariable(SrcMasked, [this, CF, Op, Size, OpSize, SrcMasked, Dest, &Res](){ + OrderedNode* SrcMasked = _And(OpSize, Src, _Constant(Size, Mask)); + CalculateFlags_ShiftVariable(SrcMasked, [this, CF, Op, Size, OpSize, SrcMasked, Dest, &Res]() { auto One = _Constant(Size, 1); // Res |= (SrcMasked << (Size - Shift + 1)); // Expressed as Res | ((SrcMasked << (Size - Shift)) << 1) to get correct // behaviour for Shift without clobbering NZCV. Then observe that modulo // Size, Size - Shift = -Shift so we can use a simple Neg. - OrderedNode *NegSrc = _Neg(OpSize, SrcMasked); + OrderedNode* NegSrc = _Neg(OpSize, SrcMasked); Res = _Orlshl(OpSize, Res, _Lshl(OpSize, Dest, NegSrc), 1); // Our new CF will be bit (Shift - 1) of the source. 
this is hoisted up to @@ -2543,14 +2434,14 @@ void OpDispatchBuilder::RCRSmallerOp(OpcodeArgs) { const auto Size = GetSrcBitSize(Op); // x86 masks the shift by 0x3F or 0x1F depending on size of op - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); Src = AndConst(OpSize::i32Bit, Src, 0x1F); // CF only changes if we actually shifted. OF undefined if we didn't shift. // The result is unchanged if we didn't shift. So branch over the whole thing. - CalculateFlags_ShiftVariable(Src, [this, CF, Op, Size, Src](){ - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Tmp{}; + CalculateFlags_ShiftVariable(Src, [this, CF, Op, Size, Src]() { + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Tmp {}; // Insert the incoming value across the temporary 64bit source // Make sure to insert at + 1 offsets @@ -2585,8 +2476,7 @@ void OpDispatchBuilder::RCRSmallerOp(OpcodeArgs) { // Final cascade, copies 9 bits again from itself. Tmp = _Bfi(OpSize::i64Bit, 9, 36, Tmp, Tmp); - } - else { + } else { // 16-bit optimal cascade // Cascade: 0 // Data: -> [15:0] @@ -2610,7 +2500,7 @@ void OpDispatchBuilder::RCRSmallerOp(OpcodeArgs) { // Entire bitfield has been setup // Just extract the 8 or 16bits we need - OrderedNode *Res = _Lshr(OpSize::i64Bit, Tmp, Src); + OrderedNode* Res = _Lshr(OpSize::i64Bit, Tmp, Src); StoreResult(GPRClass, Op, Res, -1); @@ -2639,14 +2529,14 @@ void OpDispatchBuilder::RCLOp1Bit(OpcodeArgs) { // Calculate flags early. CalculateDeferredFlags(); - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); const auto Size = GetSrcBitSize(Op); const auto OpSize = Size == 64 ? 
OpSize::i64Bit : OpSize::i32Bit; auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); // Rotate left and insert CF in to lowest bit // TODO: Use `adc Res, xzr, Dest, lsl 1` to save an instruction - OrderedNode *Res = _Orlshl(OpSize, CF, Dest, 1); + OrderedNode* Res = _Orlshl(OpSize, CF, Dest, 1); StoreResult(GPRClass, Op, Res, -1); @@ -2671,25 +2561,27 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) { // Calculate flags early. CalculateDeferredFlags(); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); const auto OpSize = OpSizeFromSrc(Op); // Res = Src << Shift - OrderedNode *Res = _Lshl(OpSize, Dest, Src); + OrderedNode* Res = _Lshl(OpSize, Dest, Src); auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC); uint64_t Const; if (IsValueConstant(WrapNode(Src), &Const)) { Const &= Mask; - if (!Const) + if (!Const) { return; + } InvalidateDeferredFlags(); // Res |= (Src << (Size - Shift + 1)); - if (Const > 1) + if (Const > 1) { Res = _Orlshr(OpSize, Res, Dest, Size + 1 - Const); + } // Our new CF will be bit (Shift - 1) of the source SetRFLAG(Dest, Size - Const, true); @@ -2708,8 +2600,8 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) { return; } - OrderedNode *SrcMasked = _And(OpSize, Src, _Constant(Size, Mask)); - CalculateFlags_ShiftVariable(SrcMasked, [this, CF, Op, Size, OpSize, SrcMasked, Dest, &Res](){ + OrderedNode* SrcMasked = _And(OpSize, Src, _Constant(Size, Mask)); + CalculateFlags_ShiftVariable(SrcMasked, [this, CF, Op, Size, OpSize, SrcMasked, Dest, &Res]() { // Res |= (SrcMasked >> (Size - Shift + 1)), expressed as // Res | ((SrcMasked >> (-Shift)) >> 1), since Size - Shift = -Shift mod // Size. 
@@ -2721,7 +2613,7 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) { SetRFLAG(NewCF, 0, true); // Since Shift != 0 we can inject the CF - OrderedNode *CFShl = _Sub(OpSize, SrcMasked, _Constant(Size, 1)); + OrderedNode* CFShl = _Sub(OpSize, SrcMasked, _Constant(Size, 1)); auto TmpCF = _Lshl(OpSize::i64Bit, CF, CFShl); Res = _Or(OpSize, Res, TmpCF); @@ -2746,15 +2638,15 @@ void OpDispatchBuilder::RCLSmallerOp(OpcodeArgs) { const auto Size = GetSrcBitSize(Op); // x86 masks the shift by 0x3F or 0x1F depending on size of op - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); Src = AndConst(OpSize::i32Bit, Src, 0x1F); // CF only changes if we actually shifted. OF undefined if we didn't shift. // The result is unchanged if we didn't shift. So branch over the whole thing. - CalculateFlags_ShiftVariable(Src, [this, CF, Op, Size, Src](){ - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + CalculateFlags_ShiftVariable(Src, [this, CF, Op, Size, Src]() { + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Tmp = _Constant(64, 0); + OrderedNode* Tmp = _Constant(64, 0); for (size_t i = 0; i < (32 + Size + 1); i += (Size + 1)) { // Insert incoming value @@ -2772,7 +2664,7 @@ void OpDispatchBuilder::RCLSmallerOp(OpcodeArgs) { // Shift 1 more bit that expected to get our result // Shifting to the right will now behave like a rotate to the left // Which we emulate with a _Ror - OrderedNode *Res = _Ror(OpSize::i64Bit, Tmp, _Sub(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, _Constant(Size, 64), Src)); + OrderedNode* Res = _Ror(OpSize::i64Bit, Tmp, _Sub(Size == 64 ? 
OpSize::i64Bit : OpSize::i32Bit, _Constant(Size, 64), Src)); StoreResult(GPRClass, Op, Res, -1); @@ -2794,8 +2686,8 @@ void OpDispatchBuilder::RCLSmallerOp(OpcodeArgs) { template void OpDispatchBuilder::BTOp(OpcodeArgs) { - OrderedNode *Value; - OrderedNode *Src{}; + OrderedNode* Value; + OrderedNode* Src {}; bool IsNonconstant = Op->Src[SrcIndex].IsGPR(); uint8_t ConstantShift = 0; @@ -2807,8 +2699,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs) { if (IsNonconstant) { // Because we mask explicitly with And/Bfe/Sbfe after, we can allow garbage here. - Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, - {.AllowUpperGarbage = true }); + Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, {.AllowUpperGarbage = true}); } else { // Can only be an immediate // Masked by operand size @@ -2842,21 +2733,21 @@ void OpDispatchBuilder::BTOp(OpcodeArgs) { } case BTAction::BTClear: { - OrderedNode *BitMask = _Lshl(IR::SizeToOpSize(LshrSize), _Constant(1), BitSelect); + OrderedNode* BitMask = _Lshl(IR::SizeToOpSize(LshrSize), _Constant(1), BitSelect); Dest = _Andn(IR::SizeToOpSize(LshrSize), Dest, BitMask); StoreResult(GPRClass, Op, Dest, -1); break; } case BTAction::BTSet: { - OrderedNode *BitMask = _Lshl(IR::SizeToOpSize(LshrSize), _Constant(1), BitSelect); + OrderedNode* BitMask = _Lshl(IR::SizeToOpSize(LshrSize), _Constant(1), BitSelect); Dest = _Or(IR::SizeToOpSize(LshrSize), Dest, BitMask); StoreResult(GPRClass, Op, Dest, -1); break; } case BTAction::BTComplement: { - OrderedNode *BitMask = _Lshl(IR::SizeToOpSize(LshrSize), _Constant(1), BitSelect); + OrderedNode* BitMask = _Lshl(IR::SizeToOpSize(LshrSize), _Constant(1), BitSelect); Dest = _Xor(IR::SizeToOpSize(LshrSize), Dest, BitMask); StoreResult(GPRClass, Op, Dest, -1); break; @@ -2864,10 +2755,10 @@ void OpDispatchBuilder::BTOp(OpcodeArgs) { } } else { // Load the address to the memory location - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* 
Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); Dest = AppendSegmentOffset(Dest, Op->Flags); // Get the bit selection from the src - OrderedNode *BitSelect = _Bfe(IR::SizeToOpSize(std::max(4u, GetOpSize(Src))), 3, 0, Src); + OrderedNode* BitSelect = _Bfe(IR::SizeToOpSize(std::max(4u, GetOpSize(Src))), 3, 0, Src); // Address is provided as bits we want BYTE offsets // Extract Signed offset @@ -2877,7 +2768,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs) { // Then use that to index in to the memory location by size of op // Now add the addresses together and load the memory - OrderedNode *MemoryLocation = _Add(OpSize::i64Bit, Dest, Src); + OrderedNode* MemoryLocation = _Add(OpSize::i64Bit, Dest, Src); ConstantShift = 0; @@ -2888,7 +2779,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs) { } case BTAction::BTClear: { - OrderedNode *BitMask = _Lshl(OpSize::i64Bit, _Constant(1), BitSelect); + OrderedNode* BitMask = _Lshl(OpSize::i64Bit, _Constant(1), BitSelect); if (DestIsLockedMem(Op)) { HandledLock = true; @@ -2903,7 +2794,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs) { } case BTAction::BTSet: { - OrderedNode *BitMask = _Lshl(OpSize::i64Bit, _Constant(1), BitSelect); + OrderedNode* BitMask = _Lshl(OpSize::i64Bit, _Constant(1), BitSelect); if (DestIsLockedMem(Op)) { HandledLock = true; @@ -2918,7 +2809,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs) { } case BTAction::BTComplement: { - OrderedNode *BitMask = _Lshl(OpSize::i64Bit, _Constant(1), BitSelect); + OrderedNode* BitMask = _Lshl(OpSize::i64Bit, _Constant(1), BitSelect); if (DestIsLockedMem(Op)) { HandledLock = true; @@ -2943,36 +2834,36 @@ void OpDispatchBuilder::BTOp(OpcodeArgs) { void OpDispatchBuilder::IMUL1SrcOp(OpcodeArgs) { /* We're just going to sign-extend the non-garbage anyway.. 
*/ - OrderedNode *Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); - OrderedNode *Src2 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src2 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true}); uint8_t Size = GetSrcSize(Op); - OrderedNode *Dest{}; - OrderedNode *ResultHigh{}; + OrderedNode* Dest {}; + OrderedNode* ResultHigh {}; switch (Size) { - case 1: - case 2: { - Src1 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src1); - Src2 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src2); - Dest = _Mul(OpSize::i64Bit, Src1, Src2); - ResultHigh = _Sbfe(OpSize::i64Bit, Size * 8, Size * 8, Dest); - break; - } - case 4: { - ResultHigh = _SMull(Src1, Src2); - ResultHigh = _Sbfe(OpSize::i64Bit, Size * 8, Size * 8, ResultHigh); - // Flipped order to save a move - Dest = _Mul(OpSize::i32Bit, Src1, Src2); - break; - } - case 8: { - ResultHigh = _MulH(OpSize::i64Bit, Src1, Src2); - // Flipped order to save a move - Dest = _Mul(OpSize::i64Bit, Src1, Src2); - break; - } - default: FEX_UNREACHABLE; + case 1: + case 2: { + Src1 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src1); + Src2 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src2); + Dest = _Mul(OpSize::i64Bit, Src1, Src2); + ResultHigh = _Sbfe(OpSize::i64Bit, Size * 8, Size * 8, Dest); + break; + } + case 4: { + ResultHigh = _SMull(Src1, Src2); + ResultHigh = _Sbfe(OpSize::i64Bit, Size * 8, Size * 8, ResultHigh); + // Flipped order to save a move + Dest = _Mul(OpSize::i32Bit, Src1, Src2); + break; + } + case 8: { + ResultHigh = _MulH(OpSize::i64Bit, Src1, Src2); + // Flipped order to save a move + Dest = _Mul(OpSize::i64Bit, Src1, Src2); + break; + } + default: FEX_UNREACHABLE; } StoreResult(GPRClass, Op, Dest, -1); @@ -2980,37 +2871,37 @@ void OpDispatchBuilder::IMUL1SrcOp(OpcodeArgs) { } void OpDispatchBuilder::IMUL2SrcOp(OpcodeArgs) { - OrderedNode *Src1 = 
LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true}); - OrderedNode *Src2 = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src1 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src2 = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); uint8_t Size = GetSrcSize(Op); - OrderedNode *Dest{}; - OrderedNode *ResultHigh{}; + OrderedNode* Dest {}; + OrderedNode* ResultHigh {}; switch (Size) { - case 1: - case 2: { - Src1 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src1); - Src2 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src2); - Dest = _Mul(OpSize::i64Bit, Src1, Src2); - ResultHigh = _Sbfe(OpSize::i64Bit, Size * 8, Size * 8, Dest); - break; - } - case 4: { - ResultHigh = _SMull(Src1, Src2); - ResultHigh = _Sbfe(OpSize::i64Bit, Size * 8, Size * 8, ResultHigh); - // Flipped order to save a move - Dest = _Mul(OpSize::i32Bit, Src1, Src2); - break; - } - case 8: { - ResultHigh = _MulH(OpSize::i64Bit, Src1, Src2); - // Flipped order to save a move - Dest = _Mul(OpSize::i64Bit, Src1, Src2); - break; - } - default: FEX_UNREACHABLE; + case 1: + case 2: { + Src1 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src1); + Src2 = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src2); + Dest = _Mul(OpSize::i64Bit, Src1, Src2); + ResultHigh = _Sbfe(OpSize::i64Bit, Size * 8, Size * 8, Dest); + break; + } + case 4: { + ResultHigh = _SMull(Src1, Src2); + ResultHigh = _Sbfe(OpSize::i64Bit, Size * 8, Size * 8, ResultHigh); + // Flipped order to save a move + Dest = _Mul(OpSize::i32Bit, Src1, Src2); + break; + } + case 8: { + ResultHigh = _MulH(OpSize::i64Bit, Src1, Src2); + // Flipped order to save a move + Dest = _Mul(OpSize::i64Bit, Src1, Src2); + break; + } + default: FEX_UNREACHABLE; } StoreResult(GPRClass, Op, Dest, -1); @@ -3020,7 +2911,7 @@ void OpDispatchBuilder::IMUL2SrcOp(OpcodeArgs) { void OpDispatchBuilder::IMULOp(OpcodeArgs) { const uint8_t Size = GetSrcSize(Op); - OrderedNode 
*Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); OrderedNode* Src2 = LoadGPRRegister(X86State::REG_RAX); if (Size != 8) { @@ -3029,21 +2920,19 @@ void OpDispatchBuilder::IMULOp(OpcodeArgs) { } // 64-bit special cased to save a move - OrderedNode *Result = Size < 8 ? _Mul(OpSize::i64Bit, Src1, Src2) : nullptr; - OrderedNode *ResultHigh{}; + OrderedNode* Result = Size < 8 ? _Mul(OpSize::i64Bit, Src1, Src2) : nullptr; + OrderedNode* ResultHigh {}; if (Size == 1) { // Result is stored in AX StoreGPRRegister(X86State::REG_RAX, Result, 2); ResultHigh = _Sbfe(OpSize::i64Bit, 8, 8, Result); - } - else if (Size == 2) { + } else if (Size == 2) { // 16bits stored in AX // 16bits stored in DX StoreGPRRegister(X86State::REG_RAX, Result, Size); ResultHigh = _Sbfe(OpSize::i64Bit, 16, 16, Result); StoreGPRRegister(X86State::REG_RDX, ResultHigh, Size); - } - else if (Size == 4) { + } else if (Size == 4) { // 32bits stored in EAX // 32bits stored in EDX // Make sure they get Zext correctly @@ -3053,8 +2942,7 @@ void OpDispatchBuilder::IMULOp(OpcodeArgs) { Result = _Sbfe(OpSize::i64Bit, 32, 0, Result); StoreGPRRegister(X86State::REG_RAX, LocalResult); StoreGPRRegister(X86State::REG_RDX, LocalResultHigh); - } - else if (Size == 8) { + } else if (Size == 8) { if (!CTX->Config.Is64BitMode) { LogMan::Msg::EFmt("Doesn't exist in 32bit mode"); DecodeFailure = true; @@ -3074,37 +2962,34 @@ void OpDispatchBuilder::IMULOp(OpcodeArgs) { void OpDispatchBuilder::MULOp(OpcodeArgs) { const uint8_t Size = GetSrcSize(Op); - OrderedNode *Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); OrderedNode* Src2 = LoadGPRRegister(X86State::REG_RAX); if (Size != 8) { Src1 = _Bfe(OpSize::i64Bit, Size * 8, 0, Src1); Src2 = _Bfe(OpSize::i64Bit, Size * 
8, 0, Src2); } - OrderedNode *Result = _UMul(OpSize::i64Bit, Src1, Src2); - OrderedNode *ResultHigh{}; + OrderedNode* Result = _UMul(OpSize::i64Bit, Src1, Src2); + OrderedNode* ResultHigh {}; if (Size == 1) { // Result is stored in AX StoreGPRRegister(X86State::REG_RAX, Result, 2); ResultHigh = _Bfe(OpSize::i64Bit, 8, 8, Result); - } - else if (Size == 2) { + } else if (Size == 2) { // 16bits stored in AX // 16bits stored in DX StoreGPRRegister(X86State::REG_RAX, Result, Size); ResultHigh = _Bfe(OpSize::i64Bit, 16, 16, Result); StoreGPRRegister(X86State::REG_RDX, ResultHigh, Size); - } - else if (Size == 4) { + } else if (Size == 4) { // 32bits stored in EAX // 32bits stored in EDX - OrderedNode *ResultLow = _Bfe(OpSize::i64Bit, 32, 0, Result); + OrderedNode* ResultLow = _Bfe(OpSize::i64Bit, 32, 0, Result); ResultHigh = _Bfe(OpSize::i64Bit, 32, 32, Result); StoreGPRRegister(X86State::REG_RAX, ResultLow); StoreGPRRegister(X86State::REG_RDX, ResultHigh); - } - else if (Size == 8) { + } else if (Size == 8) { if (!CTX->Config.Is64BitMode) { LogMan::Msg::EFmt("Doesn't exist in 32bit mode"); DecodeFailure = true; @@ -3122,22 +3007,21 @@ void OpDispatchBuilder::MULOp(OpcodeArgs) { void OpDispatchBuilder::NOTOp(OpcodeArgs) { uint8_t Size = GetSrcSize(Op); - OrderedNode *MaskConst{}; + OrderedNode* MaskConst {}; if (Size == 8) { MaskConst = _Constant(~0ULL); - } - else { + } else { MaskConst = _Constant((1ULL << (Size * 8)) - 1); } if (DestIsLockedMem(Op)) { HandledLock = true; - OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); DestMem = AppendSegmentOffset(DestMem, Op->Flags); _AtomicXor(IR::SizeToOpSize(Size), MaskConst, DestMem); } else if (!Op->Dest.IsGPR()) { // GPR version plays fast and loose with sizes, be safe for memory tho. 
- OrderedNode *Src = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); Src = _Xor(OpSize::i64Bit, Src, MaskConst); StoreResult(GPRClass, Op, Src, -1); } else { @@ -3153,17 +3037,18 @@ void OpDispatchBuilder::NOTOp(OpcodeArgs) { // Always load full size, we explicitly want the upper bits to get the // insert behaviour for free/implicitly. const uint8_t GPRSize = CTX->GetGPRSize(); - OrderedNode *Src = LoadSource_WithOpSize(GPRClass, Op, Dest, GPRSize, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(GPRClass, Op, Dest, GPRSize, Op->Flags); // For 8/16-bit, use 64-bit invert so we invert in place, while getting // insert behaviour. For 32-bit, use 32-bit invert to zero the upper bits. unsigned EffectiveSize = Size == 4 ? 4 : GPRSize; // If we're inverting the whole thing, use Not instead of Xor to save a constant. - if (Size >= 4) + if (Size >= 4) { Src = _Not(IR::SizeToOpSize(EffectiveSize), Src); - else + } else { Src = _Xor(IR::SizeToOpSize(EffectiveSize), Src, MaskConst); + } // Always store 64-bit, the Not/Xor correctly handle the upper bits and this // way we can delete the store. @@ -3172,9 +3057,9 @@ void OpDispatchBuilder::NOTOp(OpcodeArgs) { } void OpDispatchBuilder::XADDOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result; + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result; const auto Size = GetSrcBitSize(Op); const auto OpSize = Size == 64 ? 
OpSize::i64Bit : OpSize::i32Bit; @@ -3190,8 +3075,7 @@ void OpDispatchBuilder::XADDOp(OpcodeArgs) { StoreResult(GPRClass, Op, Result, -1); GenerateFlags_ADD(Op, Result, Dest, Src); - } - else { + } else { HandledLock = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK; Dest = AppendSegmentOffset(Dest, Op->Flags); auto Before = _AtomicFetchAdd(OpSizeFromSrc(Op), Src, Dest); @@ -3203,7 +3087,7 @@ void OpDispatchBuilder::XADDOp(OpcodeArgs) { } void OpDispatchBuilder::PopcountOp(OpcodeArgs) { - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = GetSrcSize(Op) >= 4}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = GetSrcSize(Op) >= 4}); Src = _Popcount(OpSizeFromSrc(Op), Src); StoreResult(GPRClass, Op, Src, -1); @@ -3219,7 +3103,8 @@ void OpDispatchBuilder::DAAOp(OpcodeArgs) { SetRFLAG(_Constant(0)); CalculateDeferredFlags(); - auto Cond = _Or(OpSize::i64Bit, AF, _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xF)), _Constant(9), _Constant(1), _Constant(0))); + auto Cond = _Or(OpSize::i64Bit, AF, + _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xF)), _Constant(9), _Constant(1), _Constant(0))); auto FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); auto TrueBlock = CreateNewCodeBlockAfter(FalseBlock); auto EndBlock = CreateNewCodeBlockAfter(TrueBlock); @@ -3279,8 +3164,10 @@ void OpDispatchBuilder::DAAOp(OpcodeArgs) { // Update Flags AL = LoadGPRRegister(X86State::REG_RAX, 1); - SetRFLAG(_Select(FEXCore::IR::COND_UGE, _And(OpSize::i64Bit, AL, _Constant(0x80)), _Constant(0), _Constant(1), _Constant(0))); - SetRFLAG(_Select(FEXCore::IR::COND_EQ, _And(OpSize::i64Bit, AL, _Constant(0xFF)), _Constant(0), _Constant(1), _Constant(0))); + SetRFLAG( + _Select(FEXCore::IR::COND_UGE, _And(OpSize::i64Bit, AL, _Constant(0x80)), _Constant(0), _Constant(1), _Constant(0))); + SetRFLAG( + _Select(FEXCore::IR::COND_EQ, _And(OpSize::i64Bit, AL, 
_Constant(0xFF)), _Constant(0), _Constant(1), _Constant(0))); CalculatePF(AL); FixupAF(); } @@ -3294,7 +3181,8 @@ void OpDispatchBuilder::DASOp(OpcodeArgs) { SetRFLAG(_Constant(0)); CalculateDeferredFlags(); - auto Cond = _Or(OpSize::i64Bit, AF, _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xf)), _Constant(9), _Constant(1), _Constant(0))); + auto Cond = _Or(OpSize::i64Bit, AF, + _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xf)), _Constant(9), _Constant(1), _Constant(0))); auto FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); auto TrueBlock = CreateNewCodeBlockAfter(FalseBlock); auto EndBlock = CreateNewCodeBlockAfter(TrueBlock); @@ -3352,8 +3240,10 @@ void OpDispatchBuilder::DASOp(OpcodeArgs) { StartNewBlock(); // Update Flags AL = LoadGPRRegister(X86State::REG_RAX, 1); - SetRFLAG(_Select(FEXCore::IR::COND_UGE, _And(OpSize::i64Bit, AL, _Constant(0x80)), _Constant(0), _Constant(1), _Constant(0))); - SetRFLAG(_Select(FEXCore::IR::COND_EQ, _And(OpSize::i64Bit, AL, _Constant(0xFF)), _Constant(0), _Constant(1), _Constant(0))); + SetRFLAG( + _Select(FEXCore::IR::COND_UGE, _And(OpSize::i64Bit, AL, _Constant(0x80)), _Constant(0), _Constant(1), _Constant(0))); + SetRFLAG( + _Select(FEXCore::IR::COND_EQ, _And(OpSize::i64Bit, AL, _Constant(0xFF)), _Constant(0), _Constant(1), _Constant(0))); CalculatePF(AL); FixupAF(); } @@ -3364,7 +3254,8 @@ void OpDispatchBuilder::AAAOp(OpcodeArgs) { auto AF = LoadAF(); auto AL = LoadGPRRegister(X86State::REG_RAX, 1); auto AX = LoadGPRRegister(X86State::REG_RAX, 2); - auto Cond = _Or(OpSize::i64Bit, AF, _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xF)), _Constant(9), _Constant(1), _Constant(0))); + auto Cond = _Or(OpSize::i64Bit, AF, + _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xF)), _Constant(9), _Constant(1), _Constant(0))); auto FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); auto TrueBlock = CreateNewCodeBlockAfter(FalseBlock); @@ 
-3404,7 +3295,8 @@ void OpDispatchBuilder::AASOp(OpcodeArgs) { auto AF = LoadAF(); auto AL = LoadGPRRegister(X86State::REG_RAX, 1); auto AX = LoadGPRRegister(X86State::REG_RAX, 2); - auto Cond = _Or(OpSize::i64Bit, AF, _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xF)), _Constant(9), _Constant(1), _Constant(0))); + auto Cond = _Or(OpSize::i64Bit, AF, + _Select(FEXCore::IR::COND_UGT, _And(OpSize::i64Bit, AL, _Constant(0xF)), _Constant(9), _Constant(1), _Constant(0))); auto FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); auto TrueBlock = CreateNewCodeBlockAfter(FalseBlock); @@ -3474,8 +3366,8 @@ void OpDispatchBuilder::AADOp(OpcodeArgs) { } void OpDispatchBuilder::XLATOp(OpcodeArgs) { - OrderedNode *Src = LoadGPRRegister(X86State::REG_RBX); - OrderedNode *Offset = LoadGPRRegister(X86State::REG_RAX, 1); + OrderedNode* Src = LoadGPRRegister(X86State::REG_RBX); + OrderedNode* Offset = LoadGPRRegister(X86State::REG_RAX, 1); Src = AppendSegmentOffset(Src, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX); Src = _Add(OpSize::i64Bit, Src, Offset); @@ -3490,11 +3382,10 @@ void OpDispatchBuilder::ReadSegmentReg(OpcodeArgs) { // 64-bit only // Doesn't hit the segment register optimization auto Size = GetSrcSize(Op); - OrderedNode *Src{}; + OrderedNode* Src {}; if constexpr (Seg == Segment::FS) { Src = _LoadContext(Size, GPRClass, offsetof(FEXCore::Core::CPUState, fs_cached)); - } - else { + } else { Src = _LoadContext(Size, GPRClass, offsetof(FEXCore::Core::CPUState, gs_cached)); } @@ -3506,11 +3397,10 @@ void OpDispatchBuilder::WriteSegmentReg(OpcodeArgs) { // Documentation claims that the 32-bit version of this instruction inserts in to the lower 32-bits of the segment // This is incorrect and it instead zero extends the 32-bit value to 64-bit auto Size = GetDstSize(Op); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); if constexpr (Seg == 
Segment::FS) { _StoreContext(Size, GPRClass, Src, offsetof(FEXCore::Core::CPUState, fs_cached)); - } - else { + } else { _StoreContext(Size, GPRClass, Src, offsetof(FEXCore::Core::CPUState, gs_cached)); } } @@ -3524,7 +3414,7 @@ void OpDispatchBuilder::EnterOp(OpcodeArgs) { const uint16_t AllocSpace = Value & 0xFFFF; const uint8_t Level = (Value >> 16) & 0x1F; - const auto PushValue = [&](uint8_t Size, OrderedNode *Src) -> OrderedNode* { + const auto PushValue = [&](uint8_t Size, OrderedNode* Src) -> OrderedNode* { const uint8_t GPRSize = CTX->GetGPRSize(); auto OldSP = LoadGPRRegister(X86State::REG_RSP); @@ -3578,15 +3468,14 @@ void OpDispatchBuilder::SGDTOp(OpcodeArgs) { OpDispatchBuilder::CycleCounterPair OpDispatchBuilder::CycleCounter() { - OrderedNode *CounterLow{}; - OrderedNode *CounterHigh{}; + OrderedNode* CounterLow {}; + OrderedNode* CounterHigh {}; auto Counter = _CycleCounter(); if (CTX->Config.SmallTSCScale()) { const auto ShiftAmount = FEXCore::ilog2(FEXCore::Context::TSC_SCALE); CounterLow = _Lshl(OpSize::i32Bit, Counter, _Constant(ShiftAmount)); CounterHigh = _Lshr(OpSize::i64Bit, Counter, _Constant(32 - ShiftAmount)); - } - else { + } else { CounterLow = _Bfe(OpSize::i64Bit, 32, 0, Counter); CounterHigh = _Bfe(OpSize::i64Bit, 32, 32, Counter); } @@ -3610,8 +3499,8 @@ void OpDispatchBuilder::INCOp(OpcodeArgs) { return; } - OrderedNode *Dest; - OrderedNode *Result; + OrderedNode* Dest; + OrderedNode* Result; const auto Size = GetSrcBitSize(Op); auto OneConst = _Constant(Size, 1); @@ -3642,8 +3531,8 @@ void OpDispatchBuilder::DECOp(OpcodeArgs) { return; } - OrderedNode *Dest; - OrderedNode *Result; + OrderedNode* Dest; + OrderedNode* Result; const auto Size = GetSrcBitSize(Op); auto OneConst = _Constant(Size, 1); @@ -3677,8 +3566,8 @@ void OpDispatchBuilder::STOSOp(OpcodeArgs) { const bool Repeat = (Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX)) != 0; if (!Repeat) { - OrderedNode 
*Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest = LoadGPRRegister(X86State::REG_RDI); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadGPRRegister(X86State::REG_RDI); // Only ES prefix Dest = AppendSegmentOffset(Dest, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true); @@ -3692,22 +3581,21 @@ void OpDispatchBuilder::STOSOp(OpcodeArgs) { auto PtrDir = _SubShift(IR::SizeToOpSize(CTX->GetGPRSize()), SizeConst, DF, ShiftType::LSL, FEXCore::ilog2(Size) + 1); // Offset the pointer - OrderedNode *TailDest = LoadGPRRegister(X86State::REG_RDI); + OrderedNode* TailDest = LoadGPRRegister(X86State::REG_RDI); TailDest = _Add(OpSize::i64Bit, TailDest, PtrDir); StoreGPRRegister(X86State::REG_RDI, TailDest); - } - else { + } else { // FEX doesn't support partial faulting REP instructions. // Converting this to a `MemSet` IR op optimizes this quite significantly in our codegen. // If FEX is to gain support for faulting REP instructions, then this implementation needs to change significantly. 
- OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest = LoadGPRRegister(X86State::REG_RDI); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadGPRRegister(X86State::REG_RDI); // Only ES prefix auto Segment = GetSegment(0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true); - OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX); + OrderedNode* Counter = LoadGPRRegister(X86State::REG_RCX); auto DF = GetRFLAG(FEXCore::X86State::RFLAG_DF_LOC); auto Result = _MemSet(CTX->IsAtomicTSOEnabled(), Size, Segment ?: InvalidNode, Dest, Src, Counter, DF); @@ -3737,25 +3625,21 @@ void OpDispatchBuilder::MOVSOp(OpcodeArgs) { auto DstSegment = GetSegment(0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true); auto SrcSegment = GetSegment(Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX); - auto Result = _MemCpy(CTX->IsAtomicTSOEnabled(), Size, - DstSegment ?: InvalidNode, - SrcSegment ?: InvalidNode, - DstAddr, SrcAddr, Counter, DF); + auto Result = _MemCpy(CTX->IsAtomicTSOEnabled(), Size, DstSegment ?: InvalidNode, SrcSegment ?: InvalidNode, DstAddr, SrcAddr, Counter, DF); - OrderedNode *Result_Dst = _ExtractElementPair(OpSize::i64Bit, Result, 0); - OrderedNode *Result_Src = _ExtractElementPair(OpSize::i64Bit, Result, 1); + OrderedNode* Result_Dst = _ExtractElementPair(OpSize::i64Bit, Result, 0); + OrderedNode* Result_Src = _ExtractElementPair(OpSize::i64Bit, Result, 1); StoreGPRRegister(X86State::REG_RCX, _Constant(0)); StoreGPRRegister(X86State::REG_RDI, Result_Dst); StoreGPRRegister(X86State::REG_RSI, Result_Src); - } - else { + } else { auto SizeConst = _Constant(Size); auto PtrDir = _SubShift(IR::SizeToOpSize(CTX->GetGPRSize()), SizeConst, DF, ShiftType::LSL, FEXCore::ilog2(Size) + 1); - OrderedNode *RSI = LoadGPRRegister(X86State::REG_RSI); - OrderedNode *RDI = LoadGPRRegister(X86State::REG_RDI); - RDI= AppendSegmentOffset(RDI, 0, 
FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true); + OrderedNode* RSI = LoadGPRRegister(X86State::REG_RSI); + OrderedNode* RDI = LoadGPRRegister(X86State::REG_RDI); + RDI = AppendSegmentOffset(RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true); RSI = AppendSegmentOffset(RSI, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX); auto Src = _LoadMemAutoTSO(GPRClass, Size, RSI, Size); @@ -3782,8 +3666,8 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) { bool Repeat = Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX); if (!Repeat) { - OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI); - OrderedNode *Dest_RDI = LoadGPRRegister(X86State::REG_RDI); + OrderedNode* Dest_RSI = LoadGPRRegister(X86State::REG_RSI); + OrderedNode* Dest_RDI = LoadGPRRegister(X86State::REG_RDI); // Only ES prefix Dest_RDI = AppendSegmentOffset(Dest_RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true); @@ -3807,8 +3691,7 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) { // Offset second pointer Dest_RSI = _Add(OpSize::i64Bit, Dest_RSI, PtrDir); StoreGPRRegister(X86State::REG_RSI, Dest_RSI); - } - else { + } else { // Calculate flags early. CalculateDeferredFlags(); @@ -3826,7 +3709,7 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) { SetCurrentCodeBlock(LoopStart); StartNewBlock(); - OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX); + OrderedNode* Counter = LoadGPRRegister(X86State::REG_RCX); // Can we end the block? 
auto CondJump_ = CondJump(Counter, {COND_EQ}); @@ -3839,8 +3722,8 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) { // Working loop { - OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI); - OrderedNode *Dest_RDI = LoadGPRRegister(X86State::REG_RDI); + OrderedNode* Dest_RSI = LoadGPRRegister(X86State::REG_RSI); + OrderedNode* Dest_RDI = LoadGPRRegister(X86State::REG_RDI); // Only ES prefix Dest_RDI = AppendSegmentOffset(Dest_RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true); @@ -3856,7 +3739,7 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) { // Calculate flags early. CalculateDeferredFlags(); - OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX); + OrderedNode* TailCounter = LoadGPRRegister(X86State::REG_RCX); // Decrement counter TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1)); @@ -3872,7 +3755,7 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) { Dest_RSI = _Add(OpSize::i64Bit, Dest_RSI, PtrDir); StoreGPRRegister(X86State::REG_RSI, Dest_RSI); - OrderedNode *ZF = GetRFLAG(FEXCore::X86State::RFLAG_ZF_RAW_LOC); + OrderedNode* ZF = GetRFLAG(FEXCore::X86State::RFLAG_ZF_RAW_LOC); CalculateDeferredFlags(); InternalCondJump = CondJump(ZF, {REPE ? 
COND_NEQ : COND_EQ}); @@ -3902,7 +3785,7 @@ void OpDispatchBuilder::LODSOp(OpcodeArgs) { const bool Repeat = (Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX)) != 0; if (!Repeat) { - OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI); + OrderedNode* Dest_RSI = LoadGPRRegister(X86State::REG_RSI); Dest_RSI = AppendSegmentOffset(Dest_RSI, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX); auto Src = _LoadMemAutoTSO(GPRClass, Size, Dest_RSI, Size); @@ -3914,12 +3797,11 @@ void OpDispatchBuilder::LODSOp(OpcodeArgs) { auto PtrDir = _SubShift(IR::SizeToOpSize(CTX->GetGPRSize()), SizeConst, DF, ShiftType::LSL, FEXCore::ilog2(Size) + 1); // Offset the pointer - OrderedNode *TailDest_RSI = LoadGPRRegister(X86State::REG_RSI); + OrderedNode* TailDest_RSI = LoadGPRRegister(X86State::REG_RSI); TailDest_RSI = _Add(OpSize::i64Bit, TailDest_RSI, PtrDir); StoreGPRRegister(X86State::REG_RSI, TailDest_RSI); - } - else { + } else { // Calculate flags early. because end of block CalculateDeferredFlags(); @@ -3941,7 +3823,7 @@ void OpDispatchBuilder::LODSOp(OpcodeArgs) { SetCurrentCodeBlock(LoopStart); StartNewBlock(); - OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX); + OrderedNode* Counter = LoadGPRRegister(X86State::REG_RCX); // Can we end the block? 
@@ -3955,7 +3837,7 @@ void OpDispatchBuilder::LODSOp(OpcodeArgs) { // Working loop { - OrderedNode *Dest_RSI = LoadGPRRegister(X86State::REG_RSI); + OrderedNode* Dest_RSI = LoadGPRRegister(X86State::REG_RSI); Dest_RSI = AppendSegmentOffset(Dest_RSI, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX); @@ -3963,8 +3845,8 @@ void OpDispatchBuilder::LODSOp(OpcodeArgs) { StoreResult(GPRClass, Op, Src, -1); - OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX); - OrderedNode *TailDest_RSI = LoadGPRRegister(X86State::REG_RSI); + OrderedNode* TailCounter = LoadGPRRegister(X86State::REG_RCX); + OrderedNode* TailDest_RSI = LoadGPRRegister(X86State::REG_RSI); // Decrement counter TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1)); @@ -3998,7 +3880,7 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) { const bool Repeat = (Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX)) != 0; if (!Repeat) { - OrderedNode *Dest_RDI = LoadGPRRegister(X86State::REG_RDI); + OrderedNode* Dest_RDI = LoadGPRRegister(X86State::REG_RDI); Dest_RDI = AppendSegmentOffset(Dest_RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true); auto Src1 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); @@ -4012,12 +3894,11 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) { auto PtrDir = _SubShift(IR::SizeToOpSize(CTX->GetGPRSize()), SizeConst, DF, ShiftType::LSL, FEXCore::ilog2(Size) + 1); // Offset the pointer - OrderedNode *TailDest_RDI = LoadGPRRegister(X86State::REG_RDI); + OrderedNode* TailDest_RDI = LoadGPRRegister(X86State::REG_RDI); TailDest_RDI = _Add(OpSize::i64Bit, TailDest_RDI, PtrDir); StoreGPRRegister(X86State::REG_RDI, TailDest_RDI); - } - else { + } else { // Calculate flags early. 
because end of block CalculateDeferredFlags(); @@ -4035,7 +3916,7 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) { SetCurrentCodeBlock(LoopStart); StartNewBlock(); - OrderedNode *Counter = LoadGPRRegister(X86State::REG_RCX); + OrderedNode* Counter = LoadGPRRegister(X86State::REG_RCX); // Can we end the block? // We leave if RCX = 0 @@ -4049,7 +3930,7 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) { // Working loop { - OrderedNode *Dest_RDI = LoadGPRRegister(X86State::REG_RDI); + OrderedNode* Dest_RDI = LoadGPRRegister(X86State::REG_RDI); Dest_RDI = AppendSegmentOffset(Dest_RDI, 0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true); @@ -4062,8 +3943,8 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) { // Calculate flags early. CalculateDeferredFlags(); - OrderedNode *TailCounter = LoadGPRRegister(X86State::REG_RCX); - OrderedNode *TailDest_RDI = LoadGPRRegister(X86State::REG_RDI); + OrderedNode* TailCounter = LoadGPRRegister(X86State::REG_RCX); + OrderedNode* TailDest_RDI = LoadGPRRegister(X86State::REG_RDI); // Decrement counter TailCounter = _Sub(OpSize::i64Bit, TailCounter, _Constant(1)); @@ -4075,7 +3956,7 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) { TailDest_RDI = _Add(OpSize::i64Bit, TailDest_RDI, PtrDir); StoreGPRRegister(X86State::REG_RDI, TailDest_RDI); - OrderedNode *ZF = GetRFLAG(FEXCore::X86State::RFLAG_ZF_RAW_LOC); + OrderedNode* ZF = GetRFLAG(FEXCore::X86State::RFLAG_ZF_RAW_LOC); CalculateDeferredFlags(); InternalCondJump = CondJump(ZF, {REPE ? COND_NEQ : COND_EQ}); @@ -4094,13 +3975,12 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) { } void OpDispatchBuilder::BSWAPOp(OpcodeArgs) { - OrderedNode *Dest; + OrderedNode* Dest; const auto Size = GetSrcSize(Op); if (Size == 2) { // BSWAP of 16bit is undef. 
ZEN+ causes the lower 16bits to get zero'd Dest = _Constant(0); - } - else { + } else { Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, CTX->GetGPRSize(), Op->Flags); Dest = _Rev(IR::SizeToOpSize(Size), Dest); } @@ -4110,7 +3990,7 @@ void OpDispatchBuilder::BSWAPOp(OpcodeArgs) { void OpDispatchBuilder::PUSHFOp(OpcodeArgs) { const uint8_t Size = GetSrcSize(Op); - OrderedNode *Src = GetPackedRFLAG(); + OrderedNode* Src = GetPackedRFLAG(); if (Size != 8) { Src = _Bfe(OpSize::i32Bit, Size * 8, 0, Src); } @@ -4129,7 +4009,7 @@ void OpDispatchBuilder::POPFOp(OpcodeArgs) { auto Constant = _Constant(Size); auto OldSP = LoadGPRRegister(X86State::REG_RSP); - OrderedNode *Src = _LoadMem(GPRClass, Size, OldSP, Size); + OrderedNode* Src = _LoadMem(GPRClass, Size, OldSP, Size); auto NewSP = _Add(OpSize::i64Bit, OldSP, Constant); // Store the new stack pointer @@ -4150,17 +4030,16 @@ void OpDispatchBuilder::NEGOp(OpcodeArgs) { auto Size = GetSrcSize(Op); auto ZeroConst = _Constant(0); - OrderedNode *Dest{}; - OrderedNode *Result{}; + OrderedNode* Dest {}; + OrderedNode* Result {}; if (DestIsLockedMem(Op)) { - OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); DestMem = AppendSegmentOffset(DestMem, Op->Flags); Dest = _AtomicFetchNeg(IR::SizeToOpSize(Size), DestMem); Result = _Neg(Size == 8 ? OpSize::i64Bit : OpSize::i32Bit, Dest); - } - else { + } else { Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); Result = _Neg(Size == 8 ? 
OpSize::i64Bit : OpSize::i32Bit, Dest); @@ -4172,13 +4051,13 @@ void OpDispatchBuilder::NEGOp(OpcodeArgs) { void OpDispatchBuilder::DIVOp(OpcodeArgs) { // This loads the divisor - OrderedNode *Divisor = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Divisor = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); const auto GPRSize = CTX->GetGPRSize(); const auto Size = GetSrcSize(Op); if (Size == 1) { - OrderedNode *Src1 = LoadGPRRegister(X86State::REG_RAX, 2); + OrderedNode* Src1 = LoadGPRRegister(X86State::REG_RAX, 2); auto UDivOp = _UDiv(OpSize::i16Bit, Src1, Divisor); auto URemOp = _URem(OpSize::i16Bit, Src1, Divisor); @@ -4186,34 +4065,31 @@ void OpDispatchBuilder::DIVOp(OpcodeArgs) { // AX[15:0] = concat auto ResultAX = _Bfi(IR::SizeToOpSize(GPRSize), 8, 8, UDivOp, URemOp); StoreGPRRegister(X86State::REG_RAX, ResultAX, 2); - } - else if (Size == 2) { - OrderedNode *Src1 = LoadGPRRegister(X86State::REG_RAX, Size); - OrderedNode *Src2 = LoadGPRRegister(X86State::REG_RDX, Size); + } else if (Size == 2) { + OrderedNode* Src1 = LoadGPRRegister(X86State::REG_RAX, Size); + OrderedNode* Src2 = LoadGPRRegister(X86State::REG_RDX, Size); auto UDivOp = _LUDiv(OpSize::i16Bit, Src1, Src2, Divisor); auto URemOp = _LURem(OpSize::i16Bit, Src1, Src2, Divisor); StoreGPRRegister(X86State::REG_RAX, UDivOp, Size); StoreGPRRegister(X86State::REG_RDX, URemOp, Size); - } - else if (Size == 4) { - OrderedNode *Src1 = LoadGPRRegister(X86State::REG_RAX, Size); - OrderedNode *Src2 = LoadGPRRegister(X86State::REG_RDX, Size); + } else if (Size == 4) { + OrderedNode* Src1 = LoadGPRRegister(X86State::REG_RAX, Size); + OrderedNode* Src2 = LoadGPRRegister(X86State::REG_RDX, Size); - OrderedNode *UDivOp = _Bfe(OpSize::i32Bit, Size * 8, 0, _LUDiv(OpSize::i32Bit, Src1, Src2, Divisor)); - OrderedNode *URemOp = _Bfe(OpSize::i32Bit, Size * 8, 0, _LURem(OpSize::i32Bit, Src1, Src2, Divisor)); + OrderedNode* UDivOp = _Bfe(OpSize::i32Bit, Size * 8, 0, _LUDiv(OpSize::i32Bit, Src1, Src2, 
Divisor)); + OrderedNode* URemOp = _Bfe(OpSize::i32Bit, Size * 8, 0, _LURem(OpSize::i32Bit, Src1, Src2, Divisor)); StoreGPRRegister(X86State::REG_RAX, UDivOp); StoreGPRRegister(X86State::REG_RDX, URemOp); - } - else if (Size == 8) { + } else if (Size == 8) { if (!CTX->Config.Is64BitMode) { LogMan::Msg::EFmt("Doesn't exist in 32bit mode"); DecodeFailure = true; return; } - OrderedNode *Src1 = LoadGPRRegister(X86State::REG_RAX); - OrderedNode *Src2 = LoadGPRRegister(X86State::REG_RDX); + OrderedNode* Src1 = LoadGPRRegister(X86State::REG_RAX); + OrderedNode* Src2 = LoadGPRRegister(X86State::REG_RDX); auto UDivOp = _LUDiv(OpSize::i64Bit, Src1, Src2, Divisor); auto URemOp = _LURem(OpSize::i64Bit, Src1, Src2, Divisor); @@ -4225,13 +4101,13 @@ void OpDispatchBuilder::DIVOp(OpcodeArgs) { void OpDispatchBuilder::IDIVOp(OpcodeArgs) { // This loads the divisor - OrderedNode *Divisor = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Divisor = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); const auto GPRSize = CTX->GetGPRSize(); const auto Size = GetSrcSize(Op); if (Size == 1) { - OrderedNode *Src1 = LoadGPRRegister(X86State::REG_RAX, 2); + OrderedNode* Src1 = LoadGPRRegister(X86State::REG_RAX, 2); Src1 = _Sbfe(OpSize::i64Bit, 16, 0, Src1); Divisor = _Sbfe(OpSize::i64Bit, 8, 0, Divisor); @@ -4241,34 +4117,31 @@ void OpDispatchBuilder::IDIVOp(OpcodeArgs) { // AX[15:0] = concat auto ResultAX = _Bfi(IR::SizeToOpSize(GPRSize), 8, 8, UDivOp, URemOp); StoreGPRRegister(X86State::REG_RAX, ResultAX, 2); - } - else if (Size == 2) { - OrderedNode *Src1 = LoadGPRRegister(X86State::REG_RAX, Size); - OrderedNode *Src2 = LoadGPRRegister(X86State::REG_RDX, Size); + } else if (Size == 2) { + OrderedNode* Src1 = LoadGPRRegister(X86State::REG_RAX, Size); + OrderedNode* Src2 = LoadGPRRegister(X86State::REG_RDX, Size); auto UDivOp = _LDiv(OpSize::i16Bit, Src1, Src2, Divisor); auto URemOp = _LRem(OpSize::i16Bit, Src1, Src2, Divisor); StoreGPRRegister(X86State::REG_RAX, UDivOp, Size); 
StoreGPRRegister(X86State::REG_RDX, URemOp, Size); - } - else if (Size == 4) { - OrderedNode *Src1 = LoadGPRRegister(X86State::REG_RAX, Size); - OrderedNode *Src2 = LoadGPRRegister(X86State::REG_RDX, Size); + } else if (Size == 4) { + OrderedNode* Src1 = LoadGPRRegister(X86State::REG_RAX, Size); + OrderedNode* Src2 = LoadGPRRegister(X86State::REG_RDX, Size); - OrderedNode *UDivOp = _Bfe(OpSize::i32Bit, Size * 8, 0, _LDiv(OpSize::i32Bit, Src1, Src2, Divisor)); - OrderedNode *URemOp = _Bfe(OpSize::i32Bit, Size * 8, 0, _LRem(OpSize::i32Bit, Src1, Src2, Divisor)); + OrderedNode* UDivOp = _Bfe(OpSize::i32Bit, Size * 8, 0, _LDiv(OpSize::i32Bit, Src1, Src2, Divisor)); + OrderedNode* URemOp = _Bfe(OpSize::i32Bit, Size * 8, 0, _LRem(OpSize::i32Bit, Src1, Src2, Divisor)); StoreGPRRegister(X86State::REG_RAX, UDivOp); StoreGPRRegister(X86State::REG_RDX, URemOp); - } - else if (Size == 8) { + } else if (Size == 8) { if (!CTX->Config.Is64BitMode) { LogMan::Msg::EFmt("Doesn't exist in 32bit mode"); DecodeFailure = true; return; } - OrderedNode *Src1 = LoadGPRRegister(X86State::REG_RAX); - OrderedNode *Src2 = LoadGPRRegister(X86State::REG_RDX); + OrderedNode* Src1 = LoadGPRRegister(X86State::REG_RAX); + OrderedNode* Src2 = LoadGPRRegister(X86State::REG_RDX); auto UDivOp = _LDiv(OpSize::i64Bit, Src1, Src2, Divisor); auto URemOp = _LRem(OpSize::i64Bit, Src1, Src2, Divisor); @@ -4281,8 +4154,8 @@ void OpDispatchBuilder::IDIVOp(OpcodeArgs) { void OpDispatchBuilder::BSFOp(OpcodeArgs) { const uint8_t GPRSize = CTX->GetGPRSize(); const uint8_t DstSize = GetDstSize(Op) == 2 ? 
2 : GPRSize; - OrderedNode *Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, DstSize, Op->Flags); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, DstSize, Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); InvalidateDeferredFlags(); CachedNZCV = nullptr; @@ -4295,15 +4168,15 @@ void OpDispatchBuilder::BSFOp(OpcodeArgs) { SetNZ_ZeroCV(GetSrcSize(Op), Src); // If Src was zero then the destination doesn't get modified - auto SelectOp = _NZCVSelect(IR::SizeToOpSize(GPRSize), CondClassType{COND_EQ}, Dest, Result); + auto SelectOp = _NZCVSelect(IR::SizeToOpSize(GPRSize), CondClassType {COND_EQ}, Dest, Result); StoreResult_WithOpSize(GPRClass, Op, Op->Dest, SelectOp, DstSize, -1); } void OpDispatchBuilder::BSROp(OpcodeArgs) { const uint8_t GPRSize = CTX->GetGPRSize(); const uint8_t DstSize = GetDstSize(Op) == 2 ? 2 : GPRSize; - OrderedNode *Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, DstSize, Op->Flags); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, DstSize, Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); InvalidateDeferredFlags(); CachedNZCV = nullptr; @@ -4316,31 +4189,31 @@ void OpDispatchBuilder::BSROp(OpcodeArgs) { SetNZ_ZeroCV(GetSrcSize(Op), Src); // If Src was zero then the destination doesn't get modified - auto SelectOp = _NZCVSelect(IR::SizeToOpSize(GPRSize), CondClassType{COND_EQ}, Dest, Result); + auto SelectOp = _NZCVSelect(IR::SizeToOpSize(GPRSize), CondClassType {COND_EQ}, Dest, Result); StoreResult_WithOpSize(GPRClass, Op, Op->Dest, SelectOp, DstSize, -1); } void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) { -// CMPXCHG ModRM, reg, {RAX} -// MemData = *ModRM.dest -// if (RAX == MemData) -// modRM.dest = reg; -// ZF = 1 -// else -// ZF = 0 -// RAX = MemData -// -// CASL Xs, Xt, Xn -// MemData = *Xn -// 
if (MemData == Xs) -// *Xn = Xt -// Xs = MemData + // CMPXCHG ModRM, reg, {RAX} + // MemData = *ModRM.dest + // if (RAX == MemData) + // modRM.dest = reg; + // ZF = 1 + // else + // ZF = 0 + // RAX = MemData + // + // CASL Xs, Xt, Xn + // MemData = *Xn + // if (MemData == Xs) + // *Xn = Xt + // Xs = MemData const auto GPRSize = CTX->GetGPRSize(); auto Size = GetSrcSize(Op); // This is our source register - OrderedNode *Src2 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); // 0x80014000 // 0x80064000 // 0x80064000 @@ -4349,20 +4222,17 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) { // If the destination is also the accumulator, we get some algebraic // simplifications. Not sure if this is actually hit but it's in // InstCountCI. - bool Trivial = Op->Dest.Data.GPR.GPR == X86State::REG_RAX && - !Op->Dest.IsGPRDirect() && - !Op->Dest.Data.GPR.HighBits; + bool Trivial = Op->Dest.Data.GPR.GPR == X86State::REG_RAX && !Op->Dest.IsGPRDirect() && !Op->Dest.Data.GPR.HighBits; - OrderedNode *Src1{}; - OrderedNode *Src1Lower{}; + OrderedNode* Src1 {}; + OrderedNode* Src1Lower {}; - OrderedNode *Src3{}; - OrderedNode *Src3Lower{}; + OrderedNode* Src3 {}; + OrderedNode* Src3Lower {}; if (GPRSize == 8 && Size == 4) { Src1 = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, GPRSize, Op->Flags); Src3 = LoadGPRRegister(X86State::REG_RAX); - } - else { + } else { Src1 = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, Size, Op->Flags); Src3 = LoadGPRRegister(X86State::REG_RAX); } @@ -4370,27 +4240,24 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) { if (Size != GPRSize) { Src1Lower = _Bfe(IR::SizeToOpSize(GPRSize), Size * 8, 0, Src1); Src3Lower = _Bfe(IR::SizeToOpSize(GPRSize), Size * 8, 0, Src3); - } - else { + } else { Src1Lower = Src1; Src3Lower = Src3; } // Compare RAX with the destination, setting flags accordingly. 
- OrderedNode *Result = _Sub(IR::SizeToOpSize(GPRSize), Src3Lower, Src1Lower); + OrderedNode* Result = _Sub(IR::SizeToOpSize(GPRSize), Src3Lower, Src1Lower); GenerateFlags_SUB(Op, Result, Src3Lower, Src1Lower); CalculateDeferredFlags(); if (!Trivial) { if (GPRSize == 8 && Size == 4) { // This allows us to only hit the ZEXT case on failure - OrderedNode *RAXResult = _NZCVSelect(IR::i64Bit, CondClassType{COND_EQ}, - Src3, Src1Lower); + OrderedNode* RAXResult = _NZCVSelect(IR::i64Bit, CondClassType {COND_EQ}, Src3, Src1Lower); // When the size is 4 we need to make sure not zext the GPR when the comparison fails StoreGPRRegister(X86State::REG_RAX, RAXResult); - } - else { + } else { StoreGPRRegister(X86State::REG_RAX, Src1Lower, Size); } } @@ -4398,8 +4265,7 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) { // Op1 = RAX == Op1 ? Op2 : Op1 // If they match then set the rm operand to the input // else don't set the rm operand - OrderedNode *DestResult = - Trivial ? Src2 : _NZCVSelect(IR::i64Bit, CondClassType{COND_EQ}, Src2, Src1); + OrderedNode* DestResult = Trivial ? 
Src2 : _NZCVSelect(IR::i64Bit, CondClassType {COND_EQ}, Src2, Src1); // Store in to GPR Dest if (GPRSize == 8 && Size == 4) { @@ -4407,22 +4273,20 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) { } else { StoreResult(GPRClass, Op, DestResult, -1); } - } - else { + } else { HandledLock = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK; - OrderedNode *Src3{}; - OrderedNode *Src3Lower{}; + OrderedNode* Src3 {}; + OrderedNode* Src3Lower {}; if (GPRSize == 8 && Size == 4) { Src3 = LoadGPRRegister(X86State::REG_RAX); Src3Lower = _Bfe(OpSize::i32Bit, 32, 0, Src3); - } - else { + } else { Src3 = LoadGPRRegister(X86State::REG_RAX, Size); Src3Lower = Src3; } // If this is a memory location then we want the pointer to it - OrderedNode *Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); Src1 = AppendSegmentOffset(Src1, Op->Flags); @@ -4430,14 +4294,12 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) { // if (DataSrc == Src3) { *Src1 == Src2; } Src2 = DataSrc // This will write to memory! Careful! // Third operand must be a calculated guest memory address - OrderedNode *CASResult = _CAS(IR::SizeToOpSize(Size), Src3Lower, Src2, Src1); - OrderedNode *RAXResult = CASResult; + OrderedNode* CASResult = _CAS(IR::SizeToOpSize(Size), Src3Lower, Src2, Src1); + OrderedNode* RAXResult = CASResult; if (GPRSize == 8 && Size == 4) { // This allows us to only hit the ZEXT case on failure - RAXResult = _Select(FEXCore::IR::COND_EQ, - CASResult, Src3Lower, - Src3, CASResult); + RAXResult = _Select(FEXCore::IR::COND_EQ, CASResult, Src3Lower, Src3, CASResult); Size = 8; } @@ -4447,7 +4309,7 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) { const auto Size = GetDstBitSize(Op); - OrderedNode *Result = _Sub(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Src3Lower, CASResult); + OrderedNode* Result = _Sub(Size == 64 ? 
OpSize::i64Bit : OpSize::i32Bit, Src3Lower, CASResult); GenerateFlags_SUB(Op, Result, Src3Lower, CASResult); } } @@ -4462,17 +4324,17 @@ void OpDispatchBuilder::CMPXCHGPairOp(OpcodeArgs) { HandledLock = (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK) != 0; // If this is a memory location then we want the pointer to it - OrderedNode *Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); Src1 = AppendSegmentOffset(Src1, Op->Flags); - OrderedNode *Expected_Lower = LoadGPRRegister(X86State::REG_RAX, Size); - OrderedNode *Expected_Upper = LoadGPRRegister(X86State::REG_RDX, Size); - OrderedNode *Expected = _CreateElementPair(IR::SizeToOpSize(Size * 2), Expected_Lower, Expected_Upper); + OrderedNode* Expected_Lower = LoadGPRRegister(X86State::REG_RAX, Size); + OrderedNode* Expected_Upper = LoadGPRRegister(X86State::REG_RDX, Size); + OrderedNode* Expected = _CreateElementPair(IR::SizeToOpSize(Size * 2), Expected_Lower, Expected_Upper); - OrderedNode *Desired_Lower = LoadGPRRegister(X86State::REG_RBX, Size); - OrderedNode *Desired_Upper = LoadGPRRegister(X86State::REG_RCX, Size); - OrderedNode *Desired = _CreateElementPair(IR::SizeToOpSize(Size * 2), Desired_Lower, Desired_Upper); + OrderedNode* Desired_Lower = LoadGPRRegister(X86State::REG_RBX, Size); + OrderedNode* Desired_Upper = LoadGPRRegister(X86State::REG_RCX, Size); + OrderedNode* Desired = _CreateElementPair(IR::SizeToOpSize(Size * 2), Desired_Lower, Desired_Upper); // ssa0 = Expected // ssa1 = Desired @@ -4483,18 +4345,16 @@ void OpDispatchBuilder::CMPXCHGPairOp(OpcodeArgs) { // This will write to memory! Careful! 
// Third operand must be a calculated guest memory address - OrderedNode *CASResult = _CASPair(IR::SizeToOpSize(Size * 2), Expected, Desired, Src1); + OrderedNode* CASResult = _CASPair(IR::SizeToOpSize(Size * 2), Expected, Desired, Src1); - OrderedNode *Result_Lower = _ExtractElementPair(IR::SizeToOpSize(Size), CASResult, 0); - OrderedNode *Result_Upper = _ExtractElementPair(IR::SizeToOpSize(Size), CASResult, 1); + OrderedNode* Result_Lower = _ExtractElementPair(IR::SizeToOpSize(Size), CASResult, 0); + OrderedNode* Result_Upper = _ExtractElementPair(IR::SizeToOpSize(Size), CASResult, 1); // Set ZF if memory result was expected auto OneConst = _Constant(1); auto ZeroConst = _Constant(0); - OrderedNode *ZFResult = _Select(FEXCore::IR::COND_EQ, - CASResult, Expected, - OneConst, ZeroConst); + OrderedNode* ZFResult = _Select(FEXCore::IR::COND_EQ, CASResult, Expected, OneConst, ZeroConst); // Set ZF SetRFLAG(ZFResult); @@ -4519,12 +4379,12 @@ void OpDispatchBuilder::CMPXCHGPairOp(OpcodeArgs) { StartNewBlock(); } -void OpDispatchBuilder::CreateJumpBlocks(fextl::vector const *Blocks) { - OrderedNode *PrevCodeBlock{}; - for (auto &Target : *Blocks) { +void OpDispatchBuilder::CreateJumpBlocks(const fextl::vector* Blocks) { + OrderedNode* PrevCodeBlock {}; + for (auto& Target : *Blocks) { auto CodeNode = CreateCodeNode(); - JumpTargets.try_emplace(Target.Entry, JumpTargetInfo{CodeNode, false}); + JumpTargets.try_emplace(Target.Entry, JumpTargetInfo {CodeNode, false}); if (PrevCodeBlock) { LinkCodeBlocks(PrevCodeBlock, CodeNode); @@ -4534,7 +4394,7 @@ void OpDispatchBuilder::CreateJumpBlocks(fextl::vector const *Blocks, uint32_t NumInstructions) { +void OpDispatchBuilder::BeginFunction(uint64_t RIP, const fextl::vector* Blocks, uint32_t NumInstructions) { Entry = RIP; auto IRHeader = _IRHeader(InvalidNode, RIP, 0, NumInstructions); CreateJumpBlocks(Blocks); @@ -4553,15 +4413,16 @@ void OpDispatchBuilder::Finalize() { const uint8_t GPRSize = CTX->GetGPRSize(); // Node 0 is 
invalid node - OrderedNode *RealNode = reinterpret_cast(GetNode(1)); + OrderedNode* RealNode = reinterpret_cast(GetNode(1)); - [[maybe_unused]] const FEXCore::IR::IROp_Header *IROp = - RealNode->Op(DualListData.DataBegin()); + [[maybe_unused]] const FEXCore::IR::IROp_Header* IROp = RealNode->Op(DualListData.DataBegin()); LOGMAN_THROW_AA_FMT(IROp->Op == OP_IRHEADER, "First op in function must be our header"); // Let's walk the jump blocks and see if we have handled every block target - for (auto &Handler : JumpTargets) { - if (Handler.second.HaveEmitted) continue; + for (auto& Handler : JumpTargets) { + if (Handler.second.HaveEmitted) { + continue; + } // We haven't emitted. Dump out to the dispatcher SetCurrentCodeBlock(Handler.second.BlockEntry); @@ -4590,19 +4451,17 @@ uint32_t OpDispatchBuilder::GetDstBitSize(X86Tables::DecodedOp Op) const { return GetDstSize(Op) * 8; } -OrderedNode *OpDispatchBuilder::GetSegment(uint32_t Flags, uint32_t DefaultPrefix, bool Override) { +OrderedNode* OpDispatchBuilder::GetSegment(uint32_t Flags, uint32_t DefaultPrefix, bool Override) { const uint8_t GPRSize = CTX->GetGPRSize(); if (CTX->Config.Is64BitMode) { if (Flags & FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX) { return _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, fs_cached)); - } - else if (Flags & FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX) { + } else if (Flags & FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX) { return _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, gs_cached)); } // If there was any other segment in 64bit then it is ignored - } - else { + } else { uint32_t Prefix = Flags & FEXCore::X86Tables::DecodeFlags::FLAG_SEGMENTS; if (!Prefix || Override) { // If there was no prefix then use the default one if available @@ -4610,28 +4469,27 @@ OrderedNode *OpDispatchBuilder::GetSegment(uint32_t Flags, uint32_t DefaultPrefi Prefix = DefaultPrefix; } // With the segment register optimization we store the GDT bases 
directly in the segment register to remove indexed loads - OrderedNode *SegmentResult{}; + OrderedNode* SegmentResult {}; switch (Prefix) { - case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: - SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, es_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: - SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, cs_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: - SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, ss_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: - SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, ds_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: - SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, fs_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: - SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, gs_cached)); - break; - default: - break; // Do nothing + case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: + SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, es_cached)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: + SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, cs_cached)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: + SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, ss_cached)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: + SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, ds_cached)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: + SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, fs_cached)); + break; + case 
FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: + SegmentResult = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, gs_cached)); + break; + default: break; // Do nothing } CheckLegacySegmentRead(SegmentResult, Prefix); @@ -4640,7 +4498,7 @@ OrderedNode *OpDispatchBuilder::GetSegment(uint32_t Flags, uint32_t DefaultPrefi return nullptr; } -OrderedNode *OpDispatchBuilder::AppendSegmentOffset(OrderedNode *Value, uint32_t Flags, uint32_t DefaultPrefix, bool Override) { +OrderedNode* OpDispatchBuilder::AppendSegmentOffset(OrderedNode* Value, uint32_t Flags, uint32_t DefaultPrefix, bool Override) { auto Segment = GetSegment(Flags, DefaultPrefix, Override); if (Segment) { Value = _Add(IR::SizeToOpSize(std::max(4, std::max(GetOpSize(Value), GetOpSize(Segment)))), Value, Segment); @@ -4650,10 +4508,9 @@ OrderedNode *OpDispatchBuilder::AppendSegmentOffset(OrderedNode *Value, uint32_t } -void OpDispatchBuilder::CheckLegacySegmentRead(OrderedNode *NewNode, uint32_t SegmentReg) { +void OpDispatchBuilder::CheckLegacySegmentRead(OrderedNode* NewNode, uint32_t SegmentReg) { #ifndef FEX_DISABLE_TELEMETRY - if (SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX || - SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX) { + if (SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX || SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX) { // FS and GS segments aren't considered legacy. 
return; } @@ -4669,25 +4526,25 @@ void OpDispatchBuilder::CheckLegacySegmentRead(OrderedNode *NewNode, uint32_t Se return; } - FEXCore::Telemetry::TelemetryType TelemIndex{}; + FEXCore::Telemetry::TelemetryType TelemIndex {}; switch (SegmentReg) { - case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: - TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_ES; - SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX; - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: - TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_CS; - SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX; - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: - TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_SS; - SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX; - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: - TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_DS; - SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX; - break; - default: FEX_UNREACHABLE; + case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: + TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_ES; + SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX; + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: + TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_CS; + SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX; + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: + TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_SS; + SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX; + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: + TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_DS; + SegmentsNeedReadCheck &= 
~FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX; + break; + default: FEX_UNREACHABLE; } // Will set the telemetry value if NewNode is != 0 @@ -4695,10 +4552,9 @@ void OpDispatchBuilder::CheckLegacySegmentRead(OrderedNode *NewNode, uint32_t Se #endif } -void OpDispatchBuilder::CheckLegacySegmentWrite(OrderedNode *NewNode, uint32_t SegmentReg) { +void OpDispatchBuilder::CheckLegacySegmentWrite(OrderedNode* NewNode, uint32_t SegmentReg) { #ifndef FEX_DISABLE_TELEMETRY - if (SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX || - SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX) { + if (SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX || SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX) { // FS and GS segments aren't considered legacy. return; } @@ -4708,25 +4564,25 @@ void OpDispatchBuilder::CheckLegacySegmentWrite(OrderedNode *NewNode, uint32_t S return; } - FEXCore::Telemetry::TelemetryType TelemIndex{}; + FEXCore::Telemetry::TelemetryType TelemIndex {}; switch (SegmentReg) { - case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: - TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_ES; - SegmentsNeedReadCheck |= FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX; - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: - TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_CS; - SegmentsNeedReadCheck |= FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX; - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: - TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_SS; - SegmentsNeedReadCheck |= FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX; - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: - TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_DS; - SegmentsNeedReadCheck |= FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX; - break; - default: FEX_UNREACHABLE; + case 
FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: + TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_ES; + SegmentsNeedReadCheck |= FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX; + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: + TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_CS; + SegmentsNeedReadCheck |= FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX; + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: + TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_SS; + SegmentsNeedReadCheck |= FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX; + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: + TelemIndex = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_DS; + SegmentsNeedReadCheck |= FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX; + break; + default: FEX_UNREACHABLE; } // Will set the telemetry value if NewNode is != 0 @@ -4734,48 +4590,45 @@ void OpDispatchBuilder::CheckLegacySegmentWrite(OrderedNode *NewNode, uint32_t S #endif } -void OpDispatchBuilder::UpdatePrefixFromSegment(OrderedNode *Segment, uint32_t SegmentReg) { +void OpDispatchBuilder::UpdatePrefixFromSegment(OrderedNode* Segment, uint32_t SegmentReg) { // Use BFE to extract the selector index in bits [15,3] of the segment register. // In some cases the upper 16-bits of the 32-bit GPR contain garbage to ignore. 
Segment = _Bfe(OpSize::i32Bit, 16 - 3, 3, Segment); auto NewSegment = _LoadContextIndexed(Segment, 4, offsetof(FEXCore::Core::CPUState, gdt[0]), 4, GPRClass); CheckLegacySegmentWrite(NewSegment, SegmentReg); switch (SegmentReg) { - case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: - _StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, es_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: - _StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, cs_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: - _StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, ss_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: - _StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, ds_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: - _StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, fs_cached)); - break; - case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: - _StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, gs_cached)); - break; - default: break; // Do nothing + case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: + _StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, es_cached)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: + _StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, cs_cached)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: + _StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, ss_cached)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: + _StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, ds_cached)); + break; + case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: + _StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, fs_cached)); + break; + case 
FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: + _StoreContext(4, GPRClass, NewSegment, offsetof(FEXCore::Core::CPUState, gs_cached)); + break; + default: break; // Do nothing } } -OrderedNode *OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, X86Tables::DecodedOp const& Op, X86Tables::DecodedOperand const& Operand, - uint8_t OpSize, uint32_t Flags, const LoadSourceOptions& Options) { - LOGMAN_THROW_A_FMT(Operand.IsGPR() || - Operand.IsLiteral() || - Operand.IsGPRDirect() || - Operand.IsGPRIndirect() || - Operand.IsRIPRelative() || - Operand.IsSIB(), - "Unsupported Src type"); +OrderedNode* OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, const X86Tables::DecodedOp& Op, + const X86Tables::DecodedOperand& Operand, uint8_t OpSize, uint32_t Flags, + const LoadSourceOptions& Options) { + LOGMAN_THROW_A_FMT( + Operand.IsGPR() || Operand.IsLiteral() || Operand.IsGPRDirect() || Operand.IsGPRIndirect() || Operand.IsRIPRelative() || Operand.IsSIB(), + "Unsupported Src type"); auto [Align, LoadData, ForceLoad, AccessType, AllowUpperGarbage] = Options; - OrderedNode *Src {nullptr}; + OrderedNode* Src {nullptr}; bool LoadableType = false; const uint8_t GPRSize = CTX->GetGPRSize(); const uint32_t AddrSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) != 0 ? (GPRSize >> 1) : GPRSize; @@ -4789,15 +4642,13 @@ OrderedNode *OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, X constant = constant & ((1ULL << width) - 1); } Src = _Constant(width, constant); - } - else if (Operand.IsGPR()) { + } else if (Operand.IsGPR()) { const auto gpr = Operand.Data.GPR.GPR; const auto highIndex = Operand.Data.GPR.HighBits ? 
1 : 0; if (gpr >= FEXCore::X86State::REG_MM_0) { Src = _LoadContext(OpSize, FPRClass, offsetof(FEXCore::Core::CPUState, mm[gpr - FEXCore::X86State::REG_MM_0])); - } - else if (gpr >= FEXCore::X86State::REG_XMM_0) { + } else if (gpr >= FEXCore::X86State::REG_XMM_0) { const auto gprIndex = gpr - X86State::REG_XMM_0; // Load the full register size if it is a XMM register source. @@ -4809,20 +4660,17 @@ OrderedNode *OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, X if (!AllowUpperGarbage && OpSize < Core::CPUState::XMM_SSE_REG_SIZE) { Src = _VMov(OpSize, Src); } - } - else { + } else { Src = LoadGPRRegister(gpr, OpSize, highIndex ? 8 : 0, AllowUpperGarbage); } - } - else if (Operand.IsGPRDirect()) { + } else if (Operand.IsGPRDirect()) { Src = LoadGPRRegister(Operand.Data.GPR.GPR, GPRSize); LoadableType = true; if (Operand.Data.GPR.GPR == FEXCore::X86State::REG_RSP && AccessType == MemoryAccessType::DEFAULT) { AccessType = MemoryAccessType::NONTSO; } - } - else if (Operand.IsGPRIndirect()) { + } else if (Operand.IsGPRIndirect()) { auto GPR = LoadGPRRegister(Operand.Data.GPRIndirect.GPR, GPRSize); auto Constant = _Constant(GPRSize * 8, Operand.Data.GPRIndirect.Displacement); @@ -4833,28 +4681,24 @@ OrderedNode *OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, X if (Operand.Data.GPRIndirect.GPR == FEXCore::X86State::REG_RSP && AccessType == MemoryAccessType::DEFAULT) { AccessType = MemoryAccessType::NONTSO; } - } - else if (Operand.IsRIPRelative()) { + } else if (Operand.IsRIPRelative()) { if (CTX->Config.Is64BitMode) { Src = GetRelocatedPC(Op, Operand.Data.RIPLiteral.Value.s); - } - else { + } else { // 32bit this isn't RIP relative but instead absolute Src = _Constant(GPRSize * 8, Operand.Data.RIPLiteral.Value.u); } LoadableType = true; - } - else if (Operand.IsSIB()) { + } else if (Operand.IsSIB()) { const bool IsVSIB = (Op->Flags & X86Tables::DecodeFlags::FLAG_VSIB_BYTE) != 0; - OrderedNode *Tmp{}; + OrderedNode* Tmp {}; if (!IsVSIB 
&& Operand.Data.SIB.Index != FEXCore::X86State::REG_INVALID && Operand.Data.SIB.Base != FEXCore::X86State::REG_INVALID) { auto Base = LoadGPRRegister(Operand.Data.SIB.Base, GPRSize); auto Index = LoadGPRRegister(Operand.Data.SIB.Index, GPRSize); Tmp = _AddShift(IR::SizeToOpSize(GPRSize), Base, Index, ShiftType::LSL, FEXCore::ilog2(Operand.Data.SIB.Scale)); - } - else { + } else { // NOTE: VSIB cannot have the index * scale portion calculated ahead of time, // since the index in this case is a vector. So, we can't just apply the scale // to it, since this needs to be applied to each element in the index register @@ -4881,8 +4725,7 @@ OrderedNode *OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, X if (Tmp != nullptr) { Tmp = _Add(IR::SizeToOpSize(GPRSize), Tmp, GPR); - } - else { + } else { Tmp = GPR; } @@ -4895,23 +4738,19 @@ OrderedNode *OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, X if (Operand.Data.SIB.Offset) { if (Tmp != nullptr) { Src = _Add(IR::SizeToOpSize(GPRSize), Tmp, _Constant(GPRSize * 8, Operand.Data.SIB.Offset)); - } - else { + } else { Src = _Constant(GPRSize * 8, Operand.Data.SIB.Offset); } - } - else { + } else { if (Tmp != nullptr) { Src = Tmp; - } - else { + } else { Src = _Constant(GPRSize * 8, 0); } } LoadableType = true; - } - else { + } else { LOGMAN_MSG_A_FMT("Unknown Src Type: {}\n", Operand.Type); } @@ -4928,54 +4767,53 @@ OrderedNode *OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, X if (AccessType == MemoryAccessType::NONTSO || AccessType == MemoryAccessType::STREAM) { Src = _LoadMem(Class, OpSize, Src, Align == -1 ? OpSize : Align); - } - else { + } else { Src = _LoadMemAutoTSO(Class, OpSize, Src, Align == -1 ? 
OpSize : Align); } } return Src; } -OrderedNode *OpDispatchBuilder::GetRelocatedPC(FEXCore::X86Tables::DecodedOp const& Op, int64_t Offset) { +OrderedNode* OpDispatchBuilder::GetRelocatedPC(const FEXCore::X86Tables::DecodedOp& Op, int64_t Offset) { const uint8_t GPRSize = CTX->GetGPRSize(); return _EntrypointOffset(IR::SizeToOpSize(GPRSize), Op->PC + Op->InstSize + Offset - Entry); } -OrderedNode *OpDispatchBuilder::LoadGPRRegister(uint32_t GPR, int8_t Size, uint8_t Offset, bool AllowUpperGarbage) { +OrderedNode* OpDispatchBuilder::LoadGPRRegister(uint32_t GPR, int8_t Size, uint8_t Offset, bool AllowUpperGarbage) { const uint8_t GPRSize = CTX->GetGPRSize(); if (Size == -1) { Size = GPRSize; } - OrderedNode *Reg = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRFixedClass, GPRSize); + OrderedNode* Reg = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRFixedClass, GPRSize); if ((!AllowUpperGarbage && (Size != GPRSize)) || Offset != 0) { // Extract the subregister if requested. const auto OpSize = IR::SizeToOpSize(std::max(4u, Size)); - if (AllowUpperGarbage) + if (AllowUpperGarbage) { Reg = _Lshr(OpSize, Reg, _Constant(Offset)); - else + } else { Reg = _Bfe(OpSize, Size * 8, Offset, Reg); + } } return Reg; } -OrderedNode *OpDispatchBuilder::LoadXMMRegister(uint32_t XMM) { +OrderedNode* OpDispatchBuilder::LoadXMMRegister(uint32_t XMM) { const auto VectorSize = CTX->HostFeatures.SupportsAVX ? 32 : 16; - const auto VectorOffset = CTX->HostFeatures.SupportsAVX ? - offsetof(Core::CPUState, xmm.avx.data[XMM][0]) : - offsetof(Core::CPUState, xmm.sse.data[XMM][0]); + const auto VectorOffset = + CTX->HostFeatures.SupportsAVX ? 
offsetof(Core::CPUState, xmm.avx.data[XMM][0]) : offsetof(Core::CPUState, xmm.sse.data[XMM][0]); - OrderedNode *Reg = _LoadRegister(false, VectorOffset, FPRClass, FPRFixedClass, VectorSize); + OrderedNode* Reg = _LoadRegister(false, VectorOffset, FPRClass, FPRFixedClass, VectorSize); return Reg; } -void OpDispatchBuilder::StoreGPRRegister(uint32_t GPR, OrderedNode *const Src, int8_t Size, uint8_t Offset) { +void OpDispatchBuilder::StoreGPRRegister(uint32_t GPR, OrderedNode* const Src, int8_t Size, uint8_t Offset) { const uint8_t GPRSize = CTX->GetGPRSize(); if (Size == -1) { Size = GPRSize; } - OrderedNode *Reg = Src; + OrderedNode* Reg = Src; if (Size != GPRSize || Offset != 0) { // Need to do an insert if not automatic size or zero offset. Reg = LoadGPRRegister(GPR); @@ -4985,33 +4823,30 @@ void OpDispatchBuilder::StoreGPRRegister(uint32_t GPR, OrderedNode *const Src, i _StoreRegister(Reg, false, offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRFixedClass, GPRSize); } -void OpDispatchBuilder::StoreXMMRegister(uint32_t XMM, OrderedNode *const Src) { +void OpDispatchBuilder::StoreXMMRegister(uint32_t XMM, OrderedNode* const Src) { const auto VectorSize = CTX->HostFeatures.SupportsAVX ? 32 : 16; - const auto VectorOffset = CTX->HostFeatures.SupportsAVX ? - offsetof(Core::CPUState, xmm.avx.data[XMM][0]) : - offsetof(Core::CPUState, xmm.sse.data[XMM][0]); + const auto VectorOffset = + CTX->HostFeatures.SupportsAVX ? 
offsetof(Core::CPUState, xmm.avx.data[XMM][0]) : offsetof(Core::CPUState, xmm.sse.data[XMM][0]); _StoreRegister(Src, false, VectorOffset, FPRClass, FPRFixedClass, VectorSize); } -OrderedNode *OpDispatchBuilder::LoadSource(RegisterClassType Class, X86Tables::DecodedOp const& Op, X86Tables::DecodedOperand const& Operand, - uint32_t Flags, const LoadSourceOptions& Options) { +OrderedNode* OpDispatchBuilder::LoadSource(RegisterClassType Class, const X86Tables::DecodedOp& Op, + const X86Tables::DecodedOperand& Operand, uint32_t Flags, const LoadSourceOptions& Options) { const uint8_t OpSize = GetSrcSize(Op); return LoadSource_WithOpSize(Class, Op, Operand, OpSize, Flags, Options); } -void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, FEXCore::X86Tables::DecodedOperand const& Operand, OrderedNode *const Src, uint8_t OpSize, int8_t Align, MemoryAccessType AccessType) { - LOGMAN_THROW_A_FMT(Operand.IsGPR() || - Operand.IsLiteral() || - Operand.IsGPRDirect() || - Operand.IsGPRIndirect() || - Operand.IsRIPRelative() || - Operand.IsSIB(), - "Unsupported Dest type"); +void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, + const FEXCore::X86Tables::DecodedOperand& Operand, OrderedNode* const Src, uint8_t OpSize, + int8_t Align, MemoryAccessType AccessType) { + LOGMAN_THROW_A_FMT( + Operand.IsGPR() || Operand.IsLiteral() || Operand.IsGPRDirect() || Operand.IsGPRIndirect() || Operand.IsRIPRelative() || Operand.IsSIB(), + "Unsupported Dest type"); // 8Bit and 16bit destination types store their result without effecting the upper bits // 32bit ops ZEXT the result to 64bit - OrderedNode *MemStoreDst {nullptr}; + OrderedNode* MemStoreDst {nullptr}; bool MemStore = false; const uint8_t GPRSize = CTX->GetGPRSize(); const uint32_t AddrSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) != 0 ? 
(GPRSize >> 1) : GPRSize; @@ -5019,13 +4854,11 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl if (Operand.IsLiteral()) { MemStoreDst = _Constant(Operand.Data.Literal.Size * 8, Operand.Data.Literal.Value); MemStore = true; // Literals are ONLY hardcoded memory destinations - } - else if (Operand.IsGPR()) { + } else if (Operand.IsGPR()) { const auto gpr = Operand.Data.GPR.GPR; if (gpr >= FEXCore::X86State::REG_MM_0) { _StoreContext(OpSize, Class, Src, offsetof(FEXCore::Core::CPUState, mm[gpr - FEXCore::X86State::REG_MM_0])); - } - else if (gpr >= FEXCore::X86State::REG_XMM_0) { + } else if (gpr >= FEXCore::X86State::REG_XMM_0) { const auto gprIndex = gpr - X86State::REG_XMM_0; const auto VectorSize = CTX->HostFeatures.SupportsAVX ? 32 : 16; @@ -5034,8 +4867,7 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl // Partial writes can come from FPRs. // TODO: Fix the instructions doing partial writes rather than dealing with it here. - LOGMAN_THROW_A_FMT(Class != IR::GPRClass, "Partial writes from GPR not allowed. Instruction: {}", - Op->TableInfo->Name); + LOGMAN_THROW_A_FMT(Class != IR::GPRClass, "Partial writes from GPR not allowed. Instruction: {}", Op->TableInfo->Name); // XMM-size is handled in implementations. if (VectorSize != Core::CPUState::XMM_AVX_REG_SIZE || OpSize != Core::CPUState::XMM_SSE_REG_SIZE) { @@ -5045,17 +4877,15 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl } StoreXMMRegister(gprIndex, Result); - } - else { + } else { if (GPRSize == 8 && OpSize == 4) { // If the Source IR op is 64 bits, we need to zext the upper bits // For all other sizes, the upper bits are guaranteed to already be zero - OrderedNode *Value = GetOpSize(Src) == 8 ? _Bfe(OpSize::i32Bit, 32, 0, Src) : Src; + OrderedNode* Value = GetOpSize(Src) == 8 ? 
_Bfe(OpSize::i32Bit, 32, 0, Src) : Src; StoreGPRRegister(gpr, Value, GPRSize); LOGMAN_THROW_AA_FMT(!Operand.Data.GPR.HighBits, "Can't handle 32bit store to high 8bit register"); - } - else { + } else { LOGMAN_THROW_AA_FMT(!(GPRSize == 4 && OpSize > 4), "Oops had a {} GPR load", OpSize); if (GPRSize != OpSize) { @@ -5065,22 +4895,19 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl // mov ah, 2 ; Move in to upper 8-bits of 16-bit reg. // mov ax, 2 ; Move in to lower 16-bits of reg. StoreGPRRegister(gpr, Src, OpSize, Operand.Data.GPR.HighBits * 8); - } - else { + } else { StoreGPRRegister(gpr, Src, std::min(GPRSize, OpSize)); } } } - } - else if (Operand.IsGPRDirect()) { + } else if (Operand.IsGPRDirect()) { MemStoreDst = LoadGPRRegister(Operand.Data.GPR.GPR, GPRSize); MemStore = true; if (Operand.Data.GPR.GPR == FEXCore::X86State::REG_RSP && AccessType == MemoryAccessType::DEFAULT) { AccessType = MemoryAccessType::NONTSO; } - } - else if (Operand.IsGPRIndirect()) { + } else if (Operand.IsGPRIndirect()) { auto GPR = LoadGPRRegister(Operand.Data.GPRIndirect.GPR, GPRSize); auto Constant = _Constant(GPRSize * 8, Operand.Data.GPRIndirect.Displacement); @@ -5089,26 +4916,22 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl if (Operand.Data.GPRIndirect.GPR == FEXCore::X86State::REG_RSP && AccessType == MemoryAccessType::DEFAULT) { AccessType = MemoryAccessType::NONTSO; } - } - else if (Operand.IsRIPRelative()) { + } else if (Operand.IsRIPRelative()) { if (CTX->Config.Is64BitMode) { MemStoreDst = GetRelocatedPC(Op, Operand.Data.RIPLiteral.Value.s); - } - else { + } else { // 32bit this isn't RIP relative but instead absolute MemStoreDst = _Constant(GPRSize * 8, Operand.Data.RIPLiteral.Value.u); } MemStore = true; - } - else if (Operand.IsSIB()) { - OrderedNode *Tmp {}; + } else if (Operand.IsSIB()) { + OrderedNode* Tmp {}; if (Operand.Data.SIB.Index != FEXCore::X86State::REG_INVALID && Operand.Data.SIB.Base 
!= FEXCore::X86State::REG_INVALID) { auto Base = LoadGPRRegister(Operand.Data.SIB.Base, GPRSize); auto Index = LoadGPRRegister(Operand.Data.SIB.Index, GPRSize); Tmp = _AddShift(IR::SizeToOpSize(GPRSize), Base, Index, ShiftType::LSL, FEXCore::ilog2(Operand.Data.SIB.Scale)); - } - else { + } else { if (Operand.Data.SIB.Index != FEXCore::X86State::REG_INVALID) { Tmp = LoadGPRRegister(Operand.Data.SIB.Index, GPRSize); @@ -5123,8 +4946,7 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl if (Tmp != nullptr) { Tmp = _Add(IR::SizeToOpSize(GPRSize), Tmp, GPR); - } - else { + } else { Tmp = GPR; } } @@ -5133,16 +4955,13 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl if (Operand.Data.SIB.Offset) { if (Tmp != nullptr) { MemStoreDst = _Add(IR::SizeToOpSize(GPRSize), Tmp, _Constant(GPRSize * 8, Operand.Data.SIB.Offset)); - } - else { + } else { MemStoreDst = _Constant(GPRSize * 8, Operand.Data.SIB.Offset); } - } - else { + } else { if (Tmp != nullptr) { MemStoreDst = Tmp; - } - else { + } else { MemStoreDst = _Constant(GPRSize * 8, 0); } } @@ -5168,32 +4987,33 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl } else { if (AccessType == MemoryAccessType::NONTSO || AccessType == MemoryAccessType::STREAM) { _StoreMem(Class, OpSize, MemStoreDst, Src, Align == -1 ? OpSize : Align); - } - else { + } else { _StoreMemAutoTSO(Class, OpSize, MemStoreDst, Src, Align == -1 ? 
OpSize : Align); } } } } -void OpDispatchBuilder::StoreResult(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, FEXCore::X86Tables::DecodedOperand const& Operand, OrderedNode *const Src, int8_t Align, MemoryAccessType AccessType) { +void OpDispatchBuilder::StoreResult(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, + const FEXCore::X86Tables::DecodedOperand& Operand, OrderedNode* const Src, int8_t Align, + MemoryAccessType AccessType) { StoreResult_WithOpSize(Class, Op, Operand, Src, GetDstSize(Op), Align, AccessType); } -void OpDispatchBuilder::StoreResult(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, OrderedNode *const Src, int8_t Align, MemoryAccessType AccessType) { +void OpDispatchBuilder::StoreResult(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, OrderedNode* const Src, + int8_t Align, MemoryAccessType AccessType) { StoreResult(Class, Op, Op->Dest, Src, Align, AccessType); } -OpDispatchBuilder::OpDispatchBuilder(FEXCore::Context::ContextImpl *ctx) +OpDispatchBuilder::OpDispatchBuilder(FEXCore::Context::ContextImpl* ctx) : IREmitter {ctx->OpDispatcherAllocator} , CTX {ctx} { ResetWorkingList(); InstallHostSpecificOpcodeHandlers(); } -OpDispatchBuilder::OpDispatchBuilder(FEXCore::Utils::IntrusivePooledAllocator &Allocator) +OpDispatchBuilder::OpDispatchBuilder(FEXCore::Utils::IntrusivePooledAllocator& Allocator) : IREmitter {Allocator} - , CTX {nullptr} { -} + , CTX {nullptr} {} void OpDispatchBuilder::ResetWorkingList() { IREmitter::ResetWorkingList(); @@ -5210,12 +5030,12 @@ void OpDispatchBuilder::UnhandledOp(OpcodeArgs) { template void OpDispatchBuilder::MOVGPROp(OpcodeArgs) { - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, {.Align = 1}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, {.Align = 1}); StoreResult(GPRClass, Op, Src, 1); } void OpDispatchBuilder::MOVGPRNTOp(OpcodeArgs) { - OrderedNode *Src = 
LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); StoreResult(GPRClass, Op, Src, 1, MemoryAccessType::STREAM); } @@ -5223,20 +5043,21 @@ void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCor auto Size = GetDstSize(Op); auto RoundedSize = Size; - if (ALUIROp != FEXCore::IR::IROps::OP_ANDWITHFLAGS) + if (ALUIROp != FEXCore::IR::IROps::OP_ANDWITHFLAGS) { RoundedSize = std::max(4u, RoundedSize); + } const auto OpSize = IR::SizeToOpSize(RoundedSize); // X86 basic ALU ops just do the operation between the destination and a single source - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true}); - OrderedNode *Result{}; - OrderedNode *Dest{}; + OrderedNode* Result {}; + OrderedNode* Dest {}; if (DestIsLockedMem(Op)) { HandledLock = true; - OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); DestMem = AppendSegmentOffset(DestMem, Op->Flags); DeriveOp(FetchOp, AtomicFetchOp, _AtomicFetchAdd(IR::SizeToOpSize(Size), Src, DestMem)); @@ -5244,8 +5065,7 @@ void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCor DeriveOp(ALUOp, ALUIROp, _AndWithFlags(OpSize, Dest, Src)); Result = ALUOp; - } - else { + } else { Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); /* On x86, the canonical way to zero a register is XOR with itself... @@ -5254,14 +5074,12 @@ void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCor * arm64, "mov x0, #0" is faster than "eor x0, x0, x0". Additionally this * lets more constant folding kick in for flags. 
*/ - if (ALUIROp == FEXCore::IR::IROps::OP_XOR && - Op->Dest.IsGPR() && Op->Src[0].IsGPR() && - Op->Dest.Data.GPR == Op->Src[0].Data.GPR) { + if (ALUIROp == FEXCore::IR::IROps::OP_XOR && Op->Dest.IsGPR() && Op->Src[0].IsGPR() && Op->Dest.Data.GPR == Op->Src[0].Data.GPR) { - Result = _Constant(0); + Result = _Constant(0); } else { - DeriveOp(ALUOp, ALUIROp, _AndWithFlags(OpSize, Dest, Src)); - Result = ALUOp; + DeriveOp(ALUOp, ALUIROp, _AndWithFlags(OpSize, Dest, Src)); + Result = ALUOp; } StoreResult(GPRClass, Op, Result, -1); @@ -5270,22 +5088,18 @@ void OpDispatchBuilder::ALUOpImpl(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCor // Flags set { switch (ALUIROp) { - case FEXCore::IR::IROps::OP_ADD: - GenerateFlags_ADD(Op, Result, Dest, Src); - break; - case FEXCore::IR::IROps::OP_SUB: - GenerateFlags_SUB(Op, Result, Dest, Src); - break; + case FEXCore::IR::IROps::OP_ADD: GenerateFlags_ADD(Op, Result, Dest, Src); break; + case FEXCore::IR::IROps::OP_SUB: GenerateFlags_SUB(Op, Result, Dest, Src); break; case FEXCore::IR::IROps::OP_XOR: case FEXCore::IR::IROps::OP_OR: { GenerateFlags_Logical(Op, Result, Dest, Src); - break; + break; } case FEXCore::IR::IROps::OP_ANDWITHFLAGS: { HandleNZ00Write(); CalculatePF(Result); _InvalidateFlags(1 << X86State::RFLAG_AF_RAW_LOC); - break; + break; } default: break; } @@ -5328,34 +5142,34 @@ void OpDispatchBuilder::INTOp(OpcodeArgs) { Reason.Signal = Core::FAULT_SIGSEGV; Reason.TrapNumber = X86State::X86_TRAPNO_OF; Reason.si_code = 0x80; - break; + break; case 0xF1: // INT1 Reason.ErrorRegister = 0; Reason.Signal = Core::FAULT_SIGTRAP; Reason.TrapNumber = X86State::X86_TRAPNO_DB; Reason.si_code = 1; SetRIPToNext = true; - break; + break; case 0xF4: { // HLT Reason.ErrorRegister = 0; Reason.Signal = Core::FAULT_SIGSEGV; Reason.TrapNumber = X86State::X86_TRAPNO_GP; Reason.si_code = 0x80; - break; + break; } case 0x0B: // UD2 Reason.ErrorRegister = 0; Reason.Signal = Core::FAULT_SIGILL; Reason.TrapNumber = X86State::X86_TRAPNO_UD; 
Reason.si_code = 2; - break; + break; case 0xCC: // INT3 Reason.ErrorRegister = 0; Reason.Signal = Core::FAULT_SIGTRAP; Reason.TrapNumber = X86State::X86_TRAPNO_BP; Reason.si_code = 0x80; SetRIPToNext = true; - break; + break; } // Calculate flags early. @@ -5369,8 +5183,7 @@ void OpDispatchBuilder::INTOp(OpcodeArgs) { // We want to set RIP to the next instruction after INT3/INT1 auto NewRIP = GetRelocatedPC(Op); _StoreContext(GPRSize, GPRClass, NewRIP, offsetof(FEXCore::Core::CPUState, rip)); - } - else if (Op->OP != 0xCE) { + } else if (Op->OP != 0xCE) { auto NewRIP = GetRelocatedPC(Op, -Op->InstSize); _StoreContext(GPRSize, GPRClass, NewRIP, offsetof(FEXCore::Core::CPUState, rip)); } @@ -5395,8 +5208,7 @@ void OpDispatchBuilder::INTOp(OpcodeArgs) { SetTrueJumpTarget(CondJump_, JumpTarget); SetCurrentCodeBlock(JumpTarget); StartNewBlock(); - } - else { + } else { BlockSetRIP = true; _Break(Reason); } @@ -5404,7 +5216,7 @@ void OpDispatchBuilder::INTOp(OpcodeArgs) { void OpDispatchBuilder::TZCNT(OpcodeArgs) { // _FindTrailingZeroes ignores upper garbage so we don't need to mask - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true}); Src = _FindTrailingZeroes(OpSizeFromSrc(Op), Src); StoreResult(GPRClass, Op, Src, -1); @@ -5414,7 +5226,7 @@ void OpDispatchBuilder::TZCNT(OpcodeArgs) { void OpDispatchBuilder::LZCNT(OpcodeArgs) { // _CountLeadingZeroes clears upper garbage so we don't need to mask - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true}); auto Res = _CountLeadingZeroes(OpSizeFromSrc(Op), Src); StoreResult(GPRClass, Op, Res, -1); @@ -5425,31 +5237,30 @@ void OpDispatchBuilder::MOVBEOp(OpcodeArgs) { const uint8_t GPRSize = CTX->GetGPRSize(); const auto SrcSize = GetSrcSize(Op); 
- OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); Src = _Rev(IR::SizeToOpSize(std::max(4u, SrcSize)), Src); if (SrcSize == 2) { // 16-bit does an insert. // Rev of 16-bit value as 32-bit replaces the result in the upper 16-bits of the result. // bfxil the 16-bit result in to the GPR. - OrderedNode *Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, GPRSize, Op->Flags); + OrderedNode* Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, GPRSize, Op->Flags); auto Result = _Bfxil(IR::SizeToOpSize(GPRSize), 16, 16, Dest, Src); StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, GPRSize, -1); - } - else { + } else { // 32-bit does regular zext StoreResult(GPRClass, Op, Op->Dest, Src, -1); } } void OpDispatchBuilder::CLWB(OpcodeArgs) { - OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); DestMem = AppendSegmentOffset(DestMem, Op->Flags); _CacheLineClean(DestMem); } void OpDispatchBuilder::CLFLUSHOPT(OpcodeArgs) { - OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); DestMem = AppendSegmentOffset(DestMem, Op->Flags); _CacheLineClear(DestMem, false); } @@ -5467,8 +5278,7 @@ void OpDispatchBuilder::MemFenceOrXSAVEOPT(OpcodeArgs) { if (Op->ModRM == 0xF0) { // 0xF0 is MFENCE _Fence(FEXCore::IR::Fence_LoadStore); - } - else { + } else { LogMan::Msg::EFmt("Application tried using XSAVEOPT"); UnimplementedOp(Op); } @@ -5478,17 +5288,16 @@ void OpDispatchBuilder::StoreFenceOrCLFlush(OpcodeArgs) { if (Op->ModRM == 0xF8) { // 0xF8 is SFENCE _Fence({FEXCore::IR::Fence_Store}); - } - else { + } else { // This is a CLFlush - OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData 
= false}); + OrderedNode* DestMem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); DestMem = AppendSegmentOffset(DestMem, Op->Flags); _CacheLineClear(DestMem, true); } } void OpDispatchBuilder::CLZeroOp(OpcodeArgs) { - OrderedNode *DestMem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); + OrderedNode* DestMem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); _CacheLineZero(DestMem); } @@ -5515,14 +5324,13 @@ void OpDispatchBuilder::CRC32(OpcodeArgs) { // Destination GPR size is always 4 or 8 bytes depending on widening uint8_t DstSize = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REX_WIDENING ? 8 : 4; - OrderedNode *Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, GPRSize, Op->Flags); + OrderedNode* Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, GPRSize, Op->Flags); // Incoming memory is 8, 16, 32, or 64 - OrderedNode *Src{}; + OrderedNode* Src {}; if (Op->Src[0].IsGPR()) { Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], GPRSize, Op->Flags); - } - else { + } else { Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); } auto Result = _CRC32(Dest, Src, GetSrcSize(Op)); @@ -5533,8 +5341,8 @@ template void OpDispatchBuilder::RDRANDOp(OpcodeArgs) { auto Res = _RDRAND(Reseed); - OrderedNode *Result_Lower = _ExtractElementPair(OpSize::i64Bit, Res, 0); - OrderedNode *Result_Upper = _ExtractElementPair(OpSize::i64Bit, Res, 1); + OrderedNode* Result_Lower = _ExtractElementPair(OpSize::i64Bit, Res, 0); + OrderedNode* Result_Upper = _ExtractElementPair(OpSize::i64Bit, Res, 1); StoreResult(GPRClass, Op, Result_Lower, -1); GenerateFlags_RDRAND(Op, Result_Upper); @@ -5594,23 +5402,18 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { } #define OPD(prefix, opcode) (((prefix) << 8) | opcode) constexpr uint16_t PF_38_NONE = 0; - constexpr uint16_t PF_38_66 = (1U << 0); - constexpr uint16_t PF_38_F2 = (1U << 1); + constexpr uint16_t PF_38_66 = (1U << 0); + constexpr 
uint16_t PF_38_F2 = (1U << 1); constexpr std::tuple H0F38_SHA[] = { - {OPD(PF_38_NONE, 0xC8), 1, &OpDispatchBuilder::SHA1NEXTEOp}, - {OPD(PF_38_NONE, 0xC9), 1, &OpDispatchBuilder::SHA1MSG1Op}, - {OPD(PF_38_NONE, 0xCA), 1, &OpDispatchBuilder::SHA1MSG2Op}, - {OPD(PF_38_NONE, 0xCB), 1, &OpDispatchBuilder::SHA256RNDS2Op}, - {OPD(PF_38_NONE, 0xCC), 1, &OpDispatchBuilder::SHA256MSG1Op}, - {OPD(PF_38_NONE, 0xCD), 1, &OpDispatchBuilder::SHA256MSG2Op}, + {OPD(PF_38_NONE, 0xC8), 1, &OpDispatchBuilder::SHA1NEXTEOp}, {OPD(PF_38_NONE, 0xC9), 1, &OpDispatchBuilder::SHA1MSG1Op}, + {OPD(PF_38_NONE, 0xCA), 1, &OpDispatchBuilder::SHA1MSG2Op}, {OPD(PF_38_NONE, 0xCB), 1, &OpDispatchBuilder::SHA256RNDS2Op}, + {OPD(PF_38_NONE, 0xCC), 1, &OpDispatchBuilder::SHA256MSG1Op}, {OPD(PF_38_NONE, 0xCD), 1, &OpDispatchBuilder::SHA256MSG2Op}, }; constexpr std::tuple H0F38_AES[] = { - {OPD(PF_38_66, 0xDB), 1, &OpDispatchBuilder::AESImcOp}, - {OPD(PF_38_66, 0xDC), 1, &OpDispatchBuilder::AESEncOp}, - {OPD(PF_38_66, 0xDD), 1, &OpDispatchBuilder::AESEncLastOp}, - {OPD(PF_38_66, 0xDE), 1, &OpDispatchBuilder::AESDecOp}, + {OPD(PF_38_66, 0xDB), 1, &OpDispatchBuilder::AESImcOp}, {OPD(PF_38_66, 0xDC), 1, &OpDispatchBuilder::AESEncOp}, + {OPD(PF_38_66, 0xDD), 1, &OpDispatchBuilder::AESEncLastOp}, {OPD(PF_38_66, 0xDE), 1, &OpDispatchBuilder::AESDecOp}, {OPD(PF_38_66, 0xDF), 1, &OpDispatchBuilder::AESDecLastOp}, }; constexpr std::tuple H0F38_CRC[] = { @@ -5624,12 +5427,12 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { #define OPD(REX, prefix, opcode) ((REX << 9) | (prefix << 8) | opcode) #define PF_3A_NONE 0 -#define PF_3A_66 1 +#define PF_3A_66 1 constexpr std::tuple H0F3A_AES[] = { - {OPD(0, PF_3A_66, 0xDF), 1, &OpDispatchBuilder::AESKeyGenAssist}, + {OPD(0, PF_3A_66, 0xDF), 1, &OpDispatchBuilder::AESKeyGenAssist}, }; constexpr std::tuple H0F3A_PCLMUL[] = { - {OPD(0, PF_3A_66, 0x44), 1, &OpDispatchBuilder::PCLMULQDQOp}, + {OPD(0, PF_3A_66, 0x44), 1, &OpDispatchBuilder::PCLMULQDQOp}, }; 
#undef PF_3A_NONE @@ -6208,12 +6011,12 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0xAC, 1, &OpDispatchBuilder::SHRDImmediateOp}, {0xAD, 1, &OpDispatchBuilder::SHRDOp}, {0xAF, 1, &OpDispatchBuilder::IMUL1SrcOp}, - {0xB0, 2, &OpDispatchBuilder::CMPXCHGOp}, // CMPXCHG + {0xB0, 2, &OpDispatchBuilder::CMPXCHGOp}, // CMPXCHG {0xB3, 1, &OpDispatchBuilder::BTOp<0, BTAction::BTClear>}, // BTR {0xB6, 2, &OpDispatchBuilder::MOVZXOp}, {0xBB, 1, &OpDispatchBuilder::BTOp<0, BTAction::BTComplement>}, // BTC - {0xBC, 1, &OpDispatchBuilder::BSFOp}, // BSF - {0xBD, 1, &OpDispatchBuilder::BSROp}, // BSF + {0xBC, 1, &OpDispatchBuilder::BSFOp}, // BSF + {0xBD, 1, &OpDispatchBuilder::BSROp}, // BSR {0xBE, 2, &OpDispatchBuilder::MOVSXOp}, {0xC0, 2, &OpDispatchBuilder::XADDOp}, {0xC3, 1, &OpDispatchBuilder::MOVGPRNTOp}, @@ -6365,7 +6168,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 3), 1, &OpDispatchBuilder::RCROp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 4), 1, &OpDispatchBuilder::SHLImmediateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 5), 1, &OpDispatchBuilder::SHRImmediateOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 6), 1, &OpDispatchBuilder::SHLImmediateOp}, // SAL + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 6), 1, &OpDispatchBuilder::SHLImmediateOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 7), 1, &OpDispatchBuilder::ASHRImmediateOp}, // SAR {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 0), 1, &OpDispatchBuilder::ROLImmediateOp}, @@ -6374,7 +6177,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 3), 1, &OpDispatchBuilder::RCROp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 4), 1, &OpDispatchBuilder::SHLImmediateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 5), 1, &OpDispatchBuilder::SHRImmediateOp}, - 
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 6), 1, &OpDispatchBuilder::SHLImmediateOp}, // SAL + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 6), 1, &OpDispatchBuilder::SHLImmediateOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 7), 1, &OpDispatchBuilder::ASHRImmediateOp}, // SAR {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 0), 1, &OpDispatchBuilder::ROLOp}, @@ -6382,8 +6185,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 2), 1, &OpDispatchBuilder::RCLOp1Bit}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 3), 1, &OpDispatchBuilder::RCROp8x1Bit}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 4), 1, &OpDispatchBuilder::SHLOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 5), 1, &OpDispatchBuilder::SHROp}, // 1Bit SHR - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 5), 1, &OpDispatchBuilder::SHROp}, // 1Bit SHR + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 0), 1, &OpDispatchBuilder::ROLOp}, @@ -6391,8 +6194,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 2), 1, &OpDispatchBuilder::RCLOp1Bit}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 3), 1, &OpDispatchBuilder::RCROp1Bit}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 4), 1, &OpDispatchBuilder::SHLOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 5), 1, &OpDispatchBuilder::SHROp}, // 1Bit SHR - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, 
OpToIndex(0xD1), 5), 1, &OpDispatchBuilder::SHROp}, // 1Bit SHR + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 0), 1, &OpDispatchBuilder::ROLOp}, @@ -6400,8 +6203,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 2), 1, &OpDispatchBuilder::RCLSmallerOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 3), 1, &OpDispatchBuilder::RCRSmallerOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 4), 1, &OpDispatchBuilder::SHLOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 5), 1, &OpDispatchBuilder::SHROp}, // SHR by CL - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 5), 1, &OpDispatchBuilder::SHROp}, // SHR by CL + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 0), 1, &OpDispatchBuilder::ROLOp}, @@ -6409,8 +6212,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 2), 1, &OpDispatchBuilder::RCLOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 3), 1, &OpDispatchBuilder::RCROp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 4), 1, &OpDispatchBuilder::SHLOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 5), 1, &OpDispatchBuilder::SHROp}, // SHR by CL - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 5), 1, &OpDispatchBuilder::SHROp}, // SHR by CL + 
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR // GROUP 3 @@ -6420,7 +6223,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 3), 1, &OpDispatchBuilder::NEGOp}, // NEG {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 4), 1, &OpDispatchBuilder::MULOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 5), 1, &OpDispatchBuilder::IMULOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 6), 1, &OpDispatchBuilder::DIVOp}, // DIV + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 6), 1, &OpDispatchBuilder::DIVOp}, // DIV {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 7), 1, &OpDispatchBuilder::IDIVOp}, // IDIV {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 0), 1, &OpDispatchBuilder::TESTOp<1>}, @@ -6430,7 +6233,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 4), 1, &OpDispatchBuilder::MULOp}, // MUL {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 5), 1, &OpDispatchBuilder::IMULOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 6), 1, &OpDispatchBuilder::DIVOp}, // DIV + {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 6), 1, &OpDispatchBuilder::DIVOp}, // DIV {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 7), 1, &OpDispatchBuilder::IDIVOp}, // IDIV // GROUP 4 @@ -6491,7 +6294,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0x2C, 1, &OpDispatchBuilder::CVTFPR_To_GPR<8, false>}, {0x2D, 1, &OpDispatchBuilder::CVTFPR_To_GPR<8, true>}, {0x51, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp}, - //x52 = Invalid + // x52 = Invalid {0x58, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, {0x59, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, {0x5A, 1, 
&OpDispatchBuilder::InsertScalar_CVT_Float_To_Float<4, 8>}, @@ -6618,10 +6421,10 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {0xFE, 1, &OpDispatchBuilder::VectorALUOp}, }; -constexpr uint16_t PF_NONE = 0; -constexpr uint16_t PF_F3 = 1; -constexpr uint16_t PF_66 = 2; -constexpr uint16_t PF_F2 = 3; + constexpr uint16_t PF_NONE = 0; + constexpr uint16_t PF_F3 = 1; + constexpr uint16_t PF_66 = 2; + constexpr uint16_t PF_F2 = 3; #define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_6) << 5) | (prefix) << 3 | (Reg)) constexpr std::tuple SecondaryExtensionOpTable[] = { // GROUP 7 @@ -6752,14 +6555,14 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xD8, 7) | 0x00, 8, &OpDispatchBuilder::FDIVF64<32, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xC0), 8, &OpDispatchBuilder::FADDF64<80, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xC8), 8, &OpDispatchBuilder::FMULF64<80, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xD0), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPD(0xD8, 0xD8), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPD(0xD8, 0xE0), 8, &OpDispatchBuilder::FSUBF64<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xE8), 8, &OpDispatchBuilder::FSUBF64<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xF0), 8, &OpDispatchBuilder::FDIVF64<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xF8), 8, &OpDispatchBuilder::FDIVF64<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xC0), 8, &OpDispatchBuilder::FADDF64<80, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xC8), 8, &OpDispatchBuilder::FMULF64<80, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xD0), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xD8, 0xD8), 8, 
&OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xD8, 0xE0), 8, &OpDispatchBuilder::FSUBF64<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xE8), 8, &OpDispatchBuilder::FSUBF64<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xF0), 8, &OpDispatchBuilder::FDIVF64<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xF8), 8, &OpDispatchBuilder::FDIVF64<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, {OPDReg(0xD9, 0) | 0x00, 8, &OpDispatchBuilder::FLDF64<32>}, @@ -6777,42 +6580,42 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xD9, 7) | 0x00, 8, &OpDispatchBuilder::X87FSTCW}, - {OPD(0xD9, 0xC0), 8, &OpDispatchBuilder::FLDF64<80>}, - {OPD(0xD9, 0xC8), 8, &OpDispatchBuilder::FXCH}, - {OPD(0xD9, 0xD0), 1, &OpDispatchBuilder::NOPOp}, // FNOP - // D1 = Invalid - // D8 = Invalid - {OPD(0xD9, 0xE0), 1, &OpDispatchBuilder::FCHSF64}, - {OPD(0xD9, 0xE1), 1, &OpDispatchBuilder::FABSF64}, - // E2 = Invalid - {OPD(0xD9, 0xE4), 1, &OpDispatchBuilder::FTSTF64}, - {OPD(0xD9, 0xE5), 1, &OpDispatchBuilder::X87FXAMF64}, - // E6 = Invalid - {OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::FLDF64_Const<0x3FF0000000000000>}, // 1.0 - {OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::FLDF64_Const<0x400A934F0979A372>}, // log2l(10) - {OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::FLDF64_Const<0x3FF71547652B82FE>}, // log2l(e) - {OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::FLDF64_Const<0x400921FB54442D18>}, // pi - {OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::FLDF64_Const<0x3FD34413509F79FF>}, // log10l(2) - {OPD(0xD9, 0xED), 1, &OpDispatchBuilder::FLDF64_Const<0x3FE62E42FEFA39EF>}, // log(2) - {OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::FLDF64_Const<0>}, // 0.0 - - // EF = Invalid - {OPD(0xD9, 0xF0), 1, &OpDispatchBuilder::X87UnaryOpF64}, - {OPD(0xD9, 0xF1), 1, &OpDispatchBuilder::X87FYL2XF64}, - {OPD(0xD9, 0xF2), 1, &OpDispatchBuilder::X87TANF64}, - {OPD(0xD9, 0xF3), 1, &OpDispatchBuilder::X87ATANF64}, - 
{OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::FXTRACTF64}, - {OPD(0xD9, 0xF5), 1, &OpDispatchBuilder::X87BinaryOpF64}, - {OPD(0xD9, 0xF6), 1, &OpDispatchBuilder::X87ModifySTP}, - {OPD(0xD9, 0xF7), 1, &OpDispatchBuilder::X87ModifySTP}, - {OPD(0xD9, 0xF8), 1, &OpDispatchBuilder::X87BinaryOpF64}, - {OPD(0xD9, 0xF9), 1, &OpDispatchBuilder::X87FYL2XF64}, - {OPD(0xD9, 0xFA), 1, &OpDispatchBuilder::FSQRTF64}, - {OPD(0xD9, 0xFB), 1, &OpDispatchBuilder::X87SinCosF64}, - {OPD(0xD9, 0xFC), 1, &OpDispatchBuilder::FRNDINTF64}, - {OPD(0xD9, 0xFD), 1, &OpDispatchBuilder::X87BinaryOpF64}, - {OPD(0xD9, 0xFE), 1, &OpDispatchBuilder::X87UnaryOpF64}, - {OPD(0xD9, 0xFF), 1, &OpDispatchBuilder::X87UnaryOpF64}, + {OPD(0xD9, 0xC0), 8, &OpDispatchBuilder::FLDF64<80>}, + {OPD(0xD9, 0xC8), 8, &OpDispatchBuilder::FXCH}, + {OPD(0xD9, 0xD0), 1, &OpDispatchBuilder::NOPOp}, // FNOP + // D1 = Invalid + // D8 = Invalid + {OPD(0xD9, 0xE0), 1, &OpDispatchBuilder::FCHSF64}, + {OPD(0xD9, 0xE1), 1, &OpDispatchBuilder::FABSF64}, + // E2 = Invalid + {OPD(0xD9, 0xE4), 1, &OpDispatchBuilder::FTSTF64}, + {OPD(0xD9, 0xE5), 1, &OpDispatchBuilder::X87FXAMF64}, + // E6 = Invalid + {OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::FLDF64_Const<0x3FF0000000000000>}, // 1.0 + {OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::FLDF64_Const<0x400A934F0979A372>}, // log2l(10) + {OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::FLDF64_Const<0x3FF71547652B82FE>}, // log2l(e) + {OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::FLDF64_Const<0x400921FB54442D18>}, // pi + {OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::FLDF64_Const<0x3FD34413509F79FF>}, // log10l(2) + {OPD(0xD9, 0xED), 1, &OpDispatchBuilder::FLDF64_Const<0x3FE62E42FEFA39EF>}, // log(2) + {OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::FLDF64_Const<0>}, // 0.0 + + // EF = Invalid + {OPD(0xD9, 0xF0), 1, &OpDispatchBuilder::X87UnaryOpF64}, + {OPD(0xD9, 0xF1), 1, &OpDispatchBuilder::X87FYL2XF64}, + {OPD(0xD9, 0xF2), 1, &OpDispatchBuilder::X87TANF64}, + {OPD(0xD9, 0xF3), 1, &OpDispatchBuilder::X87ATANF64}, + 
{OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::FXTRACTF64}, + {OPD(0xD9, 0xF5), 1, &OpDispatchBuilder::X87BinaryOpF64}, + {OPD(0xD9, 0xF6), 1, &OpDispatchBuilder::X87ModifySTP}, + {OPD(0xD9, 0xF7), 1, &OpDispatchBuilder::X87ModifySTP}, + {OPD(0xD9, 0xF8), 1, &OpDispatchBuilder::X87BinaryOpF64}, + {OPD(0xD9, 0xF9), 1, &OpDispatchBuilder::X87FYL2XF64}, + {OPD(0xD9, 0xFA), 1, &OpDispatchBuilder::FSQRTF64}, + {OPD(0xD9, 0xFB), 1, &OpDispatchBuilder::X87SinCosF64}, + {OPD(0xD9, 0xFC), 1, &OpDispatchBuilder::FRNDINTF64}, + {OPD(0xD9, 0xFD), 1, &OpDispatchBuilder::X87BinaryOpF64}, + {OPD(0xD9, 0xFE), 1, &OpDispatchBuilder::X87UnaryOpF64}, + {OPD(0xD9, 0xFF), 1, &OpDispatchBuilder::X87UnaryOpF64}, {OPDReg(0xDA, 0) | 0x00, 8, &OpDispatchBuilder::FADDF64<32, true, OpDispatchBuilder::OpResult::RES_ST0>}, @@ -6830,16 +6633,16 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xDA, 7) | 0x00, 8, &OpDispatchBuilder::FDIVF64<32, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xDA, 0xC0), 8, &OpDispatchBuilder::X87FCMOV}, - {OPD(0xDA, 0xC8), 8, &OpDispatchBuilder::X87FCMOV}, - {OPD(0xDA, 0xD0), 8, &OpDispatchBuilder::X87FCMOV}, - {OPD(0xDA, 0xD8), 8, &OpDispatchBuilder::X87FCMOV}, - // E0 = Invalid - // E8 = Invalid - {OPD(0xDA, 0xE9), 1, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, - // EA = Invalid - // F0 = Invalid - // F8 = Invalid + {OPD(0xDA, 0xC0), 8, &OpDispatchBuilder::X87FCMOV}, + {OPD(0xDA, 0xC8), 8, &OpDispatchBuilder::X87FCMOV}, + {OPD(0xDA, 0xD0), 8, &OpDispatchBuilder::X87FCMOV}, + {OPD(0xDA, 0xD8), 8, &OpDispatchBuilder::X87FCMOV}, + // E0 = Invalid + // E8 = Invalid + {OPD(0xDA, 0xE9), 1, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, + // EA = Invalid + // F0 = Invalid + // F8 = Invalid {OPDReg(0xDB, 0) | 0x00, 8, &OpDispatchBuilder::FILDF64}, @@ -6858,18 +6661,18 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xDB, 7) | 0x00, 8, &OpDispatchBuilder::FSTF64<80>}, - {OPD(0xDB, 0xC0), 8, 
&OpDispatchBuilder::X87FCMOV}, - {OPD(0xDB, 0xC8), 8, &OpDispatchBuilder::X87FCMOV}, - {OPD(0xDB, 0xD0), 8, &OpDispatchBuilder::X87FCMOV}, - {OPD(0xDB, 0xD8), 8, &OpDispatchBuilder::X87FCMOV}, - // E0 = Invalid - {OPD(0xDB, 0xE2), 1, &OpDispatchBuilder::NOPOp}, // FNCLEX - {OPD(0xDB, 0xE3), 1, &OpDispatchBuilder::FNINITF64}, - // E4 = Invalid - {OPD(0xDB, 0xE8), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, - {OPD(0xDB, 0xF0), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDB, 0xC0), 8, &OpDispatchBuilder::X87FCMOV}, + {OPD(0xDB, 0xC8), 8, &OpDispatchBuilder::X87FCMOV}, + {OPD(0xDB, 0xD0), 8, &OpDispatchBuilder::X87FCMOV}, + {OPD(0xDB, 0xD8), 8, &OpDispatchBuilder::X87FCMOV}, + // E0 = Invalid + {OPD(0xDB, 0xE2), 1, &OpDispatchBuilder::NOPOp}, // FNCLEX + {OPD(0xDB, 0xE3), 1, &OpDispatchBuilder::FNINITF64}, + // E4 = Invalid + {OPD(0xDB, 0xE8), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDB, 0xF0), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, - // F8 = Invalid + // F8 = Invalid {OPDReg(0xDC, 0) | 0x00, 8, &OpDispatchBuilder::FADDF64<64, false, OpDispatchBuilder::OpResult::RES_ST0>}, @@ -6887,12 +6690,12 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xDC, 7) | 0x00, 8, &OpDispatchBuilder::FDIVF64<64, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xDC, 0xC0), 8, &OpDispatchBuilder::FADDF64<80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xC8), 8, &OpDispatchBuilder::FMULF64<80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xE0), 8, &OpDispatchBuilder::FSUBF64<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xE8), 8, &OpDispatchBuilder::FSUBF64<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xF0), 8, &OpDispatchBuilder::FDIVF64<80, false, false, 
OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xF8), 8, &OpDispatchBuilder::FDIVF64<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xC0), 8, &OpDispatchBuilder::FADDF64<80, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xC8), 8, &OpDispatchBuilder::FMULF64<80, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xE0), 8, &OpDispatchBuilder::FSUBF64<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xE8), 8, &OpDispatchBuilder::FSUBF64<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xF0), 8, &OpDispatchBuilder::FDIVF64<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xF8), 8, &OpDispatchBuilder::FDIVF64<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, {OPDReg(0xDD, 0) | 0x00, 8, &OpDispatchBuilder::FLDF64<64>}, @@ -6909,12 +6712,12 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xDD, 7) | 0x00, 8, &OpDispatchBuilder::X87FNSTSW}, - {OPD(0xDD, 0xC0), 8, &OpDispatchBuilder::X87FFREE}, - {OPD(0xDD, 0xD0), 8, &OpDispatchBuilder::FST}, //register-register from regular X87 - {OPD(0xDD, 0xD8), 8, &OpDispatchBuilder::FST}, //^ + {OPD(0xDD, 0xC0), 8, &OpDispatchBuilder::X87FFREE}, + {OPD(0xDD, 0xD0), 8, &OpDispatchBuilder::FST}, // register-register from regular X87 + {OPD(0xDD, 0xD8), 8, &OpDispatchBuilder::FST}, //^ - {OPD(0xDD, 0xE0), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPD(0xDD, 0xE8), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xDD, 0xE0), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xDD, 0xE8), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, {OPDReg(0xDE, 0) | 0x00, 8, &OpDispatchBuilder::FADDF64<16, true, OpDispatchBuilder::OpResult::RES_ST0>}, @@ -6932,13 +6735,13 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xDE, 7) | 0x00, 8, 
&OpDispatchBuilder::FDIVF64<16, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xDE, 0xC0), 8, &OpDispatchBuilder::FADDF64<80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xC8), 8, &OpDispatchBuilder::FMULF64<80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xD9), 1, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, - {OPD(0xDE, 0xE0), 8, &OpDispatchBuilder::FSUBF64<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xE8), 8, &OpDispatchBuilder::FSUBF64<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xF0), 8, &OpDispatchBuilder::FDIVF64<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xF8), 8, &OpDispatchBuilder::FDIVF64<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xC0), 8, &OpDispatchBuilder::FADDF64<80, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xC8), 8, &OpDispatchBuilder::FMULF64<80, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xD9), 1, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, + {OPD(0xDE, 0xE0), 8, &OpDispatchBuilder::FSUBF64<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xE8), 8, &OpDispatchBuilder::FSUBF64<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xF0), 8, &OpDispatchBuilder::FDIVF64<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xF8), 8, &OpDispatchBuilder::FDIVF64<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, {OPDReg(0xDF, 0) | 0x00, 8, &OpDispatchBuilder::FILDF64}, @@ -6956,13 +6759,13 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xDF, 7) | 0x00, 8, &OpDispatchBuilder::FISTF64}, - // XXX: This should also set the x87 tag bits to empty - // We don't support this currently, so just pop the stack - {OPD(0xDF, 0xC0), 8, &OpDispatchBuilder::X87ModifySTP}, + // XXX: This should also set the x87 tag bits to empty + // We 
don't support this currently, so just pop the stack + {OPD(0xDF, 0xC0), 8, &OpDispatchBuilder::X87ModifySTP}, - {OPD(0xDF, 0xE0), 8, &OpDispatchBuilder::X87FNSTSW}, - {OPD(0xDF, 0xE8), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, - {OPD(0xDF, 0xF0), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDF, 0xE0), 8, &OpDispatchBuilder::X87FNSTSW}, + {OPD(0xDF, 0xE8), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDF, 0xF0), 8, &OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, }; constexpr std::tuple X87OpTable[] = { @@ -6982,14 +6785,14 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xD8, 7) | 0x00, 8, &OpDispatchBuilder::FDIV<32, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xC0), 8, &OpDispatchBuilder::FADD<80, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xC8), 8, &OpDispatchBuilder::FMUL<80, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xD0), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPD(0xD8, 0xD8), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPD(0xD8, 0xE0), 8, &OpDispatchBuilder::FSUB<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xE8), 8, &OpDispatchBuilder::FSUB<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xF0), 8, &OpDispatchBuilder::FDIV<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xF8), 8, &OpDispatchBuilder::FDIV<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xC0), 8, &OpDispatchBuilder::FADD<80, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xC8), 8, &OpDispatchBuilder::FMUL<80, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xD0), 8, &OpDispatchBuilder::FCOMI<80, false, 
OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xD8, 0xD8), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xD8, 0xE0), 8, &OpDispatchBuilder::FSUB<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xE8), 8, &OpDispatchBuilder::FSUB<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xF0), 8, &OpDispatchBuilder::FDIV<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xF8), 8, &OpDispatchBuilder::FDIV<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, {OPDReg(0xD9, 0) | 0x00, 8, &OpDispatchBuilder::FLD<32>}, @@ -7007,42 +6810,42 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xD9, 7) | 0x00, 8, &OpDispatchBuilder::X87FSTCW}, - {OPD(0xD9, 0xC0), 8, &OpDispatchBuilder::FLD<80>}, - {OPD(0xD9, 0xC8), 8, &OpDispatchBuilder::FXCH}, - {OPD(0xD9, 0xD0), 1, &OpDispatchBuilder::NOPOp}, // FNOP - // D1 = Invalid - // D8 = Invalid - {OPD(0xD9, 0xE0), 1, &OpDispatchBuilder::FCHS}, - {OPD(0xD9, 0xE1), 1, &OpDispatchBuilder::FABS}, - // E2 = Invalid - {OPD(0xD9, 0xE4), 1, &OpDispatchBuilder::FTST}, - {OPD(0xD9, 0xE5), 1, &OpDispatchBuilder::X87FXAM}, - // E6 = Invalid - {OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::FLD_Const<0x8000'0000'0000'0000, 0b0'011'1111'1111'1111>}, // 1.0 - {OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::FLD_Const<0xD49A'784B'CD1B'8AFE, 0x4000>}, // log2l(10) - {OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::FLD_Const<0xB8AA'3B29'5C17'F0BC, 0x3FFF>}, // log2l(e) - {OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::FLD_Const<0xC90F'DAA2'2168'C235, 0x4000>}, // pi - {OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::FLD_Const<0x9A20'9A84'FBCF'F799, 0x3FFD>}, // log10l(2) - {OPD(0xD9, 0xED), 1, &OpDispatchBuilder::FLD_Const<0xB172'17F7'D1CF'79AC, 0x3FFE>}, // log(2) - {OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::FLD_Const<0, 0>}, // 0.0 - - // EF = Invalid - {OPD(0xD9, 0xF0), 1, &OpDispatchBuilder::X87UnaryOp}, - {OPD(0xD9, 0xF1), 1, &OpDispatchBuilder::X87FYL2X}, - {OPD(0xD9, 
0xF2), 1, &OpDispatchBuilder::X87TAN}, - {OPD(0xD9, 0xF3), 1, &OpDispatchBuilder::X87ATAN}, - {OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::FXTRACT}, - {OPD(0xD9, 0xF5), 1, &OpDispatchBuilder::X87BinaryOp}, - {OPD(0xD9, 0xF6), 1, &OpDispatchBuilder::X87ModifySTP}, - {OPD(0xD9, 0xF7), 1, &OpDispatchBuilder::X87ModifySTP}, - {OPD(0xD9, 0xF8), 1, &OpDispatchBuilder::X87BinaryOp}, - {OPD(0xD9, 0xF9), 1, &OpDispatchBuilder::X87FYL2X}, - {OPD(0xD9, 0xFA), 1, &OpDispatchBuilder::X87UnaryOp}, - {OPD(0xD9, 0xFB), 1, &OpDispatchBuilder::X87SinCos}, - {OPD(0xD9, 0xFC), 1, &OpDispatchBuilder::FRNDINT}, - {OPD(0xD9, 0xFD), 1, &OpDispatchBuilder::X87BinaryOp}, - {OPD(0xD9, 0xFE), 1, &OpDispatchBuilder::X87UnaryOp}, - {OPD(0xD9, 0xFF), 1, &OpDispatchBuilder::X87UnaryOp}, + {OPD(0xD9, 0xC0), 8, &OpDispatchBuilder::FLD<80>}, + {OPD(0xD9, 0xC8), 8, &OpDispatchBuilder::FXCH}, + {OPD(0xD9, 0xD0), 1, &OpDispatchBuilder::NOPOp}, // FNOP + // D1 = Invalid + // D8 = Invalid + {OPD(0xD9, 0xE0), 1, &OpDispatchBuilder::FCHS}, + {OPD(0xD9, 0xE1), 1, &OpDispatchBuilder::FABS}, + // E2 = Invalid + {OPD(0xD9, 0xE4), 1, &OpDispatchBuilder::FTST}, + {OPD(0xD9, 0xE5), 1, &OpDispatchBuilder::X87FXAM}, + // E6 = Invalid + {OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::FLD_Const<0x8000'0000'0000'0000, 0b0'011'1111'1111'1111>}, // 1.0 + {OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::FLD_Const<0xD49A'784B'CD1B'8AFE, 0x4000>}, // log2l(10) + {OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::FLD_Const<0xB8AA'3B29'5C17'F0BC, 0x3FFF>}, // log2l(e) + {OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::FLD_Const<0xC90F'DAA2'2168'C235, 0x4000>}, // pi + {OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::FLD_Const<0x9A20'9A84'FBCF'F799, 0x3FFD>}, // log10l(2) + {OPD(0xD9, 0xED), 1, &OpDispatchBuilder::FLD_Const<0xB172'17F7'D1CF'79AC, 0x3FFE>}, // log(2) + {OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::FLD_Const<0, 0>}, // 0.0 + + // EF = Invalid + {OPD(0xD9, 0xF0), 1, &OpDispatchBuilder::X87UnaryOp}, + {OPD(0xD9, 0xF1), 1, &OpDispatchBuilder::X87FYL2X}, + 
{OPD(0xD9, 0xF2), 1, &OpDispatchBuilder::X87TAN}, + {OPD(0xD9, 0xF3), 1, &OpDispatchBuilder::X87ATAN}, + {OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::FXTRACT}, + {OPD(0xD9, 0xF5), 1, &OpDispatchBuilder::X87BinaryOp}, + {OPD(0xD9, 0xF6), 1, &OpDispatchBuilder::X87ModifySTP}, + {OPD(0xD9, 0xF7), 1, &OpDispatchBuilder::X87ModifySTP}, + {OPD(0xD9, 0xF8), 1, &OpDispatchBuilder::X87BinaryOp}, + {OPD(0xD9, 0xF9), 1, &OpDispatchBuilder::X87FYL2X}, + {OPD(0xD9, 0xFA), 1, &OpDispatchBuilder::X87UnaryOp}, + {OPD(0xD9, 0xFB), 1, &OpDispatchBuilder::X87SinCos}, + {OPD(0xD9, 0xFC), 1, &OpDispatchBuilder::FRNDINT}, + {OPD(0xD9, 0xFD), 1, &OpDispatchBuilder::X87BinaryOp}, + {OPD(0xD9, 0xFE), 1, &OpDispatchBuilder::X87UnaryOp}, + {OPD(0xD9, 0xFF), 1, &OpDispatchBuilder::X87UnaryOp}, {OPDReg(0xDA, 0) | 0x00, 8, &OpDispatchBuilder::FADD<32, true, OpDispatchBuilder::OpResult::RES_ST0>}, @@ -7060,16 +6863,16 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xDA, 7) | 0x00, 8, &OpDispatchBuilder::FDIV<32, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xDA, 0xC0), 8, &OpDispatchBuilder::X87FCMOV}, - {OPD(0xDA, 0xC8), 8, &OpDispatchBuilder::X87FCMOV}, - {OPD(0xDA, 0xD0), 8, &OpDispatchBuilder::X87FCMOV}, - {OPD(0xDA, 0xD8), 8, &OpDispatchBuilder::X87FCMOV}, - // E0 = Invalid - // E8 = Invalid - {OPD(0xDA, 0xE9), 1, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, - // EA = Invalid - // F0 = Invalid - // F8 = Invalid + {OPD(0xDA, 0xC0), 8, &OpDispatchBuilder::X87FCMOV}, + {OPD(0xDA, 0xC8), 8, &OpDispatchBuilder::X87FCMOV}, + {OPD(0xDA, 0xD0), 8, &OpDispatchBuilder::X87FCMOV}, + {OPD(0xDA, 0xD8), 8, &OpDispatchBuilder::X87FCMOV}, + // E0 = Invalid + // E8 = Invalid + {OPD(0xDA, 0xE9), 1, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, + // EA = Invalid + // F0 = Invalid + // F8 = Invalid {OPDReg(0xDB, 0) | 0x00, 8, &OpDispatchBuilder::FILD}, @@ -7088,18 +6891,18 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xDB, 7) | 
0x00, 8, &OpDispatchBuilder::FST<80>}, - {OPD(0xDB, 0xC0), 8, &OpDispatchBuilder::X87FCMOV}, - {OPD(0xDB, 0xC8), 8, &OpDispatchBuilder::X87FCMOV}, - {OPD(0xDB, 0xD0), 8, &OpDispatchBuilder::X87FCMOV}, - {OPD(0xDB, 0xD8), 8, &OpDispatchBuilder::X87FCMOV}, - // E0 = Invalid - {OPD(0xDB, 0xE2), 1, &OpDispatchBuilder::NOPOp}, // FNCLEX - {OPD(0xDB, 0xE3), 1, &OpDispatchBuilder::FNINIT}, - // E4 = Invalid - {OPD(0xDB, 0xE8), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, - {OPD(0xDB, 0xF0), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDB, 0xC0), 8, &OpDispatchBuilder::X87FCMOV}, + {OPD(0xDB, 0xC8), 8, &OpDispatchBuilder::X87FCMOV}, + {OPD(0xDB, 0xD0), 8, &OpDispatchBuilder::X87FCMOV}, + {OPD(0xDB, 0xD8), 8, &OpDispatchBuilder::X87FCMOV}, + // E0 = Invalid + {OPD(0xDB, 0xE2), 1, &OpDispatchBuilder::NOPOp}, // FNCLEX + {OPD(0xDB, 0xE3), 1, &OpDispatchBuilder::FNINIT}, + // E4 = Invalid + {OPD(0xDB, 0xE8), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDB, 0xF0), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, - // F8 = Invalid + // F8 = Invalid {OPDReg(0xDC, 0) | 0x00, 8, &OpDispatchBuilder::FADD<64, false, OpDispatchBuilder::OpResult::RES_ST0>}, @@ -7117,12 +6920,12 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xDC, 7) | 0x00, 8, &OpDispatchBuilder::FDIV<64, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xDC, 0xC0), 8, &OpDispatchBuilder::FADD<80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xC8), 8, &OpDispatchBuilder::FMUL<80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xE0), 8, &OpDispatchBuilder::FSUB<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xE8), 8, &OpDispatchBuilder::FSUB<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xF0), 8, &OpDispatchBuilder::FDIV<80, false, 
false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xF8), 8, &OpDispatchBuilder::FDIV<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xC0), 8, &OpDispatchBuilder::FADD<80, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xC8), 8, &OpDispatchBuilder::FMUL<80, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xE0), 8, &OpDispatchBuilder::FSUB<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xE8), 8, &OpDispatchBuilder::FSUB<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xF0), 8, &OpDispatchBuilder::FDIV<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xF8), 8, &OpDispatchBuilder::FDIV<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, {OPDReg(0xDD, 0) | 0x00, 8, &OpDispatchBuilder::FLD<64>}, @@ -7139,12 +6942,12 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xDD, 7) | 0x00, 8, &OpDispatchBuilder::X87FNSTSW}, - {OPD(0xDD, 0xC0), 8, &OpDispatchBuilder::X87FFREE}, - {OPD(0xDD, 0xD0), 8, &OpDispatchBuilder::FST}, - {OPD(0xDD, 0xD8), 8, &OpDispatchBuilder::FST}, + {OPD(0xDD, 0xC0), 8, &OpDispatchBuilder::X87FFREE}, + {OPD(0xDD, 0xD0), 8, &OpDispatchBuilder::FST}, + {OPD(0xDD, 0xD8), 8, &OpDispatchBuilder::FST}, - {OPD(0xDD, 0xE0), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPD(0xDD, 0xE8), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xDD, 0xE0), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xDD, 0xE8), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, {OPDReg(0xDE, 0) | 0x00, 8, &OpDispatchBuilder::FADD<16, true, OpDispatchBuilder::OpResult::RES_ST0>}, @@ -7162,13 +6965,13 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xDE, 7) | 0x00, 8, &OpDispatchBuilder::FDIV<16, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xDE, 0xC0), 8, 
&OpDispatchBuilder::FADD<80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xC8), 8, &OpDispatchBuilder::FMUL<80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xD9), 1, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, - {OPD(0xDE, 0xE0), 8, &OpDispatchBuilder::FSUB<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xE8), 8, &OpDispatchBuilder::FSUB<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xF0), 8, &OpDispatchBuilder::FDIV<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xF8), 8, &OpDispatchBuilder::FDIV<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xC0), 8, &OpDispatchBuilder::FADD<80, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xC8), 8, &OpDispatchBuilder::FMUL<80, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xD9), 1, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, + {OPD(0xDE, 0xE0), 8, &OpDispatchBuilder::FSUB<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xE8), 8, &OpDispatchBuilder::FSUB<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xF0), 8, &OpDispatchBuilder::FDIV<80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xF8), 8, &OpDispatchBuilder::FDIV<80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, {OPDReg(0xDF, 0) | 0x00, 8, &OpDispatchBuilder::FILD}, @@ -7186,84 +6989,84 @@ constexpr uint16_t PF_F2 = 3; {OPDReg(0xDF, 7) | 0x00, 8, &OpDispatchBuilder::FIST}, - // XXX: This should also set the x87 tag bits to empty - // We don't support this currently, so just pop the stack - {OPD(0xDF, 0xC0), 8, &OpDispatchBuilder::X87ModifySTP}, + // XXX: This should also set the x87 tag bits to empty + // We don't support this currently, so just pop the stack + {OPD(0xDF, 0xC0), 8, &OpDispatchBuilder::X87ModifySTP}, - {OPD(0xDF, 0xE0), 8, 
&OpDispatchBuilder::X87FNSTSW}, - {OPD(0xDF, 0xE8), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, - {OPD(0xDF, 0xF0), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDF, 0xE0), 8, &OpDispatchBuilder::X87FNSTSW}, + {OPD(0xDF, 0xE8), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDF, 0xF0), 8, &OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, }; #undef OPD #undef OPDReg #define OPD(prefix, opcode) (((prefix) << 8) | opcode) constexpr uint16_t PF_38_NONE = 0; - constexpr uint16_t PF_38_66 = (1U << 0); - constexpr uint16_t PF_38_F3 = (1U << 2); + constexpr uint16_t PF_38_66 = (1U << 0); + constexpr uint16_t PF_38_F3 = (1U << 2); constexpr std::tuple H0F38Table[] = { {OPD(PF_38_NONE, 0x00), 1, &OpDispatchBuilder::PSHUFBOp}, - {OPD(PF_38_66, 0x00), 1, &OpDispatchBuilder::PSHUFBOp}, + {OPD(PF_38_66, 0x00), 1, &OpDispatchBuilder::PSHUFBOp}, {OPD(PF_38_NONE, 0x01), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x01), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x01), 1, &OpDispatchBuilder::VectorALUOp}, {OPD(PF_38_NONE, 0x02), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x02), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x02), 1, &OpDispatchBuilder::VectorALUOp}, {OPD(PF_38_NONE, 0x03), 1, &OpDispatchBuilder::PHADDS}, - {OPD(PF_38_66, 0x03), 1, &OpDispatchBuilder::PHADDS}, + {OPD(PF_38_66, 0x03), 1, &OpDispatchBuilder::PHADDS}, {OPD(PF_38_NONE, 0x04), 1, &OpDispatchBuilder::PMADDUBSW}, - {OPD(PF_38_66, 0x04), 1, &OpDispatchBuilder::PMADDUBSW}, + {OPD(PF_38_66, 0x04), 1, &OpDispatchBuilder::PMADDUBSW}, {OPD(PF_38_NONE, 0x05), 1, &OpDispatchBuilder::PHSUB<2>}, - {OPD(PF_38_66, 0x05), 1, &OpDispatchBuilder::PHSUB<2>}, + {OPD(PF_38_66, 0x05), 1, &OpDispatchBuilder::PHSUB<2>}, {OPD(PF_38_NONE, 0x06), 1, &OpDispatchBuilder::PHSUB<4>}, - 
{OPD(PF_38_66, 0x06), 1, &OpDispatchBuilder::PHSUB<4>}, + {OPD(PF_38_66, 0x06), 1, &OpDispatchBuilder::PHSUB<4>}, {OPD(PF_38_NONE, 0x07), 1, &OpDispatchBuilder::PHSUBS}, - {OPD(PF_38_66, 0x07), 1, &OpDispatchBuilder::PHSUBS}, + {OPD(PF_38_66, 0x07), 1, &OpDispatchBuilder::PHSUBS}, {OPD(PF_38_NONE, 0x08), 1, &OpDispatchBuilder::PSIGN<1>}, - {OPD(PF_38_66, 0x08), 1, &OpDispatchBuilder::PSIGN<1>}, + {OPD(PF_38_66, 0x08), 1, &OpDispatchBuilder::PSIGN<1>}, {OPD(PF_38_NONE, 0x09), 1, &OpDispatchBuilder::PSIGN<2>}, - {OPD(PF_38_66, 0x09), 1, &OpDispatchBuilder::PSIGN<2>}, + {OPD(PF_38_66, 0x09), 1, &OpDispatchBuilder::PSIGN<2>}, {OPD(PF_38_NONE, 0x0A), 1, &OpDispatchBuilder::PSIGN<4>}, - {OPD(PF_38_66, 0x0A), 1, &OpDispatchBuilder::PSIGN<4>}, + {OPD(PF_38_66, 0x0A), 1, &OpDispatchBuilder::PSIGN<4>}, {OPD(PF_38_NONE, 0x0B), 1, &OpDispatchBuilder::PMULHRSW}, - {OPD(PF_38_66, 0x0B), 1, &OpDispatchBuilder::PMULHRSW}, - {OPD(PF_38_66, 0x10), 1, &OpDispatchBuilder::VectorVariableBlend<1>}, - {OPD(PF_38_66, 0x14), 1, &OpDispatchBuilder::VectorVariableBlend<4>}, - {OPD(PF_38_66, 0x15), 1, &OpDispatchBuilder::VectorVariableBlend<8>}, - {OPD(PF_38_66, 0x17), 1, &OpDispatchBuilder::PTestOp}, + {OPD(PF_38_66, 0x0B), 1, &OpDispatchBuilder::PMULHRSW}, + {OPD(PF_38_66, 0x10), 1, &OpDispatchBuilder::VectorVariableBlend<1>}, + {OPD(PF_38_66, 0x14), 1, &OpDispatchBuilder::VectorVariableBlend<4>}, + {OPD(PF_38_66, 0x15), 1, &OpDispatchBuilder::VectorVariableBlend<8>}, + {OPD(PF_38_66, 0x17), 1, &OpDispatchBuilder::PTestOp}, {OPD(PF_38_NONE, 0x1C), 1, &OpDispatchBuilder::VectorUnaryOp}, - {OPD(PF_38_66, 0x1C), 1, &OpDispatchBuilder::VectorUnaryOp}, + {OPD(PF_38_66, 0x1C), 1, &OpDispatchBuilder::VectorUnaryOp}, {OPD(PF_38_NONE, 0x1D), 1, &OpDispatchBuilder::VectorUnaryOp}, - {OPD(PF_38_66, 0x1D), 1, &OpDispatchBuilder::VectorUnaryOp}, + {OPD(PF_38_66, 0x1D), 1, &OpDispatchBuilder::VectorUnaryOp}, {OPD(PF_38_NONE, 0x1E), 1, &OpDispatchBuilder::VectorUnaryOp}, - {OPD(PF_38_66, 0x1E), 1, 
&OpDispatchBuilder::VectorUnaryOp}, - {OPD(PF_38_66, 0x20), 1, &OpDispatchBuilder::ExtendVectorElements<1, 2, true>}, - {OPD(PF_38_66, 0x21), 1, &OpDispatchBuilder::ExtendVectorElements<1, 4, true>}, - {OPD(PF_38_66, 0x22), 1, &OpDispatchBuilder::ExtendVectorElements<1, 8, true>}, - {OPD(PF_38_66, 0x23), 1, &OpDispatchBuilder::ExtendVectorElements<2, 4, true>}, - {OPD(PF_38_66, 0x24), 1, &OpDispatchBuilder::ExtendVectorElements<2, 8, true>}, - {OPD(PF_38_66, 0x25), 1, &OpDispatchBuilder::ExtendVectorElements<4, 8, true>}, - {OPD(PF_38_66, 0x28), 1, &OpDispatchBuilder::PMULLOp<4, true>}, - {OPD(PF_38_66, 0x29), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x2A), 1, &OpDispatchBuilder::MOVVectorNTOp}, - {OPD(PF_38_66, 0x2B), 1, &OpDispatchBuilder::PACKUSOp<4>}, - {OPD(PF_38_66, 0x30), 1, &OpDispatchBuilder::ExtendVectorElements<1, 2, false>}, - {OPD(PF_38_66, 0x31), 1, &OpDispatchBuilder::ExtendVectorElements<1, 4, false>}, - {OPD(PF_38_66, 0x32), 1, &OpDispatchBuilder::ExtendVectorElements<1, 8, false>}, - {OPD(PF_38_66, 0x33), 1, &OpDispatchBuilder::ExtendVectorElements<2, 4, false>}, - {OPD(PF_38_66, 0x34), 1, &OpDispatchBuilder::ExtendVectorElements<2, 8, false>}, - {OPD(PF_38_66, 0x35), 1, &OpDispatchBuilder::ExtendVectorElements<4, 8, false>}, - {OPD(PF_38_66, 0x37), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x38), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x39), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x3A), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x3B), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x3C), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x3D), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x3E), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x3F), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x40), 1, &OpDispatchBuilder::VectorALUOp}, - {OPD(PF_38_66, 0x41), 1, &OpDispatchBuilder::PHMINPOSUWOp}, + {OPD(PF_38_66, 0x1E), 1, 
&OpDispatchBuilder::VectorUnaryOp}, + {OPD(PF_38_66, 0x20), 1, &OpDispatchBuilder::ExtendVectorElements<1, 2, true>}, + {OPD(PF_38_66, 0x21), 1, &OpDispatchBuilder::ExtendVectorElements<1, 4, true>}, + {OPD(PF_38_66, 0x22), 1, &OpDispatchBuilder::ExtendVectorElements<1, 8, true>}, + {OPD(PF_38_66, 0x23), 1, &OpDispatchBuilder::ExtendVectorElements<2, 4, true>}, + {OPD(PF_38_66, 0x24), 1, &OpDispatchBuilder::ExtendVectorElements<2, 8, true>}, + {OPD(PF_38_66, 0x25), 1, &OpDispatchBuilder::ExtendVectorElements<4, 8, true>}, + {OPD(PF_38_66, 0x28), 1, &OpDispatchBuilder::PMULLOp<4, true>}, + {OPD(PF_38_66, 0x29), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x2A), 1, &OpDispatchBuilder::MOVVectorNTOp}, + {OPD(PF_38_66, 0x2B), 1, &OpDispatchBuilder::PACKUSOp<4>}, + {OPD(PF_38_66, 0x30), 1, &OpDispatchBuilder::ExtendVectorElements<1, 2, false>}, + {OPD(PF_38_66, 0x31), 1, &OpDispatchBuilder::ExtendVectorElements<1, 4, false>}, + {OPD(PF_38_66, 0x32), 1, &OpDispatchBuilder::ExtendVectorElements<1, 8, false>}, + {OPD(PF_38_66, 0x33), 1, &OpDispatchBuilder::ExtendVectorElements<2, 4, false>}, + {OPD(PF_38_66, 0x34), 1, &OpDispatchBuilder::ExtendVectorElements<2, 8, false>}, + {OPD(PF_38_66, 0x35), 1, &OpDispatchBuilder::ExtendVectorElements<4, 8, false>}, + {OPD(PF_38_66, 0x37), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x38), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x39), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x3A), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x3B), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x3C), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x3D), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x3E), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x3F), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x40), 1, &OpDispatchBuilder::VectorALUOp}, + {OPD(PF_38_66, 0x41), 1, &OpDispatchBuilder::PHMINPOSUWOp}, {OPD(PF_38_NONE, 0xF0), 2, 
&OpDispatchBuilder::MOVBEOp}, {OPD(PF_38_66, 0xF0), 2, &OpDispatchBuilder::MOVBEOp}, @@ -7276,38 +7079,38 @@ constexpr uint16_t PF_F2 = 3; #define OPD(REX, prefix, opcode) ((REX << 9) | (prefix << 8) | opcode) #define PF_3A_NONE 0 -#define PF_3A_66 1 +#define PF_3A_66 1 constexpr std::tuple H0F3ATable[] = { - {OPD(0, PF_3A_66, 0x08), 1, &OpDispatchBuilder::VectorRound<4>}, - {OPD(0, PF_3A_66, 0x09), 1, &OpDispatchBuilder::VectorRound<8>}, - {OPD(0, PF_3A_66, 0x0A), 1, &OpDispatchBuilder::InsertScalarRound<4>}, - {OPD(0, PF_3A_66, 0x0B), 1, &OpDispatchBuilder::InsertScalarRound<8>}, - {OPD(0, PF_3A_66, 0x0C), 1, &OpDispatchBuilder::VectorBlend<4>}, - {OPD(0, PF_3A_66, 0x0D), 1, &OpDispatchBuilder::VectorBlend<8>}, - {OPD(0, PF_3A_66, 0x0E), 1, &OpDispatchBuilder::VectorBlend<2>}, + {OPD(0, PF_3A_66, 0x08), 1, &OpDispatchBuilder::VectorRound<4>}, + {OPD(0, PF_3A_66, 0x09), 1, &OpDispatchBuilder::VectorRound<8>}, + {OPD(0, PF_3A_66, 0x0A), 1, &OpDispatchBuilder::InsertScalarRound<4>}, + {OPD(0, PF_3A_66, 0x0B), 1, &OpDispatchBuilder::InsertScalarRound<8>}, + {OPD(0, PF_3A_66, 0x0C), 1, &OpDispatchBuilder::VectorBlend<4>}, + {OPD(0, PF_3A_66, 0x0D), 1, &OpDispatchBuilder::VectorBlend<8>}, + {OPD(0, PF_3A_66, 0x0E), 1, &OpDispatchBuilder::VectorBlend<2>}, {OPD(0, PF_3A_NONE, 0x0F), 1, &OpDispatchBuilder::PAlignrOp}, - {OPD(0, PF_3A_66, 0x0F), 1, &OpDispatchBuilder::PAlignrOp}, - {OPD(1, PF_3A_66, 0x0F), 1, &OpDispatchBuilder::PAlignrOp}, - - {OPD(0, PF_3A_66, 0x14), 1, &OpDispatchBuilder::PExtrOp<1>}, - {OPD(0, PF_3A_66, 0x15), 1, &OpDispatchBuilder::PExtrOp<2>}, - {OPD(0, PF_3A_66, 0x16), 1, &OpDispatchBuilder::PExtrOp<4>}, - {OPD(1, PF_3A_66, 0x16), 1, &OpDispatchBuilder::PExtrOp<8>}, - {OPD(0, PF_3A_66, 0x17), 1, &OpDispatchBuilder::PExtrOp<4>}, - - {OPD(0, PF_3A_66, 0x20), 1, &OpDispatchBuilder::PINSROp<1>}, - {OPD(0, PF_3A_66, 0x21), 1, &OpDispatchBuilder::InsertPSOp}, - {OPD(0, PF_3A_66, 0x22), 1, &OpDispatchBuilder::PINSROp<4>}, - {OPD(1, PF_3A_66, 0x22), 1, 
&OpDispatchBuilder::PINSROp<8>}, - {OPD(0, PF_3A_66, 0x40), 1, &OpDispatchBuilder::DPPOp<4>}, - {OPD(0, PF_3A_66, 0x41), 1, &OpDispatchBuilder::DPPOp<8>}, - {OPD(0, PF_3A_66, 0x42), 1, &OpDispatchBuilder::MPSADBWOp}, - - {OPD(0, PF_3A_66, 0x60), 1, &OpDispatchBuilder::VPCMPESTRMOp}, - {OPD(0, PF_3A_66, 0x61), 1, &OpDispatchBuilder::VPCMPESTRIOp}, - {OPD(0, PF_3A_66, 0x62), 1, &OpDispatchBuilder::VPCMPISTRMOp}, - {OPD(0, PF_3A_66, 0x63), 1, &OpDispatchBuilder::VPCMPISTRIOp}, + {OPD(0, PF_3A_66, 0x0F), 1, &OpDispatchBuilder::PAlignrOp}, + {OPD(1, PF_3A_66, 0x0F), 1, &OpDispatchBuilder::PAlignrOp}, + + {OPD(0, PF_3A_66, 0x14), 1, &OpDispatchBuilder::PExtrOp<1>}, + {OPD(0, PF_3A_66, 0x15), 1, &OpDispatchBuilder::PExtrOp<2>}, + {OPD(0, PF_3A_66, 0x16), 1, &OpDispatchBuilder::PExtrOp<4>}, + {OPD(1, PF_3A_66, 0x16), 1, &OpDispatchBuilder::PExtrOp<8>}, + {OPD(0, PF_3A_66, 0x17), 1, &OpDispatchBuilder::PExtrOp<4>}, + + {OPD(0, PF_3A_66, 0x20), 1, &OpDispatchBuilder::PINSROp<1>}, + {OPD(0, PF_3A_66, 0x21), 1, &OpDispatchBuilder::InsertPSOp}, + {OPD(0, PF_3A_66, 0x22), 1, &OpDispatchBuilder::PINSROp<4>}, + {OPD(1, PF_3A_66, 0x22), 1, &OpDispatchBuilder::PINSROp<8>}, + {OPD(0, PF_3A_66, 0x40), 1, &OpDispatchBuilder::DPPOp<4>}, + {OPD(0, PF_3A_66, 0x41), 1, &OpDispatchBuilder::DPPOp<8>}, + {OPD(0, PF_3A_66, 0x42), 1, &OpDispatchBuilder::MPSADBWOp}, + + {OPD(0, PF_3A_66, 0x60), 1, &OpDispatchBuilder::VPCMPESTRMOp}, + {OPD(0, PF_3A_66, 0x61), 1, &OpDispatchBuilder::VPCMPESTRIOp}, + {OPD(0, PF_3A_66, 0x62), 1, &OpDispatchBuilder::VPCMPISTRMOp}, + {OPD(0, PF_3A_66, 0x63), 1, &OpDispatchBuilder::VPCMPISTRIOp}, {OPD(0, PF_3A_NONE, 0xCC), 1, &OpDispatchBuilder::SHA1RNDS4Op}, }; @@ -7357,27 +7160,23 @@ constexpr uint16_t PF_F2 = 3; #define OPD(map_select, pp, opcode) (((map_select - 1) << 10) | (pp << 8) | (opcode)) static constexpr std::tuple BMITable[] = { - {OPD(2, 0b00, 0xF2), 1, &OpDispatchBuilder::ANDNBMIOp}, - {OPD(2, 0b00, 0xF5), 1, &OpDispatchBuilder::BZHI}, - {OPD(2, 0b10, 
0xF5), 1, &OpDispatchBuilder::PEXT}, - {OPD(2, 0b11, 0xF5), 1, &OpDispatchBuilder::PDEP}, - {OPD(2, 0b11, 0xF6), 1, &OpDispatchBuilder::MULX}, - {OPD(2, 0b00, 0xF7), 1, &OpDispatchBuilder::BEXTRBMIOp}, - {OPD(2, 0b01, 0xF7), 1, &OpDispatchBuilder::BMI2Shift}, - {OPD(2, 0b10, 0xF7), 1, &OpDispatchBuilder::BMI2Shift}, + {OPD(2, 0b00, 0xF2), 1, &OpDispatchBuilder::ANDNBMIOp}, {OPD(2, 0b00, 0xF5), 1, &OpDispatchBuilder::BZHI}, + {OPD(2, 0b10, 0xF5), 1, &OpDispatchBuilder::PEXT}, {OPD(2, 0b11, 0xF5), 1, &OpDispatchBuilder::PDEP}, + {OPD(2, 0b11, 0xF6), 1, &OpDispatchBuilder::MULX}, {OPD(2, 0b00, 0xF7), 1, &OpDispatchBuilder::BEXTRBMIOp}, + {OPD(2, 0b01, 0xF7), 1, &OpDispatchBuilder::BMI2Shift}, {OPD(2, 0b10, 0xF7), 1, &OpDispatchBuilder::BMI2Shift}, {OPD(2, 0b11, 0xF7), 1, &OpDispatchBuilder::BMI2Shift}, {OPD(3, 0b11, 0xF0), 1, &OpDispatchBuilder::RORX}, }; #undef OPD - #define OPD(group, pp, opcode) (((group - X86Tables::InstType::TYPE_VEX_GROUP_12) << 4) | (pp << 3) | (opcode)) - constexpr std::tuple VEXGroupTable[] = { - {OPD(X86Tables::InstType::TYPE_VEX_GROUP_17, 0, 0b001), 1, &OpDispatchBuilder::BLSRBMIOp}, - {OPD(X86Tables::InstType::TYPE_VEX_GROUP_17, 0, 0b010), 1, &OpDispatchBuilder::BLSMSKBMIOp}, - {OPD(X86Tables::InstType::TYPE_VEX_GROUP_17, 0, 0b011), 1, &OpDispatchBuilder::BLSIBMIOp}, - }; - #undef OPD +#define OPD(group, pp, opcode) (((group - X86Tables::InstType::TYPE_VEX_GROUP_12) << 4) | (pp << 3) | (opcode)) + constexpr std::tuple VEXGroupTable[] = { + {OPD(X86Tables::InstType::TYPE_VEX_GROUP_17, 0, 0b001), 1, &OpDispatchBuilder::BLSRBMIOp}, + {OPD(X86Tables::InstType::TYPE_VEX_GROUP_17, 0, 0b010), 1, &OpDispatchBuilder::BLSMSKBMIOp}, + {OPD(X86Tables::InstType::TYPE_VEX_GROUP_17, 0, 0b011), 1, &OpDispatchBuilder::BLSIBMIOp}, + }; +#undef OPD constexpr std::tuple EVEXTable[] = { {0x10, 2, &OpDispatchBuilder::UnimplementedOp}, @@ -7418,8 +7217,7 @@ constexpr uint16_t PF_F2 = 3; if (Mode == Context::MODE_32BIT) { 
InstallToTable(FEXCore::X86Tables::BaseOps, BaseOpTable_32); InstallToTable(FEXCore::X86Tables::SecondBaseOps, TwoByteOpTable_32); - } - else { + } else { InstallToTable(FEXCore::X86Tables::BaseOps, BaseOpTable_64); InstallToTable(FEXCore::X86Tables::SecondBaseOps, TwoByteOpTable_64); } @@ -7438,7 +7236,7 @@ constexpr uint16_t PF_F2 = 3; InstallToTable(FEXCore::X86Tables::SecondModRMTableOps, SecondaryModRMExtensionOpTable); FEX_CONFIG_OPT(ReducedPrecision, X87REDUCEDPRECISION); - if(ReducedPrecision) { + if (ReducedPrecision) { InstallToX87Table(FEXCore::X86Tables::X87Ops, X87F64OpTable); } else { InstallToX87Table(FEXCore::X86Tables::X87Ops, X87OpTable); @@ -7452,4 +7250,4 @@ constexpr uint16_t PF_F2 = 3; InstallToTable(FEXCore::X86Tables::EVEXTableOps, EVEXTable); } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp index dc1ea4069f..2e1aa85e43 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp @@ -22,10 +22,10 @@ class OrderedNode; #define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op void OpDispatchBuilder::SHA1NEXTEOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *RotatedNode{}; + OrderedNode* RotatedNode {}; if (CTX->HostFeatures.SupportsSHA) { // ARMv8 SHA1 extension provides a `SHA1H` instruction which does a fixed rotate by 30. // This only operates on element 0 rather than element 3. We don't have the luxury of rewriting the x86 SHA algorithm to take advantage of this. 
@@ -34,8 +34,7 @@ void OpDispatchBuilder::SHA1NEXTEOp(OpcodeArgs) { auto Duplicated = _VDupElement(OpSize::i128Bit, OpSize::i32Bit, Dest, 3); auto Sha1HRotated = _VSha1H(Duplicated); RotatedNode = _VDupElement(OpSize::i128Bit, OpSize::i32Bit, Sha1HRotated, 0); - } - else { + } else { // SHA1 extension missing, manually rotate. // Emulate rotate. auto ShiftLeft = _VShlI(OpSize::i128Bit, OpSize::i32Bit, Dest, 30); @@ -48,20 +47,20 @@ void OpDispatchBuilder::SHA1NEXTEOp(OpcodeArgs) { } void OpDispatchBuilder::SHA1MSG1Op(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *NewVec = _VExtr(16, 8, Dest, Src, 1); + OrderedNode* NewVec = _VExtr(16, 8, Dest, Src, 1); // [W0, W1, W2, W3] ^ [W2, W3, W4, W5] - OrderedNode *Result = _VXor(16, 1, Dest, NewVec); + OrderedNode* Result = _VXor(16, 1, Dest, NewVec); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::SHA1MSG2Op(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); // This instruction mostly matches ARMv8's SHA1SU1 instruction but one of the elements are flipped in an unexpected way. // Do all the work without it. 
@@ -91,41 +90,43 @@ void OpDispatchBuilder::SHA1MSG2Op(OpcodeArgs) { } void OpDispatchBuilder::SHA1RNDS4Op(OpcodeArgs) { - LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), - "Src1 needs to be literal here to indicate function and constants"); + LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here to indicate function and constants"); using FnType = OrderedNode* (*)(OpDispatchBuilder&, OrderedNode*, OrderedNode*, OrderedNode*); - const auto f0 = [](OpDispatchBuilder &Self, OrderedNode *B, OrderedNode *C, OrderedNode *D) -> OrderedNode* { + const auto f0 = [](OpDispatchBuilder& Self, OrderedNode* B, OrderedNode* C, OrderedNode* D) -> OrderedNode* { return Self._Xor(OpSize::i32Bit, Self._And(OpSize::i32Bit, B, C), Self._Andn(OpSize::i32Bit, D, B)); }; - const auto f1 = [](OpDispatchBuilder &Self, OrderedNode *B, OrderedNode *C, OrderedNode *D) -> OrderedNode* { + const auto f1 = [](OpDispatchBuilder& Self, OrderedNode* B, OrderedNode* C, OrderedNode* D) -> OrderedNode* { return Self._Xor(OpSize::i32Bit, Self._Xor(OpSize::i32Bit, B, C), D); }; - const auto f2 = [](OpDispatchBuilder &Self, OrderedNode *B, OrderedNode *C, OrderedNode *D) -> OrderedNode* { + const auto f2 = [](OpDispatchBuilder& Self, OrderedNode* B, OrderedNode* C, OrderedNode* D) -> OrderedNode* { return Self.BitwiseAtLeastTwo(B, C, D); }; - const auto f3 = [](OpDispatchBuilder &Self, OrderedNode *B, OrderedNode *C, OrderedNode *D) -> OrderedNode* { + const auto f3 = [](OpDispatchBuilder& Self, OrderedNode* B, OrderedNode* C, OrderedNode* D) -> OrderedNode* { return Self._Xor(OpSize::i32Bit, Self._Xor(OpSize::i32Bit, B, C), D); }; - constexpr std::array k_array{ + constexpr std::array k_array { 0x5A827999U, 0x6ED9EBA1U, 0x8F1BBCDCU, 0xCA62C1D6U, }; - constexpr std::array fn_array{ - f0, f1, f2, f3, + constexpr std::array fn_array { + f0, + f1, + f2, + f3, }; const uint64_t Imm8 = Op->Src[1].Data.Literal.Value & 0b11; const FnType Fn = fn_array[Imm8]; auto K = _Constant(32, k_array[Imm8]); 
- OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); auto W0E = _VExtractToGPR(16, 4, Src, 3); @@ -137,7 +138,8 @@ void OpDispatchBuilder::SHA1RNDS4Op(OpcodeArgs) { auto C = _VExtractToGPR(16, 4, Dest, 1); auto D = _VExtractToGPR(16, 4, Dest, 0); - auto A1 = _Add(OpSize::i32Bit, _Add(OpSize::i32Bit, _Add(OpSize::i32Bit, Fn(*this, B, C, D), _Ror(OpSize::i32Bit, A, _Constant(32, 27))), W0E), K); + auto A1 = + _Add(OpSize::i32Bit, _Add(OpSize::i32Bit, _Add(OpSize::i32Bit, Fn(*this, B, C, D), _Ror(OpSize::i32Bit, A, _Constant(32, 27))), W0E), K); auto B1 = A; auto C1 = _Ror(OpSize::i32Bit, B, _Constant(32, 2)); auto D1 = C; @@ -145,13 +147,14 @@ void OpDispatchBuilder::SHA1RNDS4Op(OpcodeArgs) { return {A1, B1, C1, D1, E1}; }; - const auto Round1To3 = [&](OrderedNode *A, OrderedNode *B, OrderedNode *C, - OrderedNode *D, OrderedNode *E, OrderedNode *Src, unsigned W_idx) -> RoundResult { + const auto Round1To3 = [&](OrderedNode* A, OrderedNode* B, OrderedNode* C, OrderedNode* D, OrderedNode* E, OrderedNode* Src, + unsigned W_idx) -> RoundResult { // Kill W and E at the beginning auto W = _VExtractToGPR(16, 4, Src, W_idx); auto Q = _Add(OpSize::i32Bit, W, E); - auto ANext = _Add(OpSize::i32Bit, _Add(OpSize::i32Bit, _Add(OpSize::i32Bit, Fn(*this, B, C, D), _Ror(OpSize::i32Bit, A, _Constant(32, 27))), Q), K); + auto ANext = + _Add(OpSize::i32Bit, _Add(OpSize::i32Bit, _Add(OpSize::i32Bit, Fn(*this, B, C, D), _Ror(OpSize::i32Bit, A, _Constant(32, 27))), Q), K); auto BNext = A; auto CNext = _Ror(OpSize::i32Bit, B, _Constant(32, 2)); auto DNext = C; @@ -163,9 +166,9 @@ void OpDispatchBuilder::SHA1RNDS4Op(OpcodeArgs) { auto [A1, B1, C1, D1, E1] = Round0(); auto [A2, B2, C2, D2, E2] = Round1To3(A1, B1, C1, D1, E1, Src, 2); auto [A3, B3, C3, D3, E3] = 
Round1To3(A2, B2, C2, D2, E2, Src, 1); - auto Final = Round1To3(A3, B3, C3, D3, E3, Src, 0); + auto Final = Round1To3(A3, B3, C3, D3, E3, Src, 0); - auto Dest3 = _VInsGPR(16, 4, 3, Dest, std::get<0>(Final)); + auto Dest3 = _VInsGPR(16, 4, 3, Dest, std::get<0>(Final)); auto Dest2 = _VInsGPR(16, 4, 2, Dest3, std::get<1>(Final)); auto Dest1 = _VInsGPR(16, 4, 1, Dest2, std::get<2>(Final)); auto Dest0 = _VInsGPR(16, 4, 0, Dest1, std::get<3>(Final)); @@ -174,17 +177,17 @@ void OpDispatchBuilder::SHA1RNDS4Op(OpcodeArgs) { } void OpDispatchBuilder::SHA256MSG1Op(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result{}; + OrderedNode* Result {}; if (CTX->HostFeatures.SupportsSHA) { Result = _VSha256U0(Dest, Src); - } - else { + } else { const auto Sigma0 = [this](OrderedNode* W) -> OrderedNode* { - return _Xor(OpSize::i32Bit, _Xor(OpSize::i32Bit, _Ror(OpSize::i32Bit, W, _Constant(32, 7)), _Ror(OpSize::i32Bit, W, _Constant(32, 18))), _Lshr(OpSize::i32Bit, W, _Constant(32, 3))); + return _Xor(OpSize::i32Bit, _Xor(OpSize::i32Bit, _Ror(OpSize::i32Bit, W, _Constant(32, 7)), _Ror(OpSize::i32Bit, W, _Constant(32, 18))), + _Lshr(OpSize::i32Bit, W, _Constant(32, 3))); }; auto W4 = _VExtractToGPR(16, 4, Src, 0); @@ -209,11 +212,12 @@ void OpDispatchBuilder::SHA256MSG1Op(OpcodeArgs) { void OpDispatchBuilder::SHA256MSG2Op(OpcodeArgs) { const auto Sigma1 = [this](OrderedNode* W) -> OrderedNode* { - return _Xor(OpSize::i32Bit, _Xor(OpSize::i32Bit, _Ror(OpSize::i32Bit, W, _Constant(32, 17)), _Ror(OpSize::i32Bit, W, _Constant(32, 19))), _Lshr(OpSize::i32Bit, W, _Constant(32, 10))); + return _Xor(OpSize::i32Bit, _Xor(OpSize::i32Bit, _Ror(OpSize::i32Bit, W, _Constant(32, 17)), _Ror(OpSize::i32Bit, W, _Constant(32, 19))), + 
_Lshr(OpSize::i32Bit, W, _Constant(32, 10))); }; - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); auto W14 = _VExtractToGPR(16, 4, Src, 2); auto W15 = _VExtractToGPR(16, 4, Src, 3); @@ -230,36 +234,38 @@ void OpDispatchBuilder::SHA256MSG2Op(OpcodeArgs) { StoreResult(FPRClass, Op, D0, -1); } -OrderedNode *OpDispatchBuilder::BitwiseAtLeastTwo(OrderedNode *A, OrderedNode *B, OrderedNode *C) { - // Returns whether at least 2/3 of A/B/C is true. - // Expressed as (A & (B | C)) | (B & C) - // - // Equivalent to expression in SHA calculations: (A & B) ^ (A & C) ^ (B & C) - auto And = _And(OpSize::i32Bit, B, C); - auto Or = _Or(OpSize::i32Bit, B, C); - return _Or(OpSize::i32Bit, _And(OpSize::i32Bit, A, Or), And); +OrderedNode* OpDispatchBuilder::BitwiseAtLeastTwo(OrderedNode* A, OrderedNode* B, OrderedNode* C) { + // Returns whether at least 2/3 of A/B/C is true. 
+ // Expressed as (A & (B | C)) | (B & C) + // + // Equivalent to expression in SHA calculations: (A & B) ^ (A & C) ^ (B & C) + auto And = _And(OpSize::i32Bit, B, C); + auto Or = _Or(OpSize::i32Bit, B, C); + return _Or(OpSize::i32Bit, _And(OpSize::i32Bit, A, Or), And); } void OpDispatchBuilder::SHA256RNDS2Op(OpcodeArgs) { - const auto Ch = [this](OrderedNode *E, OrderedNode *F, OrderedNode *G) -> OrderedNode* { + const auto Ch = [this](OrderedNode* E, OrderedNode* F, OrderedNode* G) -> OrderedNode* { return _Xor(OpSize::i32Bit, _And(OpSize::i32Bit, E, F), _Andn(OpSize::i32Bit, G, E)); }; - const auto Sigma0 = [this](OrderedNode *A) -> OrderedNode* { - return _XorShift(OpSize::i32Bit, _XorShift(OpSize::i32Bit, _Ror(OpSize::i32Bit, A, _Constant(32, 2)), A, ShiftType::ROR, 13), A, ShiftType::ROR, 22); + const auto Sigma0 = [this](OrderedNode* A) -> OrderedNode* { + return _XorShift(OpSize::i32Bit, _XorShift(OpSize::i32Bit, _Ror(OpSize::i32Bit, A, _Constant(32, 2)), A, ShiftType::ROR, 13), A, + ShiftType::ROR, 22); }; - const auto Sigma1 = [this](OrderedNode *E) -> OrderedNode* { - return _XorShift(OpSize::i32Bit, _XorShift(OpSize::i32Bit, _Ror(OpSize::i32Bit, E, _Constant(32, 6)), E, ShiftType::ROR, 11), E, ShiftType::ROR, 25); + const auto Sigma1 = [this](OrderedNode* E) -> OrderedNode* { + return _XorShift(OpSize::i32Bit, _XorShift(OpSize::i32Bit, _Ror(OpSize::i32Bit, E, _Constant(32, 6)), E, ShiftType::ROR, 11), E, + ShiftType::ROR, 25); }; - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); // Hardcoded to XMM0 auto XMM0 = LoadXMMRegister(0); auto E0 = _VExtractToGPR(16, 4, Src, 1); auto F0 = _VExtractToGPR(16, 4, Src, 0); auto G0 = _VExtractToGPR(16, 4, Dest, 1); - OrderedNode *Q0 = _Add(OpSize::i32Bit, Ch(E0, F0, G0), Sigma1(E0)); + 
OrderedNode* Q0 = _Add(OpSize::i32Bit, Ch(E0, F0, G0), Sigma1(E0)); auto WK0 = _VExtractToGPR(16, 4, XMM0, 0); Q0 = _Add(OpSize::i32Bit, Q0, WK0); @@ -275,7 +281,7 @@ void OpDispatchBuilder::SHA256RNDS2Op(OpcodeArgs) { auto D0 = _VExtractToGPR(16, 4, Dest, 2); auto E1 = _Add(OpSize::i32Bit, Q0, D0); - OrderedNode * Q1 = _Add(OpSize::i32Bit, Ch(E1, E0, F0), Sigma1(E1)); + OrderedNode* Q1 = _Add(OpSize::i32Bit, Ch(E1, E0, F0), Sigma1(E1)); auto WK1 = _VExtractToGPR(16, 4, XMM0, 1); Q1 = _Add(OpSize::i32Bit, Q1, WK1); @@ -299,16 +305,16 @@ void OpDispatchBuilder::SHA256RNDS2Op(OpcodeArgs) { } void OpDispatchBuilder::AESImcOp(OpcodeArgs) { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = _VAESImc(Src); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = _VAESImc(Src); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::AESEncOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); const auto ZeroRegister = LoadAndCacheNamedVectorConstant(16, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); - OrderedNode *Result = _VAESEnc(16, Dest, Src, ZeroRegister); + OrderedNode* Result = _VAESEnc(16, Dest, Src, ZeroRegister); StoreResult(FPRClass, Op, Result, -1); } @@ -319,19 +325,19 @@ void OpDispatchBuilder::VAESEncOp(OpcodeArgs) { // TODO: Handle 256-bit VAESENC. 
LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESENC unimplemented"); - OrderedNode *State = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Key = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* State = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Key = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); const auto ZeroRegister = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); - OrderedNode *Result = _VAESEnc(DstSize, State, Key, ZeroRegister); + OrderedNode* Result = _VAESEnc(DstSize, State, Key, ZeroRegister); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::AESEncLastOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); const auto ZeroRegister = LoadAndCacheNamedVectorConstant(16, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); - OrderedNode *Result = _VAESEncLast(16, Dest, Src, ZeroRegister); + OrderedNode* Result = _VAESEncLast(16, Dest, Src, ZeroRegister); StoreResult(FPRClass, Op, Result, -1); } @@ -342,19 +348,19 @@ void OpDispatchBuilder::VAESEncLastOp(OpcodeArgs) { // TODO: Handle 256-bit VAESENCLAST. 
LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESENCLAST unimplemented"); - OrderedNode *State = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Key = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* State = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Key = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); const auto ZeroRegister = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); - OrderedNode *Result = _VAESEncLast(DstSize, State, Key, ZeroRegister); + OrderedNode* Result = _VAESEncLast(DstSize, State, Key, ZeroRegister); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::AESDecOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); const auto ZeroRegister = LoadAndCacheNamedVectorConstant(16, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); - OrderedNode *Result = _VAESDec(16, Dest, Src, ZeroRegister); + OrderedNode* Result = _VAESDec(16, Dest, Src, ZeroRegister); StoreResult(FPRClass, Op, Result, -1); } @@ -365,19 +371,19 @@ void OpDispatchBuilder::VAESDecOp(OpcodeArgs) { // TODO: Handle 256-bit VAESDEC. 
LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESDEC unimplemented"); - OrderedNode *State = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Key = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* State = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Key = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); const auto ZeroRegister = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); - OrderedNode *Result = _VAESDec(DstSize, State, Key, ZeroRegister); + OrderedNode* Result = _VAESDec(DstSize, State, Key, ZeroRegister); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::AESDecLastOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); const auto ZeroRegister = LoadAndCacheNamedVectorConstant(16, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); - OrderedNode *Result = _VAESDecLast(16, Dest, Src, ZeroRegister); + OrderedNode* Result = _VAESDecLast(16, Dest, Src, ZeroRegister); StoreResult(FPRClass, Op, Result, -1); } @@ -388,16 +394,16 @@ void OpDispatchBuilder::VAESDecLastOp(OpcodeArgs) { // TODO: Handle 256-bit VAESDECLAST. 
LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESDECLAST unimplemented"); - OrderedNode *State = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Key = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* State = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Key = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); const auto ZeroRegister = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); - OrderedNode *Result = _VAESDecLast(DstSize, State, Key, ZeroRegister); + OrderedNode* Result = _VAESDecLast(DstSize, State, Key, ZeroRegister); StoreResult(FPRClass, Op, Result, -1); } OrderedNode* OpDispatchBuilder::AESKeyGenAssistImpl(OpcodeArgs) { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); const uint64_t RCON = Op->Src[1].Data.Literal.Value; @@ -407,15 +413,15 @@ OrderedNode* OpDispatchBuilder::AESKeyGenAssistImpl(OpcodeArgs) { } void OpDispatchBuilder::AESKeyGenAssist(OpcodeArgs) { - OrderedNode *Result = AESKeyGenAssistImpl(Op); + OrderedNode* Result = AESKeyGenAssistImpl(Op); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::PCLMULQDQOp(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Selector needs to be literal here"); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); const auto Selector = static_cast(Op->Src[1].Data.Literal.Value); auto Res = _PCLMUL(16, Dest, Src, Selector); @@ -427,12 +433,12 @@ void OpDispatchBuilder::VPCLMULQDQOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 
= LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); const auto Selector = static_cast(Op->Src[2].Data.Literal.Value); - OrderedNode *Res = _PCLMUL(DstSize, Src1, Src2, Selector); + OrderedNode* Res = _PCLMUL(DstSize, Src1, Src2, Selector); StoreResult(FPRClass, Op, Res, -1); } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp index 4c45e8b13b..48004ce29f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp @@ -20,23 +20,12 @@ desc: Handles x86/64 flag generation namespace FEXCore::IR { constexpr std::array FlagOffsets = { - FEXCore::X86State::RFLAG_CF_RAW_LOC, - FEXCore::X86State::RFLAG_PF_RAW_LOC, - FEXCore::X86State::RFLAG_AF_RAW_LOC, - FEXCore::X86State::RFLAG_ZF_RAW_LOC, - FEXCore::X86State::RFLAG_SF_RAW_LOC, - FEXCore::X86State::RFLAG_TF_LOC, - FEXCore::X86State::RFLAG_IF_LOC, - FEXCore::X86State::RFLAG_DF_LOC, - FEXCore::X86State::RFLAG_OF_RAW_LOC, - FEXCore::X86State::RFLAG_IOPL_LOC, - FEXCore::X86State::RFLAG_NT_LOC, - FEXCore::X86State::RFLAG_RF_LOC, - FEXCore::X86State::RFLAG_VM_LOC, - FEXCore::X86State::RFLAG_AC_LOC, - FEXCore::X86State::RFLAG_VIF_LOC, - FEXCore::X86State::RFLAG_VIP_LOC, - FEXCore::X86State::RFLAG_ID_LOC, + FEXCore::X86State::RFLAG_CF_RAW_LOC, FEXCore::X86State::RFLAG_PF_RAW_LOC, FEXCore::X86State::RFLAG_AF_RAW_LOC, + FEXCore::X86State::RFLAG_ZF_RAW_LOC, FEXCore::X86State::RFLAG_SF_RAW_LOC, FEXCore::X86State::RFLAG_TF_LOC, + FEXCore::X86State::RFLAG_IF_LOC, FEXCore::X86State::RFLAG_DF_LOC, FEXCore::X86State::RFLAG_OF_RAW_LOC, + FEXCore::X86State::RFLAG_IOPL_LOC, FEXCore::X86State::RFLAG_NT_LOC, FEXCore::X86State::RFLAG_RF_LOC, + FEXCore::X86State::RFLAG_VM_LOC, FEXCore::X86State::RFLAG_AC_LOC, 
FEXCore::X86State::RFLAG_VIF_LOC, + FEXCore::X86State::RFLAG_VIP_LOC, FEXCore::X86State::RFLAG_ID_LOC, }; void OpDispatchBuilder::ZeroMultipleFlags(uint32_t FlagsMask) { @@ -48,8 +37,7 @@ void OpDispatchBuilder::ZeroMultipleFlags(uint32_t FlagsMask) { auto NZCVFlagsMask = FlagsMask & FullNZCVMask; if (NZCVFlagsMask == FullNZCVMask) { ZeroNZCV(); - } - else { + } else { const auto IndexMask = NZCVIndexMask(FlagsMask); if (std::popcount(NZCVFlagsMask) == 1) { @@ -64,8 +52,7 @@ void OpDispatchBuilder::ZeroMultipleFlags(uint32_t FlagsMask) { SetRFLAG(ZeroConst, FlagOffset); NZCVFlagsMask &= ~(FlagMask); } - } - else { + } else { auto IndexMaskConstant = _Constant(IndexMask); auto NewNZCV = _Andn(OpSize::i64Bit, GetNZCV(), IndexMaskConstant); SetNZCV(NewNZCV); @@ -96,7 +83,7 @@ void OpDispatchBuilder::ZeroMultipleFlags(uint32_t FlagsMask) { } } -void OpDispatchBuilder::SetPackedRFLAG(bool Lower8, OrderedNode *Src) { +void OpDispatchBuilder::SetPackedRFLAG(bool Lower8, OrderedNode* Src) { size_t NumFlags = FlagOffsets.size(); if (Lower8) { // Calculate flags early. @@ -104,8 +91,7 @@ void OpDispatchBuilder::SetPackedRFLAG(bool Lower8, OrderedNode *Src) { // This is only a partial overwrite of flags since OF isn't stored here. CalculateDeferredFlags(); NumFlags = 5; - } - else { + } else { // We are overwriting all RFLAGS. Invalidate the deferred flag state. 
InvalidateDeferredFlags(); } @@ -125,7 +111,7 @@ void OpDispatchBuilder::SetPackedRFLAG(bool Lower8, OrderedNode *Src) { SetRFLAG(Src, FEXCore::X86State::RFLAG_AF_RAW_LOC); } else if (FlagOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) { // PF is stored parity flipped - OrderedNode *Tmp = _Bfe(OpSize::i32Bit, 1, FlagOffset, Src); + OrderedNode* Tmp = _Bfe(OpSize::i32Bit, 1, FlagOffset, Src); Tmp = _Xor(OpSize::i32Bit, Tmp, _Constant(1)); SetRFLAG(Tmp, FlagOffset); } else { @@ -134,15 +120,14 @@ void OpDispatchBuilder::SetPackedRFLAG(bool Lower8, OrderedNode *Src) { } } -OrderedNode *OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) { +OrderedNode* OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) { // Calculate flags early. CalculateDeferredFlags(); - OrderedNode *Original = _Constant(0); + OrderedNode* Original = _Constant(0); // SF/ZF and N/Z are together on both arm64 and x86_64, so we special case that. - bool GetNZ = (FlagsMask & (1 << FEXCore::X86State::RFLAG_SF_RAW_LOC)) && - (FlagsMask & (1 << FEXCore::X86State::RFLAG_ZF_RAW_LOC)); + bool GetNZ = (FlagsMask & (1 << FEXCore::X86State::RFLAG_SF_RAW_LOC)) && (FlagsMask & (1 << FEXCore::X86State::RFLAG_ZF_RAW_LOC)); // Handle CF first, since it's at bit 0 and hence doesn't need shift or OR. 
if (FlagsMask & (1 << FEXCore::X86State::RFLAG_CF_RAW_LOC)) { @@ -156,21 +141,20 @@ OrderedNode *OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) { continue; } - if ((GetNZ && (FlagOffset == FEXCore::X86State::RFLAG_SF_RAW_LOC || - FlagOffset == FEXCore::X86State::RFLAG_ZF_RAW_LOC)) || - FlagOffset == FEXCore::X86State::RFLAG_CF_RAW_LOC || - FlagOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) { + if ((GetNZ && (FlagOffset == FEXCore::X86State::RFLAG_SF_RAW_LOC || FlagOffset == FEXCore::X86State::RFLAG_ZF_RAW_LOC)) || + FlagOffset == FEXCore::X86State::RFLAG_CF_RAW_LOC || FlagOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) { // Already handled continue; } // Note that the Bfi only considers the bottom bit of the flag, the rest of // the byte is allowed to be garbage. - OrderedNode *Flag; - if (FlagOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) + OrderedNode* Flag; + if (FlagOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) { Flag = LoadAF(); - else + } else { Flag = GetRFLAG(FlagOffset); + } Original = _Orlshl(OpSize::i64Bit, Original, Flag, FlagOffset); } @@ -198,16 +182,17 @@ OrderedNode *OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) { } // The constant is OR'ed in at the end, to avoid a pointless or xzr, #2. - if ((1U << X86State::RFLAG_RESERVED_LOC) & FlagsMask) + if ((1U << X86State::RFLAG_RESERVED_LOC) & FlagsMask) { Original = _Or(OpSize::i64Bit, Original, _Constant(2)); + } return Original; } -void OpDispatchBuilder::CalculateOF(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool Sub) { +void OpDispatchBuilder::CalculateOF(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, OrderedNode* Src2, bool Sub) { auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; uint64_t SignBit = (SrcSize * 8) - 1; - OrderedNode *Anded = nullptr; + OrderedNode* Anded = nullptr; // For add, OF is set iff the sources have the same sign but the destination // sign differs. 
If we know a source sign, we can simplify the expression: if @@ -221,24 +206,26 @@ void OpDispatchBuilder::CalculateOF(uint8_t SrcSize, OrderedNode *Res, OrderedNo if (IsValueConstant(WrapNode(Src2), &Const)) { bool Negative = (Const & (1ull << SignBit)) != 0; - if (Negative ^ Sub) + if (Negative ^ Sub) { Anded = _Andn(OpSize, Src1, Res); - else + } else { Anded = _Andn(OpSize, Res, Src1); + } } else { auto XorOp1 = _Xor(OpSize, Src1, Src2); auto XorOp2 = _Xor(OpSize, Res, Src1); - if (Sub) + if (Sub) { Anded = _And(OpSize, XorOp2, XorOp1); - else + } else { Anded = _Andn(OpSize, XorOp2, XorOp1); + } } SetRFLAG(Anded, SrcSize * 8 - 1, true); } -OrderedNode *OpDispatchBuilder::LoadPFRaw() { +OrderedNode* OpDispatchBuilder::LoadPFRaw() { // Read the stored byte. This is the original result (up to 64-bits), it needs // parity calculated. auto Result = GetRFLAG(FEXCore::X86State::RFLAG_PF_RAW_LOC); @@ -252,7 +239,7 @@ OrderedNode *OpDispatchBuilder::LoadPFRaw() { return _VExtractToGPR(8, 1, Count, 0); } -OrderedNode *OpDispatchBuilder::LoadAF() { +OrderedNode* OpDispatchBuilder::LoadAF() { // Read the stored value. This is the XOR of the arguments. auto AFWord = GetRFLAG(FEXCore::X86State::RFLAG_AF_RAW_LOC); @@ -273,16 +260,16 @@ void OpDispatchBuilder::FixupAF() { auto PFRaw = GetRFLAG(FEXCore::X86State::RFLAG_PF_RAW_LOC); auto AFRaw = GetRFLAG(FEXCore::X86State::RFLAG_AF_RAW_LOC); - OrderedNode *XorRes = _Xor(OpSize::i32Bit, AFRaw, PFRaw); + OrderedNode* XorRes = _Xor(OpSize::i32Bit, AFRaw, PFRaw); SetRFLAG(XorRes); } -void OpDispatchBuilder::CalculatePF(OrderedNode *Res) { +void OpDispatchBuilder::CalculatePF(OrderedNode* Res) { // Calculation is entirely deferred until load, just store the 8-bit result. 
SetRFLAG(Res); } -void OpDispatchBuilder::CalculateAF(OpSize OpSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { +void OpDispatchBuilder::CalculateAF(OpSize OpSize, OrderedNode* Res, OrderedNode* Src1, OrderedNode* Src2) { // We only care about bit 4 in the subsequent XOR. If we'll XOR with 0, // there's no sense XOR'ing at all. This affects INC. uint64_t Const; @@ -294,15 +281,16 @@ void OpDispatchBuilder::CalculateAF(OpSize OpSize, OrderedNode *Res, OrderedNode // We store the XOR of the arguments. At read time, we XOR with the // appropriate bit of the result (available as the PF flag) and extract the // appropriate bit. - OrderedNode *XorRes = _Xor(OpSize, Src1, Src2); + OrderedNode* XorRes = _Xor(OpSize, Src1, Src2); SetRFLAG(XorRes); } void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) { if (CurrentDeferredFlags.Type == FlagsGenerationType::TYPE_NONE) { // Nothing to do - if (NZCVDirty && CachedNZCV) + if (NZCVDirty && CachedNZCV) { _StoreNZCV(CachedNZCV); + } CachedNZCV = nullptr; NZCVDirty = false; @@ -310,183 +298,104 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) { } switch (CurrentDeferredFlags.Type) { - case FlagsGenerationType::TYPE_ADC: - CalculateFlags_ADC( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.ThreeSource.Src1, - CurrentDeferredFlags.Sources.ThreeSource.Src2, - CurrentDeferredFlags.Sources.ThreeSource.Src3); - break; - case FlagsGenerationType::TYPE_SBB: - CalculateFlags_SBB( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.ThreeSource.Src1, - CurrentDeferredFlags.Sources.ThreeSource.Src2, - CurrentDeferredFlags.Sources.ThreeSource.Src3); - break; - case FlagsGenerationType::TYPE_SUB: - CalculateFlags_SUB( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSrcImmediate.Src1, - CurrentDeferredFlags.Sources.TwoSrcImmediate.Src2, - 
CurrentDeferredFlags.Sources.TwoSrcImmediate.UpdateCF); - break; - case FlagsGenerationType::TYPE_ADD: - CalculateFlags_ADD( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSrcImmediate.Src1, - CurrentDeferredFlags.Sources.TwoSrcImmediate.Src2, - CurrentDeferredFlags.Sources.TwoSrcImmediate.UpdateCF); - break; - case FlagsGenerationType::TYPE_MUL: - CalculateFlags_MUL( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSource.Src1); - break; - case FlagsGenerationType::TYPE_UMUL: - CalculateFlags_UMUL(CurrentDeferredFlags.Res); - break; - case FlagsGenerationType::TYPE_LOGICAL: - CalculateFlags_Logical( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSource.Src1, - CurrentDeferredFlags.Sources.TwoSource.Src2); - break; - case FlagsGenerationType::TYPE_LSHL: - CalculateFlags_ShiftLeft( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSource.Src1, - CurrentDeferredFlags.Sources.TwoSource.Src2); - break; - case FlagsGenerationType::TYPE_LSHLI: - CalculateFlags_ShiftLeftImmediate( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, - CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); - break; - case FlagsGenerationType::TYPE_LSHR: - CalculateFlags_ShiftRight( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSource.Src1, - CurrentDeferredFlags.Sources.TwoSource.Src2); - break; - case FlagsGenerationType::TYPE_LSHRI: - CalculateFlags_ShiftRightImmediate( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, - CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); - break; - case FlagsGenerationType::TYPE_LSHRDI: - CalculateFlags_ShiftRightDoubleImmediate( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - 
CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, - CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); - break; - case FlagsGenerationType::TYPE_ASHR: - CalculateFlags_SignShiftRight( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSource.Src1, - CurrentDeferredFlags.Sources.TwoSource.Src2); - break; - case FlagsGenerationType::TYPE_ASHRI: - CalculateFlags_SignShiftRightImmediate( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, - CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); - break; - case FlagsGenerationType::TYPE_ROR: - CalculateFlags_RotateRight( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSource.Src1, - CurrentDeferredFlags.Sources.TwoSource.Src2); - break; - case FlagsGenerationType::TYPE_RORI: - CalculateFlags_RotateRightImmediate( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, - CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); - break; - case FlagsGenerationType::TYPE_ROL: - CalculateFlags_RotateLeft( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSource.Src1, - CurrentDeferredFlags.Sources.TwoSource.Src2); - break; - case FlagsGenerationType::TYPE_ROLI: - CalculateFlags_RotateLeftImmediate( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, - CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); - break; - case FlagsGenerationType::TYPE_BEXTR: - CalculateFlags_BEXTR(CurrentDeferredFlags.Res); - break; - case FlagsGenerationType::TYPE_BLSI: - CalculateFlags_BLSI( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res); - break; - case FlagsGenerationType::TYPE_BLSMSK: - CalculateFlags_BLSMSK( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSource.Src1); - break; - case 
FlagsGenerationType::TYPE_BLSR: - CalculateFlags_BLSR( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSource.Src1); - break; - case FlagsGenerationType::TYPE_POPCOUNT: - CalculateFlags_POPCOUNT(CurrentDeferredFlags.Res); - break; - case FlagsGenerationType::TYPE_BZHI: - CalculateFlags_BZHI( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSource.Src1); - break; - case FlagsGenerationType::TYPE_ZCNT: - CalculateFlags_ZCNT( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res); - break; - case FlagsGenerationType::TYPE_RDRAND: - CalculateFlags_RDRAND(CurrentDeferredFlags.Res); - break; - case FlagsGenerationType::TYPE_NONE: - default: ERROR_AND_DIE_FMT("Unhandled flags type {}", CurrentDeferredFlags.Type); + case FlagsGenerationType::TYPE_ADC: + CalculateFlags_ADC(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.ThreeSource.Src1, + CurrentDeferredFlags.Sources.ThreeSource.Src2, CurrentDeferredFlags.Sources.ThreeSource.Src3); + break; + case FlagsGenerationType::TYPE_SBB: + CalculateFlags_SBB(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.ThreeSource.Src1, + CurrentDeferredFlags.Sources.ThreeSource.Src2, CurrentDeferredFlags.Sources.ThreeSource.Src3); + break; + case FlagsGenerationType::TYPE_SUB: + CalculateFlags_SUB(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.TwoSrcImmediate.Src1, + CurrentDeferredFlags.Sources.TwoSrcImmediate.Src2, CurrentDeferredFlags.Sources.TwoSrcImmediate.UpdateCF); + break; + case FlagsGenerationType::TYPE_ADD: + CalculateFlags_ADD(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.TwoSrcImmediate.Src1, + CurrentDeferredFlags.Sources.TwoSrcImmediate.Src2, CurrentDeferredFlags.Sources.TwoSrcImmediate.UpdateCF); + break; + case FlagsGenerationType::TYPE_MUL: + 
CalculateFlags_MUL(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.OneSource.Src1); + break; + case FlagsGenerationType::TYPE_UMUL: CalculateFlags_UMUL(CurrentDeferredFlags.Res); break; + case FlagsGenerationType::TYPE_LOGICAL: + CalculateFlags_Logical(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.TwoSource.Src1, + CurrentDeferredFlags.Sources.TwoSource.Src2); + break; + case FlagsGenerationType::TYPE_LSHL: + CalculateFlags_ShiftLeft(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.TwoSource.Src1, + CurrentDeferredFlags.Sources.TwoSource.Src2); + break; + case FlagsGenerationType::TYPE_LSHLI: + CalculateFlags_ShiftLeftImmediate(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, + CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); + break; + case FlagsGenerationType::TYPE_LSHR: + CalculateFlags_ShiftRight(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.TwoSource.Src1, + CurrentDeferredFlags.Sources.TwoSource.Src2); + break; + case FlagsGenerationType::TYPE_LSHRI: + CalculateFlags_ShiftRightImmediate(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, + CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); + break; + case FlagsGenerationType::TYPE_LSHRDI: + CalculateFlags_ShiftRightDoubleImmediate(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, + CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); + break; + case FlagsGenerationType::TYPE_ASHR: + CalculateFlags_SignShiftRight(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.TwoSource.Src1, + CurrentDeferredFlags.Sources.TwoSource.Src2); + break; + case FlagsGenerationType::TYPE_ASHRI: + CalculateFlags_SignShiftRightImmediate(CurrentDeferredFlags.SrcSize, 
CurrentDeferredFlags.Res, + CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); + break; + case FlagsGenerationType::TYPE_ROR: + CalculateFlags_RotateRight(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.TwoSource.Src1, + CurrentDeferredFlags.Sources.TwoSource.Src2); + break; + case FlagsGenerationType::TYPE_RORI: + CalculateFlags_RotateRightImmediate(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, + CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); + break; + case FlagsGenerationType::TYPE_ROL: + CalculateFlags_RotateLeft(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.TwoSource.Src1, + CurrentDeferredFlags.Sources.TwoSource.Src2); + break; + case FlagsGenerationType::TYPE_ROLI: + CalculateFlags_RotateLeftImmediate(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, + CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); + break; + case FlagsGenerationType::TYPE_BEXTR: CalculateFlags_BEXTR(CurrentDeferredFlags.Res); break; + case FlagsGenerationType::TYPE_BLSI: CalculateFlags_BLSI(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res); break; + case FlagsGenerationType::TYPE_BLSMSK: + CalculateFlags_BLSMSK(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.OneSource.Src1); + break; + case FlagsGenerationType::TYPE_BLSR: + CalculateFlags_BLSR(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.OneSource.Src1); + break; + case FlagsGenerationType::TYPE_POPCOUNT: CalculateFlags_POPCOUNT(CurrentDeferredFlags.Res); break; + case FlagsGenerationType::TYPE_BZHI: + CalculateFlags_BZHI(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res, CurrentDeferredFlags.Sources.OneSource.Src1); + break; + case FlagsGenerationType::TYPE_ZCNT: 
CalculateFlags_ZCNT(CurrentDeferredFlags.SrcSize, CurrentDeferredFlags.Res); break; + case FlagsGenerationType::TYPE_RDRAND: CalculateFlags_RDRAND(CurrentDeferredFlags.Res); break; + case FlagsGenerationType::TYPE_NONE: + default: ERROR_AND_DIE_FMT("Unhandled flags type {}", CurrentDeferredFlags.Type); } // Done calculating CurrentDeferredFlags.Type = FlagsGenerationType::TYPE_NONE; - if (NZCVDirty && CachedNZCV) + if (NZCVDirty && CachedNZCV) { _StoreNZCV(CachedNZCV); + } CachedNZCV = nullptr; NZCVDirty = false; } -void OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) { +void OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, OrderedNode* Src2, OrderedNode* CF) { auto Zero = _Constant(0); auto One = _Constant(1); auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; @@ -506,7 +415,7 @@ void OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, Or { auto SelectOpLT = _Select(FEXCore::IR::COND_ULT, Res, Src2, One, Zero); auto SelectOpLE = _Select(FEXCore::IR::COND_ULE, Res, Src2, One, Zero); - auto SelectCF = _Select(FEXCore::IR::COND_EQ, CF, One, SelectOpLE, SelectOpLT); + auto SelectCF = _Select(FEXCore::IR::COND_EQ, CF, One, SelectOpLE, SelectOpLT); SetRFLAG(SelectCF); } @@ -515,7 +424,7 @@ void OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, Or } } -void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) { +void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, OrderedNode* Src2, OrderedNode* CF) { auto Zero = _Constant(0); auto One = _Constant(1); auto OpSize = SrcSize == 8 ? 
OpSize::i64Bit : OpSize::i32Bit; @@ -541,7 +450,7 @@ void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, Or { auto SelectOpLT = _Select(FEXCore::IR::COND_UGT, Res, Src1, One, Zero); auto SelectOpLE = _Select(FEXCore::IR::COND_UGE, Res, Src1, One, Zero); - auto SelectCF = _Select(FEXCore::IR::COND_EQ, CF, One, SelectOpLE, SelectOpLT); + auto SelectCF = _Select(FEXCore::IR::COND_EQ, CF, One, SelectOpLE, SelectOpLT); SetRFLAG(SelectCF); } @@ -550,7 +459,7 @@ void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, Or } } -void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF) { +void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, OrderedNode* Src2, bool UpdateCF) { auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; CalculateAF(OpSize, Res, Src1, Src2); @@ -564,13 +473,14 @@ void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, Or // If we're updating CF, we need to invert it for correctness. If we're not // updating CF, we need to restore the CF since we stomped over it. - if (UpdateCF) + if (UpdateCF) { CarryInvert(); - else + } else { SetRFLAG(OldCF); + } } -void OpDispatchBuilder::CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, bool UpdateCF) { +void OpDispatchBuilder::CalculateFlags_ADD(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, OrderedNode* Src2, bool UpdateCF) { auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; CalculateAF(OpSize, Res, Src1, Src2); @@ -583,11 +493,12 @@ void OpDispatchBuilder::CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Res, Or _AddNZCV(IR::SizeToOpSize(SrcSize), Src1, Src2); // We stomped over CF while calculation flags, restore it. 
- if (!UpdateCF) + if (!UpdateCF) { SetRFLAG(OldCF); + } } -void OpDispatchBuilder::CalculateFlags_MUL(uint8_t SrcSize, OrderedNode *Res, OrderedNode *High) { +void OpDispatchBuilder::CalculateFlags_MUL(uint8_t SrcSize, OrderedNode* Res, OrderedNode* High) { HandleNZCVWrite(); // PF/AF/ZF/SF @@ -607,11 +518,11 @@ void OpDispatchBuilder::CalculateFlags_MUL(uint8_t SrcSize, OrderedNode *Res, Or // If High = SignBit, then sets to nZcv. Else sets to nzCV. Since SF/ZF // undefined, this does what we need. auto Zero = _Constant(0); - _CondAddNZCV(OpSize::i64Bit, Zero, Zero, CondClassType{COND_EQ}, 0x3 /* nzCV */); + _CondAddNZCV(OpSize::i64Bit, Zero, Zero, CondClassType {COND_EQ}, 0x3 /* nzCV */); } } -void OpDispatchBuilder::CalculateFlags_UMUL(OrderedNode *High) { +void OpDispatchBuilder::CalculateFlags_UMUL(OrderedNode* High) { HandleNZCVWrite(); auto Zero = _Constant(0); @@ -632,11 +543,11 @@ void OpDispatchBuilder::CalculateFlags_UMUL(OrderedNode *High) { // If High = 0, then sets to nZcv. Else sets to nzCV. Since SF/ZF undefined, // this does what we need. 
- _CondAddNZCV(Size, Zero, Zero, CondClassType{COND_EQ}, 0x3 /* nzCV */); + _CondAddNZCV(Size, Zero, Zero, CondClassType {COND_EQ}, 0x3 /* nzCV */); } } -void OpDispatchBuilder::CalculateFlags_Logical(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { +void OpDispatchBuilder::CalculateFlags_Logical(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, OrderedNode* Src2) { // AF // Undefined _InvalidateFlags(1 << X86State::RFLAG_AF_RAW_LOC); @@ -647,8 +558,8 @@ void OpDispatchBuilder::CalculateFlags_Logical(uint8_t SrcSize, OrderedNode *Res SetNZ_ZeroCV(SrcSize, Res); } -void OpDispatchBuilder::CalculateFlags_ShiftLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res, Src1, Src2](){ +void OpDispatchBuilder::CalculateFlags_ShiftLeft(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, OrderedNode* Src2) { + CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res, Src1, Src2]() { const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; SetNZ_ZeroCV(SrcSize, Res); @@ -671,8 +582,8 @@ void OpDispatchBuilder::CalculateFlags_ShiftLeft(uint8_t SrcSize, OrderedNode *R }); } -void OpDispatchBuilder::CalculateFlags_ShiftRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res, Src1, Src2](){ +void OpDispatchBuilder::CalculateFlags_ShiftRight(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, OrderedNode* Src2) { + CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res, Src1, Src2]() { const auto OpSize = SrcSize == 8 ? 
OpSize::i64Bit : OpSize::i32Bit; SetNZ_ZeroCV(SrcSize, Res); @@ -695,8 +606,8 @@ void OpDispatchBuilder::CalculateFlags_ShiftRight(uint8_t SrcSize, OrderedNode * }); } -void OpDispatchBuilder::CalculateFlags_SignShiftRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res, Src1, Src2](){ +void OpDispatchBuilder::CalculateFlags_SignShiftRight(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, OrderedNode* Src2) { + CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res, Src1, Src2]() { // SF/ZF/OF SetNZ_ZeroCV(SrcSize, Res); @@ -714,9 +625,11 @@ void OpDispatchBuilder::CalculateFlags_SignShiftRight(uint8_t SrcSize, OrderedNo }); } -void OpDispatchBuilder::CalculateFlags_ShiftLeftImmediate(uint8_t SrcSize, OrderedNode *UnmaskedRes, OrderedNode *Src1, uint64_t Shift) { +void OpDispatchBuilder::CalculateFlags_ShiftLeftImmediate(uint8_t SrcSize, OrderedNode* UnmaskedRes, OrderedNode* Src1, uint64_t Shift) { // No flags changed if shift is zero - if (Shift == 0) return; + if (Shift == 0) { + return; + } auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; @@ -748,16 +661,18 @@ void OpDispatchBuilder::CalculateFlags_ShiftLeftImmediate(uint8_t SrcSize, Order } } -void OpDispatchBuilder::CalculateFlags_SignShiftRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { +void OpDispatchBuilder::CalculateFlags_SignShiftRightImmediate(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, uint64_t Shift) { // No flags changed if shift is zero - if (Shift == 0) return; + if (Shift == 0) { + return; + } SetNZ_ZeroCV(SrcSize, Res); // CF { // Extract the last bit shifted in to CF - SetRFLAG(Src1, Shift-1, true); + SetRFLAG(Src1, Shift - 1, true); } CalculatePF(Res); @@ -772,7 +687,7 @@ void OpDispatchBuilder::CalculateFlags_SignShiftRightImmediate(uint8_t SrcSize, // already zeroed there's nothing to do here. 
} -void OpDispatchBuilder::CalculateFlags_ShiftRightImmediateCommon(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { +void OpDispatchBuilder::CalculateFlags_ShiftRightImmediateCommon(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, uint64_t Shift) { // Set SF and PF. Clobbers OF, but OF only defined for Shift = 1 where it is // set below. SetNZ_ZeroCV(SrcSize, Res); @@ -780,7 +695,7 @@ void OpDispatchBuilder::CalculateFlags_ShiftRightImmediateCommon(uint8_t SrcSize // CF { // Extract the last bit shifted in to CF - SetRFLAG(Src1, Shift-1, true); + SetRFLAG(Src1, Shift - 1, true); } CalculatePF(Res); @@ -790,9 +705,11 @@ void OpDispatchBuilder::CalculateFlags_ShiftRightImmediateCommon(uint8_t SrcSize _InvalidateFlags(1 << X86State::RFLAG_AF_RAW_LOC); } -void OpDispatchBuilder::CalculateFlags_ShiftRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { +void OpDispatchBuilder::CalculateFlags_ShiftRightImmediate(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, uint64_t Shift) { // No flags changed if shift is zero - if (Shift == 0) return; + if (Shift == 0) { + return; + } CalculateFlags_ShiftRightImmediateCommon(SrcSize, Res, Src1, Shift); @@ -806,9 +723,11 @@ void OpDispatchBuilder::CalculateFlags_ShiftRightImmediate(uint8_t SrcSize, Orde } } -void OpDispatchBuilder::CalculateFlags_ShiftRightDoubleImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { +void OpDispatchBuilder::CalculateFlags_ShiftRightDoubleImmediate(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, uint64_t Shift) { // No flags changed if shift is zero - if (Shift == 0) return; + if (Shift == 0) { + return; + } const auto OpSize = SrcSize == 8 ? 
OpSize::i64Bit : OpSize::i32Bit; CalculateFlags_ShiftRightImmediateCommon(SrcSize, Res, Src1, Shift); @@ -825,15 +744,16 @@ void OpDispatchBuilder::CalculateFlags_ShiftRightDoubleImmediate(uint8_t SrcSize } } -void OpDispatchBuilder::CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res](){ +void OpDispatchBuilder::CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, OrderedNode* Src2) { + CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res]() { auto SizeBits = SrcSize * 8; const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; // Ends up faster overall if we don't have FlagM, slower if we do... // If Shift != 1, OF is undefined so we choose to zero here. - if (!CTX->HostFeatures.SupportsFlagM) + if (!CTX->HostFeatures.SupportsFlagM) { ZeroCV(); + } // Extract the last bit shifted in to CF SetRFLAG(Res, SizeBits - 1, true); @@ -845,19 +765,20 @@ void OpDispatchBuilder::CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode }); } -void OpDispatchBuilder::CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res](){ +void OpDispatchBuilder::CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, OrderedNode* Src2) { + CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res]() { const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; auto SizeBits = SrcSize * 8; // Ends up faster overall if we don't have FlagM, slower if we do... // If Shift != 1, OF is undefined so we choose to zero here. 
- if (!CTX->HostFeatures.SupportsFlagM) + if (!CTX->HostFeatures.SupportsFlagM) { ZeroCV(); + } // Extract the last bit shifted in to CF - //auto Size = _Constant(GetSrcSize(Res) * 8); - //auto ShiftAmt = _Sub(OpSize::i64Bit, Size, Src2); + // auto Size = _Constant(GetSrcSize(Res) * 8); + // auto ShiftAmt = _Sub(OpSize::i64Bit, Size, Src2); SetRFLAG(Res, 0, true); // OF is the LSB and MSB XOR'd together. @@ -868,16 +789,19 @@ void OpDispatchBuilder::CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode * }); } -void OpDispatchBuilder::CalculateFlags_RotateRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { - if (Shift == 0) return; +void OpDispatchBuilder::CalculateFlags_RotateRightImmediate(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, uint64_t Shift) { + if (Shift == 0) { + return; + } const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; auto SizeBits = SrcSize * 8; // Ends up faster overall if we don't have FlagM, slower if we do... // If Shift != 1, OF is undefined so we choose to zero here. - if (!CTX->HostFeatures.SupportsFlagM) + if (!CTX->HostFeatures.SupportsFlagM) { ZeroCV(); + } // CF { @@ -896,16 +820,19 @@ void OpDispatchBuilder::CalculateFlags_RotateRightImmediate(uint8_t SrcSize, Ord } } -void OpDispatchBuilder::CalculateFlags_RotateLeftImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { - if (Shift == 0) return; +void OpDispatchBuilder::CalculateFlags_RotateLeftImmediate(uint8_t SrcSize, OrderedNode* Res, OrderedNode* Src1, uint64_t Shift) { + if (Shift == 0) { + return; + } const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; auto SizeBits = SrcSize * 8; // Ends up faster overall if we don't have FlagM, slower if we do... // If Shift != 1, OF is undefined so we choose to zero here. 
- if (!CTX->HostFeatures.SupportsFlagM) + if (!CTX->HostFeatures.SupportsFlagM) { ZeroCV(); + } // CF { @@ -926,16 +853,15 @@ void OpDispatchBuilder::CalculateFlags_RotateLeftImmediate(uint8_t SrcSize, Orde } } -void OpDispatchBuilder::CalculateFlags_BEXTR(OrderedNode *Src) { +void OpDispatchBuilder::CalculateFlags_BEXTR(OrderedNode* Src) { // ZF is set properly. CF and OF are defined as being set to zero. SF, PF, and // AF are undefined. SetNZ_ZeroCV(GetOpSize(Src), Src); - _InvalidateFlags((1UL << X86State::RFLAG_PF_RAW_LOC) | - (1UL << X86State::RFLAG_AF_RAW_LOC)); + _InvalidateFlags((1UL << X86State::RFLAG_PF_RAW_LOC) | (1UL << X86State::RFLAG_AF_RAW_LOC)); } -void OpDispatchBuilder::CalculateFlags_BLSI(uint8_t SrcSize, OrderedNode *Result) { +void OpDispatchBuilder::CalculateFlags_BLSI(uint8_t SrcSize, OrderedNode* Result) { // CF is cleared if Src is zero, otherwise it's set. However, Src is zero iff // Result is zero, so we can test the result instead. So, CF is just the // inverted ZF. 
@@ -947,14 +873,12 @@ void OpDispatchBuilder::CalculateFlags_BLSI(uint8_t SrcSize, OrderedNode *Result SetRFLAG(CFOp); // PF/AF undefined - _InvalidateFlags((1UL << X86State::RFLAG_PF_RAW_LOC) | - (1UL << X86State::RFLAG_AF_RAW_LOC)); + _InvalidateFlags((1UL << X86State::RFLAG_PF_RAW_LOC) | (1UL << X86State::RFLAG_AF_RAW_LOC)); } -void OpDispatchBuilder::CalculateFlags_BLSMSK(uint8_t SrcSize, OrderedNode *Result, OrderedNode *Src) { +void OpDispatchBuilder::CalculateFlags_BLSMSK(uint8_t SrcSize, OrderedNode* Result, OrderedNode* Src) { // PF/AF undefined - _InvalidateFlags((1UL << X86State::RFLAG_PF_RAW_LOC) | - (1UL << X86State::RFLAG_AF_RAW_LOC)); + _InvalidateFlags((1UL << X86State::RFLAG_PF_RAW_LOC) | (1UL << X86State::RFLAG_AF_RAW_LOC)); // CF set according to the Src auto Zero = _Constant(0); @@ -967,7 +891,7 @@ void OpDispatchBuilder::CalculateFlags_BLSMSK(uint8_t SrcSize, OrderedNode *Resu SetRFLAG(CFOp); } -void OpDispatchBuilder::CalculateFlags_BLSR(uint8_t SrcSize, OrderedNode *Result, OrderedNode *Src) { +void OpDispatchBuilder::CalculateFlags_BLSR(uint8_t SrcSize, OrderedNode* Result, OrderedNode* Src) { auto Zero = _Constant(0); auto One = _Constant(1); auto CFOp = _Select(IR::COND_EQ, Src, Zero, One, Zero); @@ -976,30 +900,27 @@ void OpDispatchBuilder::CalculateFlags_BLSR(uint8_t SrcSize, OrderedNode *Result SetRFLAG(CFOp); // PF/AF undefined - _InvalidateFlags((1UL << X86State::RFLAG_PF_RAW_LOC) | - (1UL << X86State::RFLAG_AF_RAW_LOC)); + _InvalidateFlags((1UL << X86State::RFLAG_PF_RAW_LOC) | (1UL << X86State::RFLAG_AF_RAW_LOC)); } -void OpDispatchBuilder::CalculateFlags_POPCOUNT(OrderedNode *Result) { +void OpDispatchBuilder::CalculateFlags_POPCOUNT(OrderedNode* Result) { // We need to set ZF while clearing the rest of NZCV. The result of a popcount // is in the range [0, 63]. In particular, it is always positive. So a // combined NZ test will correctly zero SF/CF/OF while setting ZF. 
SetNZ_ZeroCV(OpSize::i32Bit, Result); - ZeroMultipleFlags((1U << X86State::RFLAG_AF_RAW_LOC) | - (1U << X86State::RFLAG_PF_RAW_LOC)); + ZeroMultipleFlags((1U << X86State::RFLAG_AF_RAW_LOC) | (1U << X86State::RFLAG_PF_RAW_LOC)); } -void OpDispatchBuilder::CalculateFlags_BZHI(uint8_t SrcSize, OrderedNode *Result, OrderedNode *Src) { +void OpDispatchBuilder::CalculateFlags_BZHI(uint8_t SrcSize, OrderedNode* Result, OrderedNode* Src) { // PF/AF undefined - _InvalidateFlags((1UL << X86State::RFLAG_PF_RAW_LOC) | - (1UL << X86State::RFLAG_AF_RAW_LOC)); + _InvalidateFlags((1UL << X86State::RFLAG_PF_RAW_LOC) | (1UL << X86State::RFLAG_AF_RAW_LOC)); SetNZ_ZeroCV(SrcSize, Result); SetRFLAG(Src); } -void OpDispatchBuilder::CalculateFlags_ZCNT(uint8_t SrcSize, OrderedNode *Result) { +void OpDispatchBuilder::CalculateFlags_ZCNT(uint8_t SrcSize, OrderedNode* Result) { // OF, SF, AF, PF all undefined // Test ZF of result, SF is undefined so this is ok. SetNZ_ZeroCV(SrcSize, Result); @@ -1011,18 +932,15 @@ void OpDispatchBuilder::CalculateFlags_ZCNT(uint8_t SrcSize, OrderedNode *Result SetRFLAG(Result, CarryBit); } -void OpDispatchBuilder::CalculateFlags_RDRAND(OrderedNode *Src) { +void OpDispatchBuilder::CalculateFlags_RDRAND(OrderedNode* Src) { // OF, SF, ZF, AF, PF all zero // CF is set to the incoming source - uint32_t FlagsMaskToZero = - FullNZCVMask | - (1U << X86State::RFLAG_AF_RAW_LOC) | - (1U << X86State::RFLAG_PF_RAW_LOC); + uint32_t FlagsMaskToZero = FullNZCVMask | (1U << X86State::RFLAG_AF_RAW_LOC) | (1U << X86State::RFLAG_PF_RAW_LOC); ZeroMultipleFlags(FlagsMaskToZero); SetRFLAG(Src); } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 6532e4d9ab..aa8d8e8a41 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -25,28 +25,25 @@ namespace FEXCore::IR { #define OpcodeArgs 
[[maybe_unused]] FEXCore::X86Tables::DecodedOp Op void OpDispatchBuilder::MOVVectorAlignedOp(OpcodeArgs) { - if (Op->Dest.IsGPR() && Op->Src[0].IsGPR() && - Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) { + if (Op->Dest.IsGPR() && Op->Src[0].IsGPR() && Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) { // Nop return; } - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); StoreResult(FPRClass, Op, Src, -1); } void OpDispatchBuilder::MOVVectorUnalignedOp(OpcodeArgs) { - if (Op->Dest.IsGPR() && Op->Src[0].IsGPR() && - Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) { + if (Op->Dest.IsGPR() && Op->Src[0].IsGPR() && Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) { // Nop return; } - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); StoreResult(FPRClass, Op, Src, 1); } void OpDispatchBuilder::MOVVectorNTOp(OpcodeArgs) { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, - {.Align = 1, .AccessType = MemoryAccessType::STREAM}); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1, .AccessType = MemoryAccessType::STREAM}); StoreResult(FPRClass, Op, Src, 1, MemoryAccessType::STREAM); } @@ -54,7 +51,7 @@ void OpDispatchBuilder::VMOVAPS_VMOVAPDOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); if (Is128Bit && Op->Dest.IsGPR()) { Src = _VMov(16, Src); @@ -66,7 +63,7 @@ void OpDispatchBuilder::VMOVUPS_VMOVUPDOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); + 
OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); if (Is128Bit && Op->Dest.IsGPR()) { Src = _VMov(16, Src); @@ -78,39 +75,37 @@ void OpDispatchBuilder::MOVHPDOp(OpcodeArgs) { if (Op->Dest.IsGPR()) { if (Op->Src[0].IsGPR()) { // MOVLHPS between two vector registers. - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, 16, Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, 16, Op->Flags); auto Result = _VInsElement(16, 8, 1, 0, Dest, Src); StoreResult(FPRClass, Op, Result, -1); - } - else { + } else { // If the destination is a GPR then the source is memory // xmm1[127:64] = src - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); - OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, 16, Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); + OrderedNode* Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, 16, Op->Flags); auto Result = _VLoadVectorElement(16, 8, Dest, 1, Src); StoreResult(FPRClass, Op, Result, -1); } - } - else { + } else { // In this case memory is the destination and the high bits of the XMM are source // Mem64 = xmm1[127:64] - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, 8, Op->Flags, {.LoadData = false}); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, 8, Op->Flags, {.LoadData = false}); _VStoreVectorElement(16, 8, Src, 1, Dest); } } void OpDispatchBuilder::VMOVHPOp(OpcodeArgs) { if (Op->Dest.IsGPR()) { - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 16}); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, 
{.Align = 8}); - OrderedNode *Result = _VInsElement(16, 8, 1, 0, Src1, Src2); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 16}); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, {.Align = 8}); + OrderedNode* Result = _VInsElement(16, 8, 1, 0, Src1, Src2); StoreResult(FPRClass, Op, Result, -1); } else { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 16}); - OrderedNode *Result = _VInsElement(16, 8, 0, 1, Src, Src); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 16}); + OrderedNode* Result = _VInsElement(16, 8, 0, 1, Src, Src); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, 8, 8); } } @@ -119,69 +114,65 @@ void OpDispatchBuilder::MOVLPOp(OpcodeArgs) { if (Op->Dest.IsGPR()) { // xmm, xmm is movhlps special case if (Op->Src[0].IsGPR()) { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 16}); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, {.Align = 16}); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 16}); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, {.Align = 16}); auto Result = _VInsElement(16, 8, 0, 1, Dest, Src); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, 16, 16); - } - else { + } else { auto DstSize = GetDstSize(Op); - OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = 8, .LoadData = false}); - OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags); + OrderedNode* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = 8, .LoadData = false}); + OrderedNode* Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags); auto Result = _VLoadVectorElement(16, 8, Dest, 0, Src); StoreResult(FPRClass, Op, Result, -1); } - } - else { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 8}); + } else { + OrderedNode* Src = 
LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 8}); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, 8, 8); } } void OpDispatchBuilder::VMOVLPOp(OpcodeArgs) { if (Op->Dest.IsGPR()) { - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 16}); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, {.Align = 8}); - OrderedNode *Result = _VInsElement(16, 8, 0, 0, Src1, Src2); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 16}); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, {.Align = 8}); + OrderedNode* Result = _VInsElement(16, 8, 0, 0, Src1, Src2); StoreResult(FPRClass, Op, Result, -1); } else { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 8}); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 8}); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, 8, 8); } } void OpDispatchBuilder::VMOVSHDUPOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = _VTrn2(SrcSize, 4, Src, Src); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = _VTrn2(SrcSize, 4, Src, Src); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::VMOVSLDUPOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = _VTrn(SrcSize, 4, Src, Src); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = _VTrn(SrcSize, 4, Src, Src); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::MOVScalarOpImpl(OpcodeArgs, size_t ElementSize) { if (Op->Dest.IsGPR() && Op->Src[0].IsGPR()) { // MOVSS/SD xmm1, xmm2 - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = 
LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); auto Result = _VInsElement(16, ElementSize, 0, 0, Dest, Src); StoreResult(FPRClass, Op, Result, -1); - } - else if (Op->Dest.IsGPR()) { + } else if (Op->Dest.IsGPR()) { // MOVSS/SD xmm1, mem32/mem64 // xmm1[127:0] <- zext(mem32/mem64) - OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], ElementSize, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], ElementSize, Op->Flags); StoreResult(FPRClass, Op, Src, -1); - } - else { + } else { // MOVSS/SD mem32/mem64, xmm1 - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, ElementSize, -1); } } @@ -197,17 +188,17 @@ void OpDispatchBuilder::MOVSDOp(OpcodeArgs) { void OpDispatchBuilder::VMOVScalarOpImpl(OpcodeArgs, size_t ElementSize) { if (Op->Dest.IsGPR() && Op->Src[0].IsGPR() && Op->Src[1].IsGPR()) { // VMOVSS/SD xmm1, xmm2, xmm3 - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - OrderedNode *Result = _VInsElement(16, ElementSize, 0, 0, Src1, Src2); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Result = _VInsElement(16, ElementSize, 0, 0, Src1, Src2); StoreResult(FPRClass, Op, Result, -1); } else if (Op->Dest.IsGPR()) { // VMOVSS/SD xmm1, mem32/mem64 - OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], ElementSize, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], ElementSize, Op->Flags); StoreResult(FPRClass, Op, Src, -1); } else { // VMOVSS/SD mem32/mem64, xmm1 - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, 
Op->Src[1], Op->Flags); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, ElementSize, -1); } } @@ -222,449 +213,286 @@ void OpDispatchBuilder::VMOVSSOp(OpcodeArgs) { void OpDispatchBuilder::VectorALUOpImpl(OpcodeArgs, IROps IROp, size_t ElementSize) { const auto Size = GetSrcSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); DeriveOp(ALUOp, IROp, _VAdd(Size, ElementSize, Dest, Src)); StoreResult(FPRClass, Op, ALUOp, -1); } -template +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs) { VectorALUOpImpl(Op, IROp, ElementSize); } -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void 
OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); - -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); - -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); - -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void 
OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); 
+template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); + +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); + +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); + +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void 
OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUOp(OpcodeArgs); void OpDispatchBuilder::AVXVectorALUOpImpl(OpcodeArgs, IROps IROp, size_t ElementSize) { const auto Size = GetSrcSize(Op); - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); DeriveOp(ALUOp, IROp, _VAdd(Size, ElementSize, Src1, Src2)); StoreResult(FPRClass, Op, ALUOp, -1); } -template +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs) { AVXVectorALUOpImpl(Op, IROp, ElementSize); } -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); - -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void 
OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); - -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); - -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); - -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); - -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); - -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void 
OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); - -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); + +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); + +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void 
OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); + +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); + +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); + +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); + +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); + +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs); void OpDispatchBuilder::VectorALUROpImpl(OpcodeArgs, IROps IROp, size_t ElementSize) { const auto Size = GetSrcSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode 
*Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); DeriveOp(ALUOp, IROp, _VAdd(Size, ElementSize, Src, Dest)); StoreResult(FPRClass, Op, ALUOp, -1); } -template +template void OpDispatchBuilder::VectorALUROp(OpcodeArgs) { VectorALUROpImpl(Op, IROp, ElementSize); } -template -void OpDispatchBuilder::VectorALUROp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUROp(OpcodeArgs); -template -void OpDispatchBuilder::VectorALUROp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUROp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUROp(OpcodeArgs); +template void OpDispatchBuilder::VectorALUROp(OpcodeArgs); -OrderedNode* OpDispatchBuilder::VectorScalarInsertALUOpImpl(OpcodeArgs, IROps IROp, - size_t DstSize, size_t ElementSize, - const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op, - bool ZeroUpperBits) { +OrderedNode* OpDispatchBuilder::VectorScalarInsertALUOpImpl(OpcodeArgs, IROps IROp, size_t DstSize, size_t ElementSize, + const X86Tables::DecodedOperand& Src1Op, + const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits) { // We load the full vector width when dealing with a source vector, // so that we don't do any unnecessary zero extension to the scalar // element that we're going to operate on. 
const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); - OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, - {.AllowUpperGarbage = true}); + OrderedNode* Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); + OrderedNode* Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, {.AllowUpperGarbage = true}); // If OpSize == ElementSize then it only does the lower scalar op - DeriveOp(ALUOp, IROp, - _VFAddScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, ZeroUpperBits)); + DeriveOp(ALUOp, IROp, _VFAddScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, ZeroUpperBits)); return ALUOp; } -template +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); auto Result = VectorScalarInsertALUOpImpl(Op, IROp, DstSize, ElementSize, Op->Dest, Op->Src[0], false); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); - -template +template void 
OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); + +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); auto Result = VectorScalarInsertALUOpImpl(Op, IROp, DstSize, ElementSize, Op->Src[0], Op->Src[1], true); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); -template -void 
OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); - -OrderedNode* OpDispatchBuilder::VectorScalarUnaryInsertALUOpImpl(OpcodeArgs, IROps IROp, - size_t DstSize, size_t ElementSize, - const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op, - bool ZeroUpperBits) { +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); + +OrderedNode* OpDispatchBuilder::VectorScalarUnaryInsertALUOpImpl(OpcodeArgs, IROps IROp, size_t DstSize, size_t ElementSize, + const X86Tables::DecodedOperand& Src1Op, + const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits) { // We load the full vector width when dealing with a source vector, // so that we don't do any unnecessary zero extension to the scalar // element that we're going to operate on. 
const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); - OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, - {.AllowUpperGarbage = true}); + OrderedNode* Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); + OrderedNode* Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, {.AllowUpperGarbage = true}); // If OpSize == ElementSize then it only does the lower scalar op DeriveOp(ALUOp, IROp, _VFSqrtScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, ZeroUpperBits)); return ALUOp; } -template +template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); auto Result = VectorScalarInsertALUOpImpl(Op, IROp, DstSize, ElementSize, Op->Dest, Op->Src[0], false); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); -template +template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); 
auto Result = VectorScalarInsertALUOpImpl(Op, IROp, DstSize, ElementSize, Op->Src[0], Op->Src[1], true); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); void OpDispatchBuilder::InsertMMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) { // We load the full vector width when dealing with a source vector, @@ -673,8 +501,8 @@ void OpDispatchBuilder::InsertMMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); const auto SrcSize = Op->Src[0].IsGPR() ? 8 : GetSrcSize(Op); - OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags); - OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); + OrderedNode* Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); // Always 32-bit. 
const size_t ElementSize = 4; @@ -684,24 +512,20 @@ void OpDispatchBuilder::InsertMMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) { StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Dest, DstSize, -1); } -OrderedNode* OpDispatchBuilder::InsertCVTGPR_To_FPRImpl(OpcodeArgs, - size_t DstSize, size_t DstElementSize, - const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op, - bool ZeroUpperBits) { +OrderedNode* OpDispatchBuilder::InsertCVTGPR_To_FPRImpl(OpcodeArgs, size_t DstSize, size_t DstElementSize, const X86Tables::DecodedOperand& Src1Op, + const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits) { // We load the full vector width when dealing with a source vector, // so that we don't do any unnecessary zero extension to the scalar // element that we're going to operate on. const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); + OrderedNode* Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); if (Src2Op.IsGPR()) { // If the source is a GPR then convert directly from the GPR. auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, CTX->GetGPRSize(), Op->Flags); return _VSToFGPRInsert(IR::SizeToOpSize(DstSize), DstElementSize, SrcSize, Src1, Src2, ZeroUpperBits); - } - else if (SrcSize != DstElementSize) { + } else if (SrcSize != DstElementSize) { // If the source is from memory but the Source size and destination size aren't the same, // then it is more optimal to load in to a GPR and convert between GPR->FPR. // ARM GPR->FPR conversion supports different size source and destinations while FPR->FPR doesn't. 
@@ -723,36 +547,29 @@ void OpDispatchBuilder::InsertCVTGPR_To_FPR(OpcodeArgs) { StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void OpDispatchBuilder::InsertCVTGPR_To_FPR<4>(OpcodeArgs); -template -void OpDispatchBuilder::InsertCVTGPR_To_FPR<8>(OpcodeArgs); +template void OpDispatchBuilder::InsertCVTGPR_To_FPR<4>(OpcodeArgs); +template void OpDispatchBuilder::InsertCVTGPR_To_FPR<8>(OpcodeArgs); -template +template void OpDispatchBuilder::AVXInsertCVTGPR_To_FPR(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); - OrderedNode *Result = InsertCVTGPR_To_FPRImpl(Op, DstSize, DstElementSize, Op->Src[0], Op->Src[1], true); + OrderedNode* Result = InsertCVTGPR_To_FPRImpl(Op, DstSize, DstElementSize, Op->Src[0], Op->Src[1], true); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<4>(OpcodeArgs); -template -void OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<8>(OpcodeArgs); +template void OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<4>(OpcodeArgs); +template void OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<8>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::InsertScalar_CVT_Float_To_FloatImpl(OpcodeArgs, - size_t DstSize, size_t DstElementSize, size_t SrcElementSize, - const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op, - bool ZeroUpperBits) { +OrderedNode* OpDispatchBuilder::InsertScalar_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstSize, size_t DstElementSize, + size_t SrcElementSize, const X86Tables::DecodedOperand& Src1Op, + const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits) { // We load the full vector width when dealing with a source vector, // so that we don't do any unnecessary zero extension to the scalar // element that we're going to operate on. 
const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); - OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, - {.AllowUpperGarbage = true}); + OrderedNode* Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); + OrderedNode* Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, {.AllowUpperGarbage = true}); return _VFToFScalarInsert(IR::SizeToOpSize(DstSize), DstElementSize, SrcElementSize, Src1, Src2, ZeroUpperBits); } @@ -760,40 +577,32 @@ OrderedNode* OpDispatchBuilder::InsertScalar_CVT_Float_To_FloatImpl(OpcodeArgs, template void OpDispatchBuilder::InsertScalar_CVT_Float_To_Float(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); - OrderedNode *Result = InsertScalar_CVT_Float_To_FloatImpl(Op, DstSize, DstElementSize, SrcElementSize, Op->Dest, Op->Src[0], false); + OrderedNode* Result = InsertScalar_CVT_Float_To_FloatImpl(Op, DstSize, DstElementSize, SrcElementSize, Op->Dest, Op->Src[0], false); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void OpDispatchBuilder::InsertScalar_CVT_Float_To_Float<4, 8>(OpcodeArgs); -template -void OpDispatchBuilder::InsertScalar_CVT_Float_To_Float<8, 4>(OpcodeArgs); +template void OpDispatchBuilder::InsertScalar_CVT_Float_To_Float<4, 8>(OpcodeArgs); +template void OpDispatchBuilder::InsertScalar_CVT_Float_To_Float<8, 4>(OpcodeArgs); -template +template void OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); - OrderedNode *Result = InsertScalar_CVT_Float_To_FloatImpl(Op, DstSize, DstElementSize, SrcElementSize, Op->Src[0], Op->Src[1], true); + OrderedNode* Result = InsertScalar_CVT_Float_To_FloatImpl(Op, DstSize, DstElementSize, SrcElementSize, Op->Src[0], Op->Src[1], true); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void 
OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<4, 8>(OpcodeArgs); -template -void OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<8, 4>(OpcodeArgs); +template void OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<4, 8>(OpcodeArgs); +template void OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<8, 4>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::InsertScalarRoundImpl(OpcodeArgs, - size_t DstSize, size_t ElementSize, - const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op, - uint64_t Mode, bool ZeroUpperBits) { +OrderedNode* OpDispatchBuilder::InsertScalarRoundImpl(OpcodeArgs, size_t DstSize, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, + const X86Tables::DecodedOperand& Src2Op, uint64_t Mode, bool ZeroUpperBits) { // We load the full vector width when dealing with a source vector, // so that we don't do any unnecessary zero extension to the scalar // element that we're going to operate on. const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); - OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, - {.AllowUpperGarbage = true}); + OrderedNode* Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); + OrderedNode* Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, {.AllowUpperGarbage = true}); const uint64_t RoundControlSource = (Mode >> 2) & 1; uint64_t RoundControl = Mode & 0b11; @@ -818,14 +627,12 @@ void OpDispatchBuilder::InsertScalarRound(OpcodeArgs) { const uint64_t Mode = Op->Src[1].Data.Literal.Value; const auto DstSize = GetGuestVectorLength(); - OrderedNode *Result = InsertScalarRoundImpl(Op, DstSize, ElementSize, Op->Dest, Op->Src[0], Mode, false); + OrderedNode* Result = InsertScalarRoundImpl(Op, DstSize, ElementSize, Op->Dest, Op->Src[0], Mode, false); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void 
OpDispatchBuilder::InsertScalarRound<4>(OpcodeArgs); -template -void OpDispatchBuilder::InsertScalarRound<8>(OpcodeArgs); +template void OpDispatchBuilder::InsertScalarRound<4>(OpcodeArgs); +template void OpDispatchBuilder::InsertScalarRound<8>(OpcodeArgs); template void OpDispatchBuilder::AVXInsertScalarRound(OpcodeArgs) { @@ -833,58 +640,74 @@ void OpDispatchBuilder::AVXInsertScalarRound(OpcodeArgs) { const uint64_t Mode = Op->Src[2].Data.Literal.Value; const auto DstSize = GetGuestVectorLength(); - OrderedNode *Result = InsertScalarRoundImpl(Op, DstSize, ElementSize, Op->Dest, Op->Src[0], Mode, true); + OrderedNode* Result = InsertScalarRoundImpl(Op, DstSize, ElementSize, Op->Dest, Op->Src[0], Mode, true); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void OpDispatchBuilder::AVXInsertScalarRound<4>(OpcodeArgs); -template -void OpDispatchBuilder::AVXInsertScalarRound<8>(OpcodeArgs); +template void OpDispatchBuilder::AVXInsertScalarRound<4>(OpcodeArgs); +template void OpDispatchBuilder::AVXInsertScalarRound<8>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::InsertScalarFCMPOpImpl(OpcodeArgs, - size_t DstSize, size_t ElementSize, - const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op, - uint8_t CompType, bool ZeroUpperBits) { +OrderedNode* OpDispatchBuilder::InsertScalarFCMPOpImpl(OpcodeArgs, size_t DstSize, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, + const X86Tables::DecodedOperand& Src2Op, uint8_t CompType, bool ZeroUpperBits) { // We load the full vector width when dealing with a source vector, // so that we don't do any unnecessary zero extension to the scalar // element that we're going to operate on. 
const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); - OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, - {.AllowUpperGarbage = true}); + OrderedNode* Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); + OrderedNode* Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, {.AllowUpperGarbage = true}); switch (CompType) { - case 0x00: case 0x08: case 0x10: case 0x18: // EQ - return _VFCMPScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, FloatCompareOp::EQ, ZeroUpperBits); - case 0x01: case 0x09: case 0x11: case 0x19: // LT, GT(Swapped operand) - return _VFCMPScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, FloatCompareOp::LT, ZeroUpperBits); - case 0x02: case 0x0A: case 0x12: case 0x1A: // LE, GE(Swapped operand) - return _VFCMPScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, FloatCompareOp::LE, ZeroUpperBits); - case 0x03: case 0x0B: case 0x13: case 0x1B: // Unordered - return _VFCMPScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, FloatCompareOp::UNO, ZeroUpperBits); - case 0x04: case 0x0C: case 0x14: case 0x1C: // NEQ - return _VFCMPScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, FloatCompareOp::NEQ, ZeroUpperBits); - case 0x05: case 0x0D: case 0x15: case 0x1D: { // NLT, NGT(Swapped operand) - OrderedNode *Result = _VFCMPLT(ElementSize, ElementSize, Src1, Src2); - Result = _VNot(ElementSize, ElementSize, Result); - // Insert the lower bits - return _VInsElement(GetDstSize(Op), ElementSize, 0, 0, Src1, Result); - } - case 0x06: case 0x0E: case 0x16: case 0x1E: { // NLE, NGE(Swapped operand) - OrderedNode *Result = _VFCMPLE(ElementSize, ElementSize, Src1, Src2); - Result = _VNot(ElementSize, ElementSize, Result); - // Insert the lower bits - return _VInsElement(GetDstSize(Op), ElementSize, 0, 0, Src1, Result); - } - case 0x07: case 0x0F: case 0x17: 
case 0x1F: // Ordered - return _VFCMPScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, FloatCompareOp::ORD, ZeroUpperBits); - default: - LOGMAN_MSG_A_FMT("Unknown Comparison type: {}", CompType); - break; + case 0x00: + case 0x08: + case 0x10: + case 0x18: // EQ + return _VFCMPScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, FloatCompareOp::EQ, ZeroUpperBits); + case 0x01: + case 0x09: + case 0x11: + case 0x19: // LT, GT(Swapped operand) + return _VFCMPScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, FloatCompareOp::LT, ZeroUpperBits); + case 0x02: + case 0x0A: + case 0x12: + case 0x1A: // LE, GE(Swapped operand) + return _VFCMPScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, FloatCompareOp::LE, ZeroUpperBits); + case 0x03: + case 0x0B: + case 0x13: + case 0x1B: // Unordered + return _VFCMPScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, FloatCompareOp::UNO, ZeroUpperBits); + case 0x04: + case 0x0C: + case 0x14: + case 0x1C: // NEQ + return _VFCMPScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, FloatCompareOp::NEQ, ZeroUpperBits); + case 0x05: + case 0x0D: + case 0x15: + case 0x1D: { // NLT, NGT(Swapped operand) + OrderedNode* Result = _VFCMPLT(ElementSize, ElementSize, Src1, Src2); + Result = _VNot(ElementSize, ElementSize, Result); + // Insert the lower bits + return _VInsElement(GetDstSize(Op), ElementSize, 0, 0, Src1, Result); + } + case 0x06: + case 0x0E: + case 0x16: + case 0x1E: { // NLE, NGE(Swapped operand) + OrderedNode* Result = _VFCMPLE(ElementSize, ElementSize, Src1, Src2); + Result = _VNot(ElementSize, ElementSize, Result); + // Insert the lower bits + return _VInsElement(GetDstSize(Op), ElementSize, 0, 0, Src1, Result); + } + case 0x07: + case 0x0F: + case 0x17: + case 0x1F: // Ordered + return _VFCMPScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, FloatCompareOp::ORD, ZeroUpperBits); + default: LOGMAN_MSG_A_FMT("Unknown Comparison type: 
{}", CompType); break; } FEX_UNREACHABLE; } @@ -895,14 +718,12 @@ void OpDispatchBuilder::InsertScalarFCMPOp(OpcodeArgs) { const uint8_t CompType = Op->Src[1].Data.Literal.Value; const auto DstSize = GetGuestVectorLength(); - OrderedNode *Result = InsertScalarFCMPOpImpl(Op, DstSize, ElementSize, Op->Dest, Op->Src[0], CompType, false); + OrderedNode* Result = InsertScalarFCMPOpImpl(Op, DstSize, ElementSize, Op->Dest, Op->Src[0], CompType, false); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void OpDispatchBuilder::InsertScalarFCMPOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::InsertScalarFCMPOp<8>(OpcodeArgs); +template void OpDispatchBuilder::InsertScalarFCMPOp<4>(OpcodeArgs); +template void OpDispatchBuilder::InsertScalarFCMPOp<8>(OpcodeArgs); template void OpDispatchBuilder::AVXInsertScalarFCMPOp(OpcodeArgs) { @@ -910,14 +731,12 @@ void OpDispatchBuilder::AVXInsertScalarFCMPOp(OpcodeArgs) { const uint8_t CompType = Op->Src[2].Data.Literal.Value; const auto DstSize = GetGuestVectorLength(); - OrderedNode *Result = InsertScalarFCMPOpImpl(Op, DstSize, ElementSize, Op->Src[0], Op->Src[1], CompType, true); + OrderedNode* Result = InsertScalarFCMPOpImpl(Op, DstSize, ElementSize, Op->Src[0], Op->Src[1], CompType, true); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void OpDispatchBuilder::AVXInsertScalarFCMPOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::AVXInsertScalarFCMPOp<8>(OpcodeArgs); +template void OpDispatchBuilder::AVXInsertScalarFCMPOp<4>(OpcodeArgs); +template void OpDispatchBuilder::AVXInsertScalarFCMPOp<8>(OpcodeArgs); void OpDispatchBuilder::VectorUnaryOpImpl(OpcodeArgs, IROps IROp, size_t ElementSize) { // In the event of a scalar operation and a vector source, then @@ -927,34 +746,27 @@ void OpDispatchBuilder::VectorUnaryOpImpl(OpcodeArgs, IROps IROp, size_t Element const auto SrcSize = GetSrcSize(Op); const auto OpSize = GetSrcSize(Op); - OrderedNode *Src = 
LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); DeriveOp(ALUOp, IROp, _VFSqrt(OpSize, ElementSize, Src)); StoreResult(FPRClass, Op, ALUOp, -1); } -template +template void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs) { VectorUnaryOpImpl(Op, IROp, ElementSize); } -template -void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs); void OpDispatchBuilder::AVXVectorUnaryOpImpl(OpcodeArgs, IROps IROp, size_t ElementSize) { // In the event of a scalar operation and a vector source, then @@ -964,7 +776,7 @@ void OpDispatchBuilder::AVXVectorUnaryOpImpl(OpcodeArgs, IROps IROp, size_t Elem const auto SrcSize = GetSrcSize(Op); const auto OpSize = GetSrcSize(Op); - OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); DeriveOp(ALUOp, IROp, _VFSqrt(OpSize, ElementSize, Src)); @@ -976,31 +788,24 @@ void OpDispatchBuilder::AVXVectorUnaryOpImpl(OpcodeArgs, IROps IROp, size_t Elem StoreResult(FPRClass, Op, ALUOp, -1); } -template +template void 
OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs) { AVXVectorUnaryOpImpl(Op, IROp, ElementSize); } -template -void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs); void OpDispatchBuilder::VectorUnaryDuplicateOpImpl(OpcodeArgs, IROps IROp, size_t ElementSize) { const auto Size = GetSrcSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); DeriveOp(ALUOp, IROp, _VFSqrt(ElementSize, ElementSize, Src)); @@ -1009,19 +814,17 @@ void OpDispatchBuilder::VectorUnaryDuplicateOpImpl(OpcodeArgs, IROps IROp, size_ StoreResult(FPRClass, Op, Result, -1); } -template +template void OpDispatchBuilder::VectorUnaryDuplicateOp(OpcodeArgs) { VectorUnaryDuplicateOpImpl(Op, IROp, ElementSize); } -template -void OpDispatchBuilder::VectorUnaryDuplicateOp(OpcodeArgs); -template -void OpDispatchBuilder::VectorUnaryDuplicateOp(OpcodeArgs); +template void OpDispatchBuilder::VectorUnaryDuplicateOp(OpcodeArgs); +template void OpDispatchBuilder::VectorUnaryDuplicateOp(OpcodeArgs); void OpDispatchBuilder::MOVQOp(OpcodeArgs) { const auto SrcSize = Op->Src[0].IsGPR() ? 
16U : GetSrcSize(Op); - OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); // This instruction is a bit special that if the destination is a register then it'll ZEXT the 64bit source to 128bit if (Op->Dest.IsGPR()) { const auto gpr = Op->Dest.Data.GPR.GPR; @@ -1029,15 +832,14 @@ void OpDispatchBuilder::MOVQOp(OpcodeArgs) { auto Reg = _VMov(8, Src); StoreXMMRegister(gprIndex, Reg); - } - else { + } else { // This is simple, just store the result StoreResult(FPRClass, Op, Src, -1); } } void OpDispatchBuilder::MOVQMMXOp(OpcodeArgs) { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); StoreResult(FPRClass, Op, Src, 1); } @@ -1046,23 +848,22 @@ void OpDispatchBuilder::MOVMSKOp(OpcodeArgs) { auto Size = GetSrcSize(Op); uint8_t NumElements = Size / ElementSize; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); if (Size == 16 && ElementSize == 8) { // UnZip2 the 64-bit elements as 32-bit to get the sign bits closer. // Sign bits are now in bit positions 31 and 63 after this. - Src =_VUnZip2(Size, 4, Src, Src); + Src = _VUnZip2(Size, 4, Src, Src); // Extract the low 64-bits to GPR in one move. - OrderedNode *GPR = _VExtractToGPR(Size, 8, Src, 0); + OrderedNode* GPR = _VExtractToGPR(Size, 8, Src, 0); // BFI the sign bit in 31 in to 62. // Inserting the full lower 32-bits offset 31 so the sign bit ends up at offset 63. GPR = _Bfi(OpSize::i64Bit, 32, 31, GPR, GPR); // Shift right to only get the two sign bits we care about. 
GPR = _Lshr(OpSize::i64Bit, GPR, _Constant(62)); StoreResult_WithOpSize(GPRClass, Op, Op->Dest, GPR, CTX->GetGPRSize(), -1); - } - else if (Size == 16 && ElementSize == 4) { + } else if (Size == 16 && ElementSize == 4) { // Shift all the sign bits to the bottom of their respective elements. Src = _VUShrI(Size, 4, Src, 31); // Load the specific 128-bit movmskps shift elements operator. @@ -1072,15 +873,14 @@ void OpDispatchBuilder::MOVMSKOp(OpcodeArgs) { // Add across the vector so the sign bits will end up in bits [3:0] Src = _VAddV(Size, 4, Src); // Extract to a GPR. - OrderedNode *GPR = _VExtractToGPR(Size, 4, Src, 0); + OrderedNode* GPR = _VExtractToGPR(Size, 4, Src, 0); StoreResult_WithOpSize(GPRClass, Op, Op->Dest, GPR, CTX->GetGPRSize(), -1); - } - else { - OrderedNode *CurrentVal = _Constant(0); + } else { + OrderedNode* CurrentVal = _Constant(0); for (unsigned i = 0; i < NumElements; ++i) { // Extract the top bit of the element - OrderedNode *Tmp = _VExtractToGPR(Size, ElementSize, Src, i); + OrderedNode* Tmp = _VExtractToGPR(Size, ElementSize, Src, i); Tmp = _Bfe(IR::SizeToOpSize(ElementSize), 1, ElementSize * 8 - 1, Tmp); // Shift it to the correct location @@ -1093,25 +893,23 @@ void OpDispatchBuilder::MOVMSKOp(OpcodeArgs) { } } -template -void OpDispatchBuilder::MOVMSKOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::MOVMSKOp<8>(OpcodeArgs); +template void OpDispatchBuilder::MOVMSKOp<4>(OpcodeArgs); +template void OpDispatchBuilder::MOVMSKOp<8>(OpcodeArgs); void OpDispatchBuilder::MOVMSKOpOne(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto ExtractSize = Is256Bit ? 
4 : 2; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *VMask = _VDupFromGPR(SrcSize, 8, _Constant(0x80'40'20'10'08'04'02'01ULL)); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* VMask = _VDupFromGPR(SrcSize, 8, _Constant(0x80'40'20'10'08'04'02'01ULL)); auto VCMP = _VCMPLTZ(SrcSize, 1, Src); auto VAnd = _VAnd(SrcSize, 1, VCMP, VMask); // Since we also handle the MM MOVMSKB here too, // we need to clamp the lower bound. - const auto VAdd1Size = std::max(SrcSize, uint8_t{16}); + const auto VAdd1Size = std::max(SrcSize, uint8_t {16}); const auto VAdd2Size = std::max(SrcSize / 2, 8); auto VAdd1 = _VAddP(VAdd1Size, 1, VAnd, VAnd); @@ -1127,36 +925,32 @@ template void OpDispatchBuilder::PUNPCKLOp(OpcodeArgs) { auto Size = GetSrcSize(Op); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); auto ALUOp = _VZip(Size, ElementSize, Dest, Src); StoreResult(FPRClass, Op, ALUOp, -1); } -template -void OpDispatchBuilder::PUNPCKLOp<1>(OpcodeArgs); -template -void OpDispatchBuilder::PUNPCKLOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::PUNPCKLOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::PUNPCKLOp<8>(OpcodeArgs); +template void OpDispatchBuilder::PUNPCKLOp<1>(OpcodeArgs); +template void OpDispatchBuilder::PUNPCKLOp<2>(OpcodeArgs); +template void OpDispatchBuilder::PUNPCKLOp<4>(OpcodeArgs); +template void OpDispatchBuilder::PUNPCKLOp<8>(OpcodeArgs); -template +template void OpDispatchBuilder::VPUNPCKLOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* 
Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - OrderedNode *Result{}; + OrderedNode* Result {}; if (Is128Bit) { Result = _VZip(SrcSize, ElementSize, Src1, Src2); } else { - OrderedNode *ZipLo = _VZip(SrcSize, ElementSize, Src1, Src2); - OrderedNode *ZipHi = _VZip2(SrcSize, ElementSize, Src1, Src2); + OrderedNode* ZipLo = _VZip(SrcSize, ElementSize, Src1, Src2); + OrderedNode* ZipHi = _VZip2(SrcSize, ElementSize, Src1, Src2); Result = _VInsElement(SrcSize, 16, 1, 0, ZipLo, ZipHi); } @@ -1164,48 +958,40 @@ void OpDispatchBuilder::VPUNPCKLOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPUNPCKLOp<1>(OpcodeArgs); -template -void OpDispatchBuilder::VPUNPCKLOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::VPUNPCKLOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VPUNPCKLOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VPUNPCKLOp<1>(OpcodeArgs); +template void OpDispatchBuilder::VPUNPCKLOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VPUNPCKLOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPUNPCKLOp<8>(OpcodeArgs); template void OpDispatchBuilder::PUNPCKHOp(OpcodeArgs) { auto Size = GetSrcSize(Op); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); auto ALUOp = _VZip2(Size, ElementSize, Dest, Src); StoreResult(FPRClass, Op, ALUOp, -1); } -template -void OpDispatchBuilder::PUNPCKHOp<1>(OpcodeArgs); -template -void OpDispatchBuilder::PUNPCKHOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::PUNPCKHOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::PUNPCKHOp<8>(OpcodeArgs); +template void OpDispatchBuilder::PUNPCKHOp<1>(OpcodeArgs); +template void OpDispatchBuilder::PUNPCKHOp<2>(OpcodeArgs); 
+template void OpDispatchBuilder::PUNPCKHOp<4>(OpcodeArgs); +template void OpDispatchBuilder::PUNPCKHOp<8>(OpcodeArgs); -template +template void OpDispatchBuilder::VPUNPCKHOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - OrderedNode *Result{}; + OrderedNode* Result {}; if (Is128Bit) { Result = _VZip2(SrcSize, ElementSize, Src1, Src2); } else { - OrderedNode *ZipLo = _VZip(SrcSize, ElementSize, Src1, Src2); - OrderedNode *ZipHi = _VZip2(SrcSize, ElementSize, Src1, Src2); + OrderedNode* ZipLo = _VZip(SrcSize, ElementSize, Src1, Src2); + OrderedNode* ZipHi = _VZip2(SrcSize, ElementSize, Src1, Src2); Result = _VInsElement(SrcSize, 16, 0, 1, ZipHi, ZipLo); } @@ -1213,26 +999,21 @@ void OpDispatchBuilder::VPUNPCKHOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPUNPCKHOp<1>(OpcodeArgs); -template -void OpDispatchBuilder::VPUNPCKHOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::VPUNPCKHOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VPUNPCKHOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VPUNPCKHOp<1>(OpcodeArgs); +template void OpDispatchBuilder::VPUNPCKHOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VPUNPCKHOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPUNPCKHOp<8>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::PSHUFBOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, - const X86Tables::DecodedOperand& Src2) { - OrderedNode *Src1Node = LoadSource(FPRClass, Op, Src1, Op->Flags); - OrderedNode *Src2Node = LoadSource(FPRClass, Op, Src2, Op->Flags); +OrderedNode* OpDispatchBuilder::PSHUFBOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, 
const X86Tables::DecodedOperand& Src2) { + OrderedNode* Src1Node = LoadSource(FPRClass, Op, Src1, Op->Flags); + OrderedNode* Src2Node = LoadSource(FPRClass, Op, Src2, Op->Flags); const auto SrcSize = GetSrcSize(Op); const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; // We perform the 256-bit version as two 128-bit operations due to // the lane splitting behavior, so cap the maximum size at 16. - const auto SanitizedSrcSize = std::min(SrcSize, uint8_t{16}); + const auto SanitizedSrcSize = std::min(SrcSize, uint8_t {16}); // PSHUFB doesn't 100% match VTBL behaviour // VTBL will set the element zero if the index is greater than @@ -1242,29 +1023,28 @@ OrderedNode* OpDispatchBuilder::PSHUFBOpImpl(OpcodeArgs, const X86Tables::Decode // Mask the selection bits and top bit correctly // Bits [6:4] is reserved for 128-bit/256-bit // Bits [6:3] is reserved for 64-bit - const uint8_t MaskImm = SrcSize == 8 ? 0b1000'0111 - : 0b1000'1111; + const uint8_t MaskImm = SrcSize == 8 ? 0b1000'0111 : 0b1000'1111; - OrderedNode *MaskVector = _VectorImm(SrcSize, 1, MaskImm); - OrderedNode *MaskedIndices = _VAnd(SrcSize, SrcSize, Src2Node, MaskVector); + OrderedNode* MaskVector = _VectorImm(SrcSize, 1, MaskImm); + OrderedNode* MaskedIndices = _VAnd(SrcSize, SrcSize, Src2Node, MaskVector); - OrderedNode *Low = _VTBL1(SanitizedSrcSize, Src1Node, MaskedIndices); + OrderedNode* Low = _VTBL1(SanitizedSrcSize, Src1Node, MaskedIndices); if (!Is256Bit) { return Low; } - OrderedNode *HighSrc1 = _VInsElement(SrcSize, 16, 0, 1, Src1Node, Src1Node); - OrderedNode *High = _VTBL1(SanitizedSrcSize, HighSrc1, MaskedIndices); + OrderedNode* HighSrc1 = _VInsElement(SrcSize, 16, 0, 1, Src1Node, Src1Node); + OrderedNode* High = _VTBL1(SanitizedSrcSize, HighSrc1, MaskedIndices); return _VInsElement(SrcSize, 16, 1, 0, Low, High); } void OpDispatchBuilder::PSHUFBOp(OpcodeArgs) { - OrderedNode *Result = PSHUFBOpImpl(Op, Op->Dest, Op->Src[0]); + OrderedNode* Result = PSHUFBOpImpl(Op, Op->Dest, 
Op->Src[0]); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::VPSHUFBOp(OpcodeArgs) { - OrderedNode *Result = PSHUFBOpImpl(Op, Op->Src[0], Op->Src[1]); + OrderedNode* Result = PSHUFBOpImpl(Op, Op->Src[0], Op->Src[1]); StoreResult(FPRClass, Op, Result, -1); } @@ -1273,39 +1053,40 @@ void OpDispatchBuilder::PSHUFW8ByteOp(OpcodeArgs) { uint16_t Shuffle = Op->Src[1].Data.Literal.Value; const auto Size = GetSrcSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest{}; + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest {}; // TODO: There can be more optimized copies here. switch (Shuffle) { - case IdentityCopy: { - // Special case identity copy. - Dest = Src; - break; - } - case 0b01'01'00'00: { - // Zip with self. - // Dest[0] = Src[0] - // Dest[1] = Src[0] - // Dest[2] = Src[1] - // Dest[3] = Src[1] - Dest = _VZip(Size, 2, Src, Src); - break; - } - case 0b00'00'00'00: - case 0b01'01'01'01: - case 0b10'10'10'10: - case 0b11'11'11'11: { - // Special case element duplicate and broadcasts. - Dest = _VDupElement(Size, 2, Src, (Shuffle & 0b11)); - break; - } - default: { - // PSHUFW (mmx) also needs to scale by 16 to get correct low element. - auto LookupIndexes = LoadAndCacheIndexedNamedVectorConstant(Size, FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW, Shuffle * 16); - Dest = _VTBL1(Size, Src, LookupIndexes); - break; - } + case IdentityCopy: { + // Special case identity copy. + Dest = Src; + break; + } + case 0b01'01'00'00: { + // Zip with self. + // Dest[0] = Src[0] + // Dest[1] = Src[0] + // Dest[2] = Src[1] + // Dest[3] = Src[1] + Dest = _VZip(Size, 2, Src, Src); + break; + } + case 0b00'00'00'00: + case 0b01'01'01'01: + case 0b10'10'10'10: + case 0b11'11'11'11: { + // Special case element duplicate and broadcasts. 
+ Dest = _VDupElement(Size, 2, Src, (Shuffle & 0b11)); + break; + } + default: { + // PSHUFW (mmx) also needs to scale by 16 to get correct low element. + auto LookupIndexes = + LoadAndCacheIndexedNamedVectorConstant(Size, FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW, Shuffle * 16); + Dest = _VTBL1(Size, Src, LookupIndexes); + break; + } } StoreResult(FPRClass, Op, Dest, -1); @@ -1317,117 +1098,113 @@ void OpDispatchBuilder::PSHUFWOp(OpcodeArgs) { uint16_t Shuffle = Op->Src[1].Data.Literal.Value; const auto Size = GetSrcSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest{}; + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest {}; const uint8_t NumElements = Size / 2; const uint8_t HalfNumElements = NumElements >> 1; // TODO: There can be more optimized copies here. switch (Shuffle) { - case IdentityCopy: { - // Special case identity copy. - Dest = Src; - break; - } - case 0b01'01'00'00: { - // Zip with self. - // Dest[0] = Src[0] - // Dest[1] = Src[0] - // Dest[2] = Src[1] - // Dest[3] = Src[1] - auto Zip = _VZip(Size, 2, Src, Src); - if (Low) { - Dest = _VZip(Size, 8, Zip, Src); - } - else { - Dest = _VZip(Size, 8, Src, Zip); - } - break; - } - case 0b00'00'00'00: - case 0b01'01'01'01: - case 0b10'10'10'10: - case 0b11'11'11'11: { - // Special case element duplicate and broadcast to low or high 64-bits. - auto DUP = _VDupElement(Size, 2, Src, (Low ? 0 : HalfNumElements) + (Shuffle & 0b11)); - if (Low) { - // DUP goes low. - // Source goes high. - Dest = _VTrn2(Size, 8, DUP, Src); - } - else { - // DUP goes high. - // Source goes low. - Dest = _VTrn(Size, 8, Src, DUP); - } - break; + case IdentityCopy: { + // Special case identity copy. + Dest = Src; + break; + } + case 0b01'01'00'00: { + // Zip with self. 
+ // Dest[0] = Src[0] + // Dest[1] = Src[0] + // Dest[2] = Src[1] + // Dest[3] = Src[1] + auto Zip = _VZip(Size, 2, Src, Src); + if (Low) { + Dest = _VZip(Size, 8, Zip, Src); + } else { + Dest = _VZip(Size, 8, Src, Zip); } - default: { - // PSHUFLW needs to scale index by 16. - // PSHUFHW needs to scale index by 16. - // PSHUFW (mmx) also needs to scale by 16 to get correct low element. - const auto IndexedVectorConstant = Low ? - FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW : - FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFHW; - auto LookupIndexes = LoadAndCacheIndexedNamedVectorConstant(Size, IndexedVectorConstant, Shuffle * 16); - Dest = _VTBL1(Size, Src, LookupIndexes); - break; + break; + } + case 0b00'00'00'00: + case 0b01'01'01'01: + case 0b10'10'10'10: + case 0b11'11'11'11: { + // Special case element duplicate and broadcast to low or high 64-bits. + auto DUP = _VDupElement(Size, 2, Src, (Low ? 0 : HalfNumElements) + (Shuffle & 0b11)); + if (Low) { + // DUP goes low. + // Source goes high. + Dest = _VTrn2(Size, 8, DUP, Src); + } else { + // DUP goes high. + // Source goes low. + Dest = _VTrn(Size, 8, Src, DUP); } + break; + } + default: { + // PSHUFLW needs to scale index by 16. + // PSHUFHW needs to scale index by 16. + // PSHUFW (mmx) also needs to scale by 16 to get correct low element. + const auto IndexedVectorConstant = Low ? 
FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW : + FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFHW; + auto LookupIndexes = LoadAndCacheIndexedNamedVectorConstant(Size, IndexedVectorConstant, Shuffle * 16); + Dest = _VTBL1(Size, Src, LookupIndexes); + break; + } } StoreResult(FPRClass, Op, Dest, -1); } -template -void OpDispatchBuilder::PSHUFWOp(OpcodeArgs); -template -void OpDispatchBuilder::PSHUFWOp(OpcodeArgs); +template void OpDispatchBuilder::PSHUFWOp(OpcodeArgs); +template void OpDispatchBuilder::PSHUFWOp(OpcodeArgs); void OpDispatchBuilder::PSHUFDOp(OpcodeArgs) { constexpr auto IdentityCopy = 0b11'10'01'00; uint16_t Shuffle = Op->Src[1].Data.Literal.Value; const auto Size = GetSrcSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest{}; + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest {}; // TODO: There can be more optimized copies here. switch (Shuffle) { - case IdentityCopy: { - // Special case identity copy. - Dest = Src; - break; - } - case 0b01'01'00'00: { - // Zip with self. - // Dest[0] = Src[0] - // Dest[1] = Src[0] - // Dest[2] = Src[1] - // Dest[3] = Src[1] - Dest = _VZip(Size, 4, Src, Src); - break; - } - case 0b00'00'00'00: - case 0b01'01'01'01: - case 0b10'10'10'10: - case 0b11'11'11'11: { - // Special case element duplicate and broadcast to low or high 64-bits. - Dest = _VDupElement(Size, 4, Src, Shuffle & 0b11); - break; - } - default: { - // PSHUFD needs to scale index by 16. - auto LookupIndexes = LoadAndCacheIndexedNamedVectorConstant(Size, FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFD, Shuffle * 16); - Dest = _VTBL1(Size, Src, LookupIndexes); - break; - } + case IdentityCopy: { + // Special case identity copy. + Dest = Src; + break; + } + case 0b01'01'00'00: { + // Zip with self. 
+ // Dest[0] = Src[0] + // Dest[1] = Src[0] + // Dest[2] = Src[1] + // Dest[3] = Src[1] + Dest = _VZip(Size, 4, Src, Src); + break; + } + case 0b00'00'00'00: + case 0b01'01'01'01: + case 0b10'10'10'10: + case 0b11'11'11'11: { + // Special case element duplicate and broadcast to low or high 64-bits. + Dest = _VDupElement(Size, 4, Src, Shuffle & 0b11); + break; + } + default: { + // PSHUFD needs to scale index by 16. + auto LookupIndexes = + LoadAndCacheIndexedNamedVectorConstant(Size, FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFD, Shuffle * 16); + Dest = _VTBL1(Size, Src, LookupIndexes); + break; + } } StoreResult(FPRClass, Op, Dest, -1); } -template +template void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; @@ -1435,7 +1212,7 @@ void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src[1] needs to be a literal"); auto Shuffle = Op->Src[1].Data.Literal.Value; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); // Note/TODO: With better immediate facilities or vector loading in our IR // much of this can be reduced to setting up a table index register @@ -1450,7 +1227,7 @@ void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs) { // Should be much nicer than doing repeated inserts in any case. const size_t BaseElement = Low ? 
0 : 4; - OrderedNode *Result = Src; + OrderedNode* Result = Src; if (Is256Bit) { for (size_t i = 0; i < 4; i++) { const auto Index = Shuffle & 0b11; @@ -1478,19 +1255,14 @@ void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPSHUFWOp<2, false>(OpcodeArgs); -template -void OpDispatchBuilder::VPSHUFWOp<2, true>(OpcodeArgs); -template -void OpDispatchBuilder::VPSHUFWOp<4, true>(OpcodeArgs); +template void OpDispatchBuilder::VPSHUFWOp<2, false>(OpcodeArgs); +template void OpDispatchBuilder::VPSHUFWOp<2, true>(OpcodeArgs); +template void OpDispatchBuilder::VPSHUFWOp<4, true>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, size_t ElementSize, - const X86Tables::DecodedOperand& Src1, - const X86Tables::DecodedOperand& Src2, - const X86Tables::DecodedOperand& Imm) { - OrderedNode *Src1Node = LoadSource(FPRClass, Op, Src1, Op->Flags); - OrderedNode *Src2Node = LoadSource(FPRClass, Op, Src2, Op->Flags); +OrderedNode* OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, size_t ElementSize, const X86Tables::DecodedOperand& Src1, + const X86Tables::DecodedOperand& Src2, const X86Tables::DecodedOperand& Imm) { + OrderedNode* Src1Node = LoadSource(FPRClass, Op, Src1, Op->Flags); + OrderedNode* Src2Node = LoadSource(FPRClass, Op, Src2, Op->Flags); LOGMAN_THROW_A_FMT(Imm.IsLiteral(), "Imm needs to be a literal"); uint8_t Shuffle = Imm.Data.Literal.Value; @@ -1504,7 +1276,7 @@ OrderedNode* OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, size_t ElementSize, const uint8_t DstSize = GetDstSize(Op); const bool Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; - std::array Srcs{}; + std::array Srcs {}; for (size_t i = 0; i < HalfNumElements; ++i) { Srcs[i] = Src1Node; } @@ -1512,21 +1284,20 @@ OrderedNode* OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, size_t ElementSize, Srcs[i] = Src2Node; } - OrderedNode *Dest = Src1Node; + OrderedNode* Dest = Src1Node; const uint8_t SelectionMask = NumElements - 1; const 
uint8_t ShiftAmount = std::popcount(SelectionMask); if (Is256Bit) { for (uint8_t Element = 0; Element < NumElements; ++Element) { - const auto SrcIndex1 = Shuffle & SelectionMask; + const auto SrcIndex1 = Shuffle & SelectionMask; // AVX differs the behavior of VSHUFPD and VSHUFPS. // The same immediate bits are used for both lanes with VSHUFPS, // but VSHUFPD uses different immediate bits for each lane. - const auto SrcIndex2 = ElementSize == 4 ? SrcIndex1 - : ((Shuffle >> 2) & SelectionMask); + const auto SrcIndex2 = ElementSize == 4 ? SrcIndex1 : ((Shuffle >> 2) & SelectionMask); - OrderedNode *Insert = _VInsElement(DstSize, ElementSize, Element, SrcIndex1, Dest, Srcs[Element]); + OrderedNode* Insert = _VInsElement(DstSize, ElementSize, Element, SrcIndex1, Dest, Srcs[Element]); Dest = _VInsElement(DstSize, ElementSize, Element + NumElements, SrcIndex2 + NumElements, Insert, Srcs[Element]); Shuffle >>= ShiftAmount; @@ -1536,148 +1307,147 @@ OrderedNode* OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, size_t ElementSize, // We can shuffle optimally in a lot of cases. // TODO: We can optimize more of these cases. switch (Shuffle) { - case 0b01'00'01'00: - // Combining of low 64-bits. - // Dest[63:0] = Src1[63:0] - // Dest[127:64] = Src2[63:0] - return _VZip(DstSize, 8, Src1Node, Src2Node); - case 0b11'10'11'10: - // Combining of high 64-bits. - // Dest[63:0] = Src1[127:64] - // Dest[127:64] = Src2[127:64] - return _VZip2(DstSize, 8, Src1Node, Src2Node); - case 0b11'10'01'00: - // Mixing Low and high elements - // Dest[63:0] = Src1[63:0] - // Dest[127:64] = Src2[127:64] - return _VInsElement(DstSize, 8, 1, 1, Src1Node, Src2Node); - case 0b01'00'11'10: - // Mixing Low and high elements, inverse of above - // Dest[63:0] = Src1[127:64] - // Dest[127:64] = Src2[63:0] - return _VExtr(DstSize, 1, Src2Node, Src1Node, 8); - case 0b10'00'10'00: - // Mixing even elements. 
- // Dest[31:0] = Src1[31:0] - // Dest[63:32] = Src1[95:64] - // Dest[95:64] = Src2[31:0] - // Dest[127:96] = Src2[95:64] - return _VUnZip(DstSize, ElementSize, Src1Node, Src2Node); - case 0b11'01'11'01: - // Mixing odd elements. - // Dest[31:0] = Src1[63:32] - // Dest[63:32] = Src1[127:96] - // Dest[95:64] = Src2[63:32] - // Dest[127:96] = Src2[127:96] - return _VUnZip2(DstSize, ElementSize, Src1Node, Src2Node); - case 0b11'10'00'00: - case 0b11'10'01'01: - case 0b11'10'10'10: - case 0b11'10'11'11: { - // Bottom elements duplicated, Top 64-bits inserted - auto DupSrc1 = _VDupElement(DstSize, ElementSize, Src1Node, Shuffle & 0b11); - return _VZip2(DstSize, 8, DupSrc1, Src2Node); - } - case 0b01'00'00'00: - case 0b01'00'01'01: - case 0b01'00'10'10: - case 0b01'00'11'11: { - // Bottom elements duplicated, Bottom 64-bits inserted - auto DupSrc1 = _VDupElement(DstSize, ElementSize, Src1Node, Shuffle & 0b11); - return _VZip(DstSize, 8, DupSrc1, Src2Node); - } - case 0b00'00'01'00: - case 0b01'01'01'00: - case 0b10'10'01'00: - case 0b11'11'01'00: { - // Top elements duplicated, Bottom 64-bits inserted - auto DupSrc2 = _VDupElement(DstSize, ElementSize, Src2Node, (Shuffle >> 4) & 0b11); - return _VZip(DstSize, 8, Src1Node, DupSrc2); - } - case 0b00'00'11'10: - case 0b01'01'11'10: - case 0b10'10'11'10: - case 0b11'11'11'10: { - // Top elements duplicated, Top 64-bits inserted - auto DupSrc2 = _VDupElement(DstSize, ElementSize, Src2Node, (Shuffle >> 4) & 0b11); - return _VZip2(DstSize, 8, Src1Node, DupSrc2); - } - case 0b01'00'01'11: { - // TODO: This doesn't generate optimal code. - // RA doesn't understand that Src1Node is dead after VInsElement due to SRA class differences. - // With RA fixes this would be 2 instructions. - // Odd elements inverted, Low 64-bits inserted - Src1Node = _VInsElement(DstSize, 4, 0, 3, Src1Node, Src1Node); - return _VZip(DstSize, 8, Src1Node, Src2Node); - } - case 0b11'10'01'11: { - // TODO: This doesn't generate optimal code. 
- // RA doesn't understand that Src1Node is dead after VInsElement due to SRA class differences. - // With RA fixes this would be 2 instructions. - // Odd elements inverted, Top 64-bits inserted - Src1Node = _VInsElement(DstSize, 4, 0, 3, Src1Node, Src1Node); - return _VInsElement(DstSize, 8, 1, 1, Src1Node, Src2Node); - } - case 0b01'00'00'01: { - // Lower 32-bit elements inverted, low 64-bits inserted - Src1Node = _VRev64(DstSize, 4, Src1Node); - return _VZip(DstSize, 8, Src1Node, Src2Node); - } - case 0b11'10'00'01: { - // TODO: This doesn't generate optimal code. - // RA doesn't understand that Src1Node is dead after VInsElement due to SRA class differences. - // With RA fixes this would be 2 instructions. - // Lower 32-bit elements inverted, Top 64-bits inserted - Src1Node = _VRev64(DstSize, 4, Src1Node); - return _VInsElement(DstSize, 8, 1, 1, Src1Node, Src2Node); - } - case 0b00'00'00'00: - case 0b00'00'01'01: - case 0b00'00'10'10: - case 0b00'00'11'11: - case 0b01'01'00'00: - case 0b01'01'01'01: - case 0b01'01'10'10: - case 0b01'01'11'11: - case 0b10'10'00'00: - case 0b10'10'01'01: - case 0b10'10'10'10: - case 0b10'10'11'11: - case 0b11'11'00'00: - case 0b11'11'01'01: - case 0b11'11'10'10: - case 0b11'11'11'11: - { - // Duplicate element in upper and lower across each 64-bit segment. - auto DupSrc1 = _VDupElement(DstSize, ElementSize, Src1Node, Shuffle & 0b11); - auto DupSrc2 = _VDupElement(DstSize, ElementSize, Src2Node, (Shuffle >> 4) & 0b11); - return _VZip(DstSize, 8, DupSrc1, DupSrc2); + case 0b01'00'01'00: + // Combining of low 64-bits. + // Dest[63:0] = Src1[63:0] + // Dest[127:64] = Src2[63:0] + return _VZip(DstSize, 8, Src1Node, Src2Node); + case 0b11'10'11'10: + // Combining of high 64-bits. 
+ // Dest[63:0] = Src1[127:64] + // Dest[127:64] = Src2[127:64] + return _VZip2(DstSize, 8, Src1Node, Src2Node); + case 0b11'10'01'00: + // Mixing Low and high elements + // Dest[63:0] = Src1[63:0] + // Dest[127:64] = Src2[127:64] + return _VInsElement(DstSize, 8, 1, 1, Src1Node, Src2Node); + case 0b01'00'11'10: + // Mixing Low and high elements, inverse of above + // Dest[63:0] = Src1[127:64] + // Dest[127:64] = Src2[63:0] + return _VExtr(DstSize, 1, Src2Node, Src1Node, 8); + case 0b10'00'10'00: + // Mixing even elements. + // Dest[31:0] = Src1[31:0] + // Dest[63:32] = Src1[95:64] + // Dest[95:64] = Src2[31:0] + // Dest[127:96] = Src2[95:64] + return _VUnZip(DstSize, ElementSize, Src1Node, Src2Node); + case 0b11'01'11'01: + // Mixing odd elements. + // Dest[31:0] = Src1[63:32] + // Dest[63:32] = Src1[127:96] + // Dest[95:64] = Src2[63:32] + // Dest[127:96] = Src2[127:96] + return _VUnZip2(DstSize, ElementSize, Src1Node, Src2Node); + case 0b11'10'00'00: + case 0b11'10'01'01: + case 0b11'10'10'10: + case 0b11'10'11'11: { + // Bottom elements duplicated, Top 64-bits inserted + auto DupSrc1 = _VDupElement(DstSize, ElementSize, Src1Node, Shuffle & 0b11); + return _VZip2(DstSize, 8, DupSrc1, Src2Node); + } + case 0b01'00'00'00: + case 0b01'00'01'01: + case 0b01'00'10'10: + case 0b01'00'11'11: { + // Bottom elements duplicated, Bottom 64-bits inserted + auto DupSrc1 = _VDupElement(DstSize, ElementSize, Src1Node, Shuffle & 0b11); + return _VZip(DstSize, 8, DupSrc1, Src2Node); + } + case 0b00'00'01'00: + case 0b01'01'01'00: + case 0b10'10'01'00: + case 0b11'11'01'00: { + // Top elements duplicated, Bottom 64-bits inserted + auto DupSrc2 = _VDupElement(DstSize, ElementSize, Src2Node, (Shuffle >> 4) & 0b11); + return _VZip(DstSize, 8, Src1Node, DupSrc2); + } + case 0b00'00'11'10: + case 0b01'01'11'10: + case 0b10'10'11'10: + case 0b11'11'11'10: { + // Top elements duplicated, Top 64-bits inserted + auto DupSrc2 = _VDupElement(DstSize, ElementSize, Src2Node, (Shuffle >> 4) & 
0b11); + return _VZip2(DstSize, 8, Src1Node, DupSrc2); + } + case 0b01'00'01'11: { + // TODO: This doesn't generate optimal code. + // RA doesn't understand that Src1Node is dead after VInsElement due to SRA class differences. + // With RA fixes this would be 2 instructions. + // Odd elements inverted, Low 64-bits inserted + Src1Node = _VInsElement(DstSize, 4, 0, 3, Src1Node, Src1Node); + return _VZip(DstSize, 8, Src1Node, Src2Node); + } + case 0b11'10'01'11: { + // TODO: This doesn't generate optimal code. + // RA doesn't understand that Src1Node is dead after VInsElement due to SRA class differences. + // With RA fixes this would be 2 instructions. + // Odd elements inverted, Top 64-bits inserted + Src1Node = _VInsElement(DstSize, 4, 0, 3, Src1Node, Src1Node); + return _VInsElement(DstSize, 8, 1, 1, Src1Node, Src2Node); + } + case 0b01'00'00'01: { + // Lower 32-bit elements inverted, low 64-bits inserted + Src1Node = _VRev64(DstSize, 4, Src1Node); + return _VZip(DstSize, 8, Src1Node, Src2Node); + } + case 0b11'10'00'01: { + // TODO: This doesn't generate optimal code. + // RA doesn't understand that Src1Node is dead after VInsElement due to SRA class differences. + // With RA fixes this would be 2 instructions. + // Lower 32-bit elements inverted, Top 64-bits inserted + Src1Node = _VRev64(DstSize, 4, Src1Node); + return _VInsElement(DstSize, 8, 1, 1, Src1Node, Src2Node); + } + case 0b00'00'00'00: + case 0b00'00'01'01: + case 0b00'00'10'10: + case 0b00'00'11'11: + case 0b01'01'00'00: + case 0b01'01'01'01: + case 0b01'01'10'10: + case 0b01'01'11'11: + case 0b10'10'00'00: + case 0b10'10'01'01: + case 0b10'10'10'10: + case 0b10'10'11'11: + case 0b11'11'00'00: + case 0b11'11'01'01: + case 0b11'11'10'10: + case 0b11'11'11'11: { + // Duplicate element in upper and lower across each 64-bit segment. 
+ auto DupSrc1 = _VDupElement(DstSize, ElementSize, Src1Node, Shuffle & 0b11); + auto DupSrc2 = _VDupElement(DstSize, ElementSize, Src2Node, (Shuffle >> 4) & 0b11); + return _VZip(DstSize, 8, DupSrc1, DupSrc2); + } + default: + // Use a TBL2 operation to handle this implementation. If the backend supports it. + if (CTX->BackendFeatures.SupportsVTBL2) { + auto LookupIndexes = + LoadAndCacheIndexedNamedVectorConstant(DstSize, FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_SHUFPS, Shuffle * 16); + return _VTBL2(DstSize, Src1Node, Src2Node, LookupIndexes); } - default: - // Use a TBL2 operation to handle this implementation. If the backend supports it. - if (CTX->BackendFeatures.SupportsVTBL2) { - auto LookupIndexes = LoadAndCacheIndexedNamedVectorConstant(DstSize, FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_SHUFPS, Shuffle * 16); - return _VTBL2(DstSize, Src1Node, Src2Node, LookupIndexes); - } - break; + break; } - } - else { + } else { switch (Shuffle & 0b11) { - case 0b00: - // Low 64-bits of each source interleaved. - return _VZip(DstSize, ElementSize, Src1Node, Src2Node); - case 0b01: - // Upper 64-bits of Src1 in lower bits - // Lower 64-bits of Src2 in upper bits. - return _VExtr(DstSize, 1, Src2Node, Src1Node, 8); - case 0b10: - // Lower 32-bits of Src1 in lower bits. - // Upper 64-bits of Src2 in upper bits. - return _VInsElement(DstSize, ElementSize, 1, 1, Src1Node, Src2Node); - case 0b11: - // Upper 64-bits of each source interleaved. - return _VZip2(DstSize, ElementSize, Src1Node, Src2Node); + case 0b00: + // Low 64-bits of each source interleaved. + return _VZip(DstSize, ElementSize, Src1Node, Src2Node); + case 0b01: + // Upper 64-bits of Src1 in lower bits + // Lower 64-bits of Src2 in upper bits. + return _VExtr(DstSize, 1, Src2Node, Src1Node, 8); + case 0b10: + // Lower 32-bits of Src1 in lower bits. + // Upper 64-bits of Src2 in upper bits. 
+ return _VInsElement(DstSize, ElementSize, 1, 1, Src1Node, Src2Node); + case 0b11: + // Upper 64-bits of each source interleaved. + return _VZip2(DstSize, ElementSize, Src1Node, Src2Node); } } @@ -1693,46 +1463,42 @@ OrderedNode* OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, size_t ElementSize, template void OpDispatchBuilder::SHUFOp(OpcodeArgs) { - OrderedNode *Result = SHUFOpImpl(Op, ElementSize, Op->Dest, Op->Src[0], Op->Src[1]); + OrderedNode* Result = SHUFOpImpl(Op, ElementSize, Op->Dest, Op->Src[0], Op->Src[1]); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::SHUFOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::SHUFOp<8>(OpcodeArgs); +template void OpDispatchBuilder::SHUFOp<4>(OpcodeArgs); +template void OpDispatchBuilder::SHUFOp<8>(OpcodeArgs); -template +template void OpDispatchBuilder::VSHUFOp(OpcodeArgs) { - OrderedNode *Result = SHUFOpImpl(Op, ElementSize, Op->Src[0], Op->Src[1], Op->Src[2]); + OrderedNode* Result = SHUFOpImpl(Op, ElementSize, Op->Src[0], Op->Src[1], Op->Src[2]); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VSHUFOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VSHUFOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VSHUFOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VSHUFOp<8>(OpcodeArgs); void OpDispatchBuilder::VANDNOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - OrderedNode *Dest = _VBic(SrcSize, SrcSize, Src2, Src1); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Dest = _VBic(SrcSize, SrcSize, Src2, Src1); StoreResult(FPRClass, Op, Dest, -1); } -template +template void OpDispatchBuilder::VHADDPOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); const auto Is256Bit = SrcSize == 
Core::CPUState::XMM_AVX_REG_SIZE; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); DeriveOp(Res, IROp, _VFAddP(SrcSize, ElementSize, Src1, Src2)); - OrderedNode *Dest = Res; - if (Is256Bit) { + OrderedNode* Dest = Res; + if (Is256Bit) { Dest = _VInsElement(SrcSize, 8, 1, 2, Res, Res); Dest = _VInsElement(SrcSize, 8, 2, 1, Dest, Res); } @@ -1740,27 +1506,22 @@ void OpDispatchBuilder::VHADDPOp(OpcodeArgs) { StoreResult(FPRClass, Op, Dest, -1); } -template -void OpDispatchBuilder::VHADDPOp(OpcodeArgs); -template -void OpDispatchBuilder::VHADDPOp(OpcodeArgs); -template -void OpDispatchBuilder::VHADDPOp(OpcodeArgs); -template -void OpDispatchBuilder::VHADDPOp(OpcodeArgs); +template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); +template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); +template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); +template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); -template +template void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); - OrderedNode *Result{}; + OrderedNode* Result {}; if (Op->Src[0].IsGPR()) { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Result = _VDupElement(DstSize, ElementSize, Src, 0); } else { // Get the address to broadcast from into a GPR. 
- OrderedNode *Address = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], CTX->GetGPRSize(), Op->Flags, - {.LoadData = false}); + OrderedNode* Address = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], CTX->GetGPRSize(), Op->Flags, {.LoadData = false}); Address = AppendSegmentOffset(Address, Op->Flags); Result = _VBroadcastFromMem(DstSize, ElementSize, Address); @@ -1772,26 +1533,19 @@ void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VBROADCASTOp<1>(OpcodeArgs); -template -void OpDispatchBuilder::VBROADCASTOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::VBROADCASTOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VBROADCASTOp<8>(OpcodeArgs); -template -void OpDispatchBuilder::VBROADCASTOp<16>(OpcodeArgs); - -OrderedNode* OpDispatchBuilder::PINSROpImpl(OpcodeArgs, size_t ElementSize, - const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op, - const X86Tables::DecodedOperand& Imm) { +template void OpDispatchBuilder::VBROADCASTOp<1>(OpcodeArgs); +template void OpDispatchBuilder::VBROADCASTOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VBROADCASTOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VBROADCASTOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VBROADCASTOp<16>(OpcodeArgs); + +OrderedNode* OpDispatchBuilder::PINSROpImpl(OpcodeArgs, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, + const X86Tables::DecodedOperand& Src2Op, const X86Tables::DecodedOperand& Imm) { const auto Size = GetDstSize(Op); const auto NumElements = Size / ElementSize; LOGMAN_THROW_A_FMT(Imm.IsLiteral(), "Imm needs to be literal here"); const uint64_t Index = Imm.Data.Literal.Value & (NumElements - 1); - OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, Size, Op->Flags); + OrderedNode* Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, Size, Op->Flags); if (Src2Op.IsGPR()) { // If the source is a GPR then convert directly from the GPR. 
@@ -1806,21 +1560,17 @@ OrderedNode* OpDispatchBuilder::PINSROpImpl(OpcodeArgs, size_t ElementSize, template void OpDispatchBuilder::PINSROp(OpcodeArgs) { - OrderedNode *Result = PINSROpImpl(Op, ElementSize, Op->Dest, Op->Src[0], Op->Src[1]); + OrderedNode* Result = PINSROpImpl(Op, ElementSize, Op->Dest, Op->Src[0], Op->Src[1]); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PINSROp<1>(OpcodeArgs); -template -void OpDispatchBuilder::PINSROp<2>(OpcodeArgs); -template -void OpDispatchBuilder::PINSROp<4>(OpcodeArgs); -template -void OpDispatchBuilder::PINSROp<8>(OpcodeArgs); +template void OpDispatchBuilder::PINSROp<1>(OpcodeArgs); +template void OpDispatchBuilder::PINSROp<2>(OpcodeArgs); +template void OpDispatchBuilder::PINSROp<4>(OpcodeArgs); +template void OpDispatchBuilder::PINSROp<8>(OpcodeArgs); void OpDispatchBuilder::VPINSRBOp(OpcodeArgs) { - OrderedNode *Result = PINSROpImpl(Op, 1, Op->Src[0], Op->Src[1], Op->Src[2]); + OrderedNode* Result = PINSROpImpl(Op, 1, Op->Src[0], Op->Src[1], Op->Src[2]); if (Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) { Result = _VMov(16, Result); } @@ -1829,7 +1579,7 @@ void OpDispatchBuilder::VPINSRBOp(OpcodeArgs) { void OpDispatchBuilder::VPINSRDQOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); - OrderedNode *Result = PINSROpImpl(Op, SrcSize, Op->Src[0], Op->Src[1], Op->Src[2]); + OrderedNode* Result = PINSROpImpl(Op, SrcSize, Op->Src[0], Op->Src[1], Op->Src[2]); if (Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) { Result = _VMov(16, Result); } @@ -1837,15 +1587,14 @@ void OpDispatchBuilder::VPINSRDQOp(OpcodeArgs) { } void OpDispatchBuilder::VPINSRWOp(OpcodeArgs) { - OrderedNode *Result = PINSROpImpl(Op, 2, Op->Src[0], Op->Src[1], Op->Src[2]); + OrderedNode* Result = PINSROpImpl(Op, 2, Op->Src[0], Op->Src[1], Op->Src[2]); if (Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) { Result = _VMov(16, Result); } StoreResult(FPRClass, Op, Result, -1); } -OrderedNode* 
OpDispatchBuilder::InsertPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, - const X86Tables::DecodedOperand& Src2, +OrderedNode* OpDispatchBuilder::InsertPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, const X86Tables::DecodedOperand& Imm) { LOGMAN_THROW_A_FMT(Imm.IsLiteral(), "Imm needs to be literal here"); const uint8_t ImmValue = Imm.Data.Literal.Value; @@ -1855,7 +1604,7 @@ OrderedNode* OpDispatchBuilder::InsertPSOpImpl(OpcodeArgs, const X86Tables::Deco const auto DstSize = GetDstSize(Op); - OrderedNode *Dest{}; + OrderedNode* Dest {}; if (ZMask != 0xF) { // Only need to load destination if it isn't a full zero Dest = LoadSource_WithOpSize(FPRClass, Op, Src1, DstSize, Op->Flags); @@ -1863,7 +1612,7 @@ OrderedNode* OpDispatchBuilder::InsertPSOpImpl(OpcodeArgs, const X86Tables::Deco if ((ZMask & (1 << CountD)) == 0) { // In the case that ZMask overwrites the destination element, then don't even insert - OrderedNode *Src{}; + OrderedNode* Src {}; if (Src2.IsGPR()) { Src = LoadSource(FPRClass, Op, Src2, Op->Flags); } else { @@ -1893,12 +1642,12 @@ OrderedNode* OpDispatchBuilder::InsertPSOpImpl(OpcodeArgs, const X86Tables::Deco } void OpDispatchBuilder::InsertPSOp(OpcodeArgs) { - OrderedNode *Result = InsertPSOpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1]); + OrderedNode* Result = InsertPSOpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1]); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::VINSERTPSOp(OpcodeArgs) { - OrderedNode *Result = InsertPSOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2]); + OrderedNode* Result = InsertPSOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2]); StoreResult(FPRClass, Op, Result, -1); } @@ -1906,7 +1655,7 @@ template void OpDispatchBuilder::PExtrOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); 
LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); uint64_t Index = Op->Src[1].Data.Literal.Value; @@ -1926,101 +1675,88 @@ void OpDispatchBuilder::PExtrOp(OpcodeArgs) { if (Op->Dest.IsGPR()) { const uint8_t GPRSize = CTX->GetGPRSize(); // Extract already zero extends the result. - OrderedNode *Result = _VExtractToGPR(16, OverridenElementSize, Src, Index); + OrderedNode* Result = _VExtractToGPR(16, OverridenElementSize, Src, Index); StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, GPRSize, -1); return; } // If we are storing to memory then we store the size of the element extracted - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); _VStoreVectorElement(16, OverridenElementSize, Src, Index, Dest); } -template -void OpDispatchBuilder::PExtrOp<1>(OpcodeArgs); -template -void OpDispatchBuilder::PExtrOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::PExtrOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::PExtrOp<8>(OpcodeArgs); +template void OpDispatchBuilder::PExtrOp<1>(OpcodeArgs); +template void OpDispatchBuilder::PExtrOp<2>(OpcodeArgs); +template void OpDispatchBuilder::PExtrOp<4>(OpcodeArgs); +template void OpDispatchBuilder::PExtrOp<8>(OpcodeArgs); void OpDispatchBuilder::VEXTRACT128Op(OpcodeArgs) { const auto DstIsXMM = Op->Dest.IsGPR(); const auto StoreSize = DstIsXMM ? 32 : 16; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src[1] needs to be literal here"); const auto Selector = Op->Src[1].Data.Literal.Value & 0b1; // A selector of zero is the same as doing a 128-bit vector move. if (Selector == 0) { - OrderedNode *Result = DstIsXMM ? _VMov(16, Src) : Src; + OrderedNode* Result = DstIsXMM ? 
_VMov(16, Src) : Src; StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, StoreSize, -1); return; } // Otherwise replicate the element and only store the first 128-bits. - OrderedNode *Result = _VDupElement(32, 16, Src, Selector); + OrderedNode* Result = _VDupElement(32, 16, Src, Selector); if (DstIsXMM) { Result = _VMov(16, Result); } StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, StoreSize, -1); } -OrderedNode* OpDispatchBuilder::PSIGNImpl(OpcodeArgs, size_t ElementSize, - OrderedNode *Src1, OrderedNode *Src2) { +OrderedNode* OpDispatchBuilder::PSIGNImpl(OpcodeArgs, size_t ElementSize, OrderedNode* Src1, OrderedNode* Src2) { const auto Size = GetSrcSize(Op); if (CTX->BackendFeatures.SupportsSaturatingRoundingShifts) { - OrderedNode *Control = _VSQSHL(Size, ElementSize, Src2, (ElementSize * 8) - 1); + OrderedNode* Control = _VSQSHL(Size, ElementSize, Src2, (ElementSize * 8) - 1); Control = _VSRSHR(Size, ElementSize, Control, (ElementSize * 8) - 1); return _VMul(Size, ElementSize, Src1, Control); - } - else { + } else { auto NegVec = _VNeg(Size, ElementSize, Src1); - OrderedNode *CmpLT = _VCMPLTZ(Size, ElementSize, Src2); - OrderedNode *CmpEQ = _VCMPEQZ(Size, ElementSize, Src2); + OrderedNode* CmpLT = _VCMPLTZ(Size, ElementSize, Src2); + OrderedNode* CmpEQ = _VCMPEQZ(Size, ElementSize, Src2); auto BSLResult = _VBSL(Size, CmpLT, NegVec, Src1); return _VBic(Size, Size, BSLResult, CmpEQ); } } -template +template void OpDispatchBuilder::PSIGN(OpcodeArgs) { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); OrderedNode* Res = PSIGNImpl(Op, ElementSize, Dest, Src); StoreResult(FPRClass, Op, Res, -1); } -template -void OpDispatchBuilder::PSIGN<1>(OpcodeArgs); -template -void OpDispatchBuilder::PSIGN<2>(OpcodeArgs); -template -void 
OpDispatchBuilder::PSIGN<4>(OpcodeArgs); +template void OpDispatchBuilder::PSIGN<1>(OpcodeArgs); +template void OpDispatchBuilder::PSIGN<2>(OpcodeArgs); +template void OpDispatchBuilder::PSIGN<4>(OpcodeArgs); -template +template void OpDispatchBuilder::VPSIGN(OpcodeArgs) { - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); OrderedNode* Res = PSIGNImpl(Op, ElementSize, Src1, Src2); StoreResult(FPRClass, Op, Res, -1); } -template -void OpDispatchBuilder::VPSIGN<1>(OpcodeArgs); -template -void OpDispatchBuilder::VPSIGN<2>(OpcodeArgs); -template -void OpDispatchBuilder::VPSIGN<4>(OpcodeArgs); +template void OpDispatchBuilder::VPSIGN<1>(OpcodeArgs); +template void OpDispatchBuilder::VPSIGN<2>(OpcodeArgs); +template void OpDispatchBuilder::VPSIGN<4>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::PSRLDOpImpl(OpcodeArgs, size_t ElementSize, - OrderedNode *Src, OrderedNode *ShiftVec) { +OrderedNode* OpDispatchBuilder::PSRLDOpImpl(OpcodeArgs, size_t ElementSize, OrderedNode* Src, OrderedNode* ShiftVec) { const auto Size = GetSrcSize(Op); // Incoming element size for the shift source is always 8 @@ -2029,28 +1765,25 @@ OrderedNode* OpDispatchBuilder::PSRLDOpImpl(OpcodeArgs, size_t ElementSize, template void OpDispatchBuilder::PSRLDOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = PSRLDOpImpl(Op, ElementSize, Dest, Src); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = PSRLDOpImpl(Op, ElementSize, Dest, Src); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PSRLDOp<2>(OpcodeArgs); 
-template -void OpDispatchBuilder::PSRLDOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::PSRLDOp<8>(OpcodeArgs); +template void OpDispatchBuilder::PSRLDOp<2>(OpcodeArgs); +template void OpDispatchBuilder::PSRLDOp<4>(OpcodeArgs); +template void OpDispatchBuilder::PSRLDOp<8>(OpcodeArgs); -template +template void OpDispatchBuilder::VPSRLDOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Shift = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - OrderedNode *Result = PSRLDOpImpl(Op, ElementSize, Src, Shift); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Shift = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Result = PSRLDOpImpl(Op, ElementSize, Src, Shift); if (Is128Bit) { Result = _VMov(16, Result); @@ -2058,12 +1791,9 @@ void OpDispatchBuilder::VPSRLDOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPSRLDOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::VPSRLDOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VPSRLDOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VPSRLDOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VPSRLDOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPSRLDOp<8>(OpcodeArgs); template void OpDispatchBuilder::PSRLI(OpcodeArgs) { @@ -2076,19 +1806,16 @@ void OpDispatchBuilder::PSRLI(OpcodeArgs) { const auto Size = GetSrcSize(Op); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Shift = _VUShrI(Size, ElementSize, Dest, ShiftConstant); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Shift = _VUShrI(Size, ElementSize, Dest, ShiftConstant); StoreResult(FPRClass, Op, Shift, -1); } -template -void OpDispatchBuilder::PSRLI<2>(OpcodeArgs); -template -void OpDispatchBuilder::PSRLI<4>(OpcodeArgs); -template -void 
OpDispatchBuilder::PSRLI<8>(OpcodeArgs); +template void OpDispatchBuilder::PSRLI<2>(OpcodeArgs); +template void OpDispatchBuilder::PSRLI<4>(OpcodeArgs); +template void OpDispatchBuilder::PSRLI<8>(OpcodeArgs); -template +template void OpDispatchBuilder::VPSRLIOp(OpcodeArgs) { const auto Size = GetSrcSize(Op); const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE; @@ -2096,8 +1823,8 @@ void OpDispatchBuilder::VPSRLIOp(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); const uint64_t ShiftConstant = Op->Src[1].Data.Literal.Value; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = Src; + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = Src; if (ShiftConstant != 0) [[likely]] { Result = _VUShrI(Size, ElementSize, Src, ShiftConstant); @@ -2110,15 +1837,11 @@ void OpDispatchBuilder::VPSRLIOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPSRLIOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::VPSRLIOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VPSRLIOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VPSRLIOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VPSRLIOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPSRLIOp<8>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::PSLLIImpl(OpcodeArgs, size_t ElementSize, - OrderedNode *Src, uint64_t Shift) { +OrderedNode* OpDispatchBuilder::PSLLIImpl(OpcodeArgs, size_t ElementSize, OrderedNode* Src, uint64_t Shift) { if (Shift == 0) [[unlikely]] { // If zero-shift then just return the source. 
return Src; @@ -2136,28 +1859,25 @@ void OpDispatchBuilder::PSLLI(OpcodeArgs) { return; } - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Result = PSLLIImpl(Op, ElementSize, Dest, ShiftConstant); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Result = PSLLIImpl(Op, ElementSize, Dest, ShiftConstant); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PSLLI<2>(OpcodeArgs); -template -void OpDispatchBuilder::PSLLI<4>(OpcodeArgs); -template -void OpDispatchBuilder::PSLLI<8>(OpcodeArgs); +template void OpDispatchBuilder::PSLLI<2>(OpcodeArgs); +template void OpDispatchBuilder::PSLLI<4>(OpcodeArgs); +template void OpDispatchBuilder::PSLLI<8>(OpcodeArgs); -template +template void OpDispatchBuilder::VPSLLIOp(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); const uint64_t ShiftConstant = Op->Src[1].Data.Literal.Value; const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = PSLLIImpl(Op, ElementSize, Src, ShiftConstant); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = PSLLIImpl(Op, ElementSize, Src, ShiftConstant); if (ShiftConstant == 0 && Is128Bit) { Result = _VMov(16, Result); } @@ -2165,15 +1885,11 @@ void OpDispatchBuilder::VPSLLIOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPSLLIOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::VPSLLIOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VPSLLIOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VPSLLIOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VPSLLIOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPSLLIOp<8>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::PSLLImpl(OpcodeArgs, size_t ElementSize, - OrderedNode *Src, OrderedNode 
*ShiftVec) { +OrderedNode* OpDispatchBuilder::PSLLImpl(OpcodeArgs, size_t ElementSize, OrderedNode* Src, OrderedNode* ShiftVec) { const auto Size = GetDstSize(Op); // Incoming element size for the shift source is always 8 @@ -2182,28 +1898,25 @@ OrderedNode* OpDispatchBuilder::PSLLImpl(OpcodeArgs, size_t ElementSize, template void OpDispatchBuilder::PSLL(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = PSLLImpl(Op, ElementSize, Dest, Src); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = PSLLImpl(Op, ElementSize, Dest, Src); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PSLL<2>(OpcodeArgs); -template -void OpDispatchBuilder::PSLL<4>(OpcodeArgs); -template -void OpDispatchBuilder::PSLL<8>(OpcodeArgs); +template void OpDispatchBuilder::PSLL<2>(OpcodeArgs); +template void OpDispatchBuilder::PSLL<4>(OpcodeArgs); +template void OpDispatchBuilder::PSLL<8>(OpcodeArgs); -template +template void OpDispatchBuilder::VPSLLOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], 16, Op->Flags); - OrderedNode *Result = PSLLImpl(Op, ElementSize, Src1, Src2); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], 16, Op->Flags); + OrderedNode* Result = PSLLImpl(Op, ElementSize, Src1, Src2); if (Is128Bit) { Result = _VMov(16, Result); @@ -2211,15 +1924,11 @@ void OpDispatchBuilder::VPSLLOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPSLLOp<2>(OpcodeArgs); -template -void 
OpDispatchBuilder::VPSLLOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VPSLLOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VPSLLOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VPSLLOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPSLLOp<8>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::PSRAOpImpl(OpcodeArgs, size_t ElementSize, - OrderedNode *Src, OrderedNode *ShiftVec) { +OrderedNode* OpDispatchBuilder::PSRAOpImpl(OpcodeArgs, size_t ElementSize, OrderedNode* Src, OrderedNode* ShiftVec) { const auto Size = GetDstSize(Op); // Incoming element size for the shift source is always 8 @@ -2228,26 +1937,24 @@ OrderedNode* OpDispatchBuilder::PSRAOpImpl(OpcodeArgs, size_t ElementSize, template void OpDispatchBuilder::PSRAOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = PSRAOpImpl(Op, ElementSize, Dest, Src); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = PSRAOpImpl(Op, ElementSize, Dest, Src); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PSRAOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::PSRAOp<4>(OpcodeArgs); +template void OpDispatchBuilder::PSRAOp<2>(OpcodeArgs); +template void OpDispatchBuilder::PSRAOp<4>(OpcodeArgs); -template +template void OpDispatchBuilder::VPSRAOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - OrderedNode *Result = PSRAOpImpl(Op, ElementSize, Src1, Src2); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Result = PSRAOpImpl(Op, 
ElementSize, Src1, Src2); if (Is128Bit) { Result = _VMov(16, Result); @@ -2255,10 +1962,8 @@ void OpDispatchBuilder::VPSRAOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPSRAOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::VPSRAOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPSRAOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VPSRAOp<4>(OpcodeArgs); void OpDispatchBuilder::PSRLDQ(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); @@ -2270,8 +1975,8 @@ void OpDispatchBuilder::PSRLDQ(OpcodeArgs) { const auto Size = GetDstSize(Op); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Result = LoadAndCacheNamedVectorConstant(Size, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Result = LoadAndCacheNamedVectorConstant(Size, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); if (Shift < Size) { Result = _VExtr(Size, 1, Result, Dest, Shift); @@ -2286,9 +1991,9 @@ void OpDispatchBuilder::VPSRLDQOp(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); const uint64_t Shift = Op->Src[1].Data.Literal.Value; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result{}; + OrderedNode* Result {}; if (Shift == 0) [[unlikely]] { if (Is128Bit) { Result = _VMov(16, Src); @@ -2304,8 +2009,8 @@ void OpDispatchBuilder::VPSRLDQOp(OpcodeArgs) { } } else { if (Shift < Core::CPUState::XMM_SSE_REG_SIZE) { - OrderedNode *ResultBottom = _VExtr(16, 1, Result, Src, Shift); - OrderedNode *ResultTop = _VExtr(DstSize, 1, Result, Src, 16 + Shift); + OrderedNode* ResultBottom = _VExtr(16, 1, Result, Src, Shift); + OrderedNode* ResultTop = _VExtr(DstSize, 1, Result, Src, 16 + Shift); Result = _VInsElement(DstSize, 16, 1, 0, ResultBottom, 
ResultTop); } @@ -2325,8 +2030,8 @@ void OpDispatchBuilder::PSLLDQ(OpcodeArgs) { const auto Size = GetDstSize(Op); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Result = LoadAndCacheNamedVectorConstant(Size, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Result = LoadAndCacheNamedVectorConstant(Size, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); if (Shift < Size) { Result = _VExtr(Size, 1, Dest, Result, Size - Shift); } @@ -2341,9 +2046,9 @@ void OpDispatchBuilder::VPSLLDQOp(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); const uint64_t Shift = Op->Src[1].Data.Literal.Value; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = Src; + OrderedNode* Result = Src; if (Shift == 0) { if (Is128Bit) { @@ -2357,8 +2062,8 @@ void OpDispatchBuilder::VPSLLDQOp(OpcodeArgs) { } } else { if (Shift < Core::CPUState::XMM_SSE_REG_SIZE) { - OrderedNode *ResultBottom = _VExtr(16, 1, Src, Result, 16 - Shift); - OrderedNode* ResultTop = _VExtr(DstSize, 1, Src, Result, DstSize - Shift); + OrderedNode* ResultBottom = _VExtr(16, 1, Src, Result, 16 - Shift); + OrderedNode* ResultTop = _VExtr(DstSize, 1, Src, Result, DstSize - Shift); Result = _VInsElement(DstSize, 16, 1, 0, ResultBottom, ResultTop); } @@ -2378,25 +2083,23 @@ void OpDispatchBuilder::PSRAIOp(OpcodeArgs) { const auto Size = GetDstSize(Op); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Result = _VSShrI(Size, ElementSize, Dest, Shift); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Result = _VSShrI(Size, ElementSize, Dest, Shift); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PSRAIOp<2>(OpcodeArgs); -template -void 
OpDispatchBuilder::PSRAIOp<4>(OpcodeArgs); +template void OpDispatchBuilder::PSRAIOp<2>(OpcodeArgs); +template void OpDispatchBuilder::PSRAIOp<4>(OpcodeArgs); -template +template void OpDispatchBuilder::VPSRAIOp(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); const uint64_t Shift = Op->Src[1].Data.Literal.Value; const auto Size = GetDstSize(Op); const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = Src; + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = Src; if (Shift != 0) [[likely]] { Result = _VSShrI(Size, ElementSize, Src, Shift); @@ -2409,17 +2112,15 @@ void OpDispatchBuilder::VPSRAIOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPSRAIOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::VPSRAIOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPSRAIOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VPSRAIOp<4>(OpcodeArgs); void OpDispatchBuilder::AVXVariableShiftImpl(OpcodeArgs, IROps IROp) { const auto DstSize = GetDstSize(Op); const auto SrcSize = GetSrcSize(Op); - OrderedNode *Vector = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], DstSize, Op->Flags); - OrderedNode *ShiftVector = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], DstSize, Op->Flags); + OrderedNode* Vector = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], DstSize, Op->Flags); + OrderedNode* ShiftVector = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], DstSize, Op->Flags); DeriveOp(Shift, IROp, _VUShr(DstSize, SrcSize, Vector, ShiftVector, true)); @@ -2443,8 +2144,8 @@ void OpDispatchBuilder::MOVDDUPOp(OpcodeArgs) { // unnecessarily zero extend the vector. Otherwise, if // memory, then we want to load the element size exactly. const auto SrcSize = Op->Src[0].IsGPR() ? 
16U : GetSrcSize(Op); - OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); - OrderedNode *Res = _VDupElement(16, GetSrcSize(Op), Src, 0); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); + OrderedNode* Res = _VDupElement(16, GetSrcSize(Op), Src, 0); StoreResult(FPRClass, Op, Res, -1); } @@ -2455,10 +2156,10 @@ void OpDispatchBuilder::VMOVDDUPOp(OpcodeArgs) { const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto MemSize = Is256Bit ? 32 : 8; - OrderedNode *Src = IsSrcGPR ? LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags) - : LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], MemSize, Op->Flags); + OrderedNode* Src = IsSrcGPR ? LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags) : + LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], MemSize, Op->Flags); - OrderedNode *Res{}; + OrderedNode* Res {}; if (Is256Bit) { Res = _VTrn(SrcSize, 8, Src, Src); } else { @@ -2468,26 +2169,23 @@ void OpDispatchBuilder::VMOVDDUPOp(OpcodeArgs) { StoreResult(FPRClass, Op, Res, -1); } -OrderedNode* OpDispatchBuilder::CVTGPR_To_FPRImpl(OpcodeArgs, size_t DstElementSize, - const X86Tables::DecodedOperand& Src1Op, +OrderedNode* OpDispatchBuilder::CVTGPR_To_FPRImpl(OpcodeArgs, size_t DstElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op) { const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, 16, Op->Flags); - OrderedNode *Converted{}; + OrderedNode* Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, 16, Op->Flags); + OrderedNode* Converted {}; if (Src2Op.IsGPR()) { // If the source is a GPR then convert directly from the GPR. 
auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, CTX->GetGPRSize(), Op->Flags); Converted = _Float_FromGPR_S(DstElementSize, SrcSize, Src2); - } - else if (SrcSize != DstElementSize) { + } else if (SrcSize != DstElementSize) { // If the source is from memory but the Source size and destination size aren't the same, // then it is more optimal to load in to a GPR and convert between GPR->FPR. // ARM GPR->FPR conversion supports different size source and destinations while FPR->FPR doesn't. auto Src2 = LoadSource(GPRClass, Op, Src2Op, Op->Flags); Converted = _Float_FromGPR_S(DstElementSize, SrcSize, Src2); - } - else { + } else { // In the case of cvtsi2s{s,d} where the source and destination are the same size, // then it is more optimal to load in to the FPR register directly and convert there. auto Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); @@ -2499,24 +2197,20 @@ OrderedNode* OpDispatchBuilder::CVTGPR_To_FPRImpl(OpcodeArgs, size_t DstElementS template void OpDispatchBuilder::CVTGPR_To_FPR(OpcodeArgs) { - OrderedNode *Result = CVTGPR_To_FPRImpl(Op, DstElementSize, Op->Dest, Op->Src[0]); + OrderedNode* Result = CVTGPR_To_FPRImpl(Op, DstElementSize, Op->Dest, Op->Src[0]); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::CVTGPR_To_FPR<4>(OpcodeArgs); -template -void OpDispatchBuilder::CVTGPR_To_FPR<8>(OpcodeArgs); +template void OpDispatchBuilder::CVTGPR_To_FPR<4>(OpcodeArgs); +template void OpDispatchBuilder::CVTGPR_To_FPR<8>(OpcodeArgs); -template +template void OpDispatchBuilder::AVXCVTGPR_To_FPR(OpcodeArgs) { - OrderedNode *Result = CVTGPR_To_FPRImpl(Op, DstElementSize, Op->Src[0], Op->Src[1]); + OrderedNode* Result = CVTGPR_To_FPRImpl(Op, DstElementSize, Op->Src[0], Op->Src[1]); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::AVXCVTGPR_To_FPR<4>(OpcodeArgs); -template -void OpDispatchBuilder::AVXCVTGPR_To_FPR<8>(OpcodeArgs); +template void OpDispatchBuilder::AVXCVTGPR_To_FPR<4>(OpcodeArgs); 
+template void OpDispatchBuilder::AVXCVTGPR_To_FPR<8>(OpcodeArgs); template void OpDispatchBuilder::CVTFPR_To_GPR(OpcodeArgs) { @@ -2524,7 +2218,7 @@ void OpDispatchBuilder::CVTFPR_To_GPR(OpcodeArgs) { // unnecessarily zero extend the vector. Otherwise, if // memory, then we want to load the element size exactly. const auto SrcSize = Op->Src[0].IsGPR() ? 16U : GetSrcSize(Op); - OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); // GPR size is determined by REX.W // Source Element size is determined by instruction @@ -2532,28 +2226,23 @@ void OpDispatchBuilder::CVTFPR_To_GPR(OpcodeArgs) { if constexpr (HostRoundingMode) { Src = _Float_ToGPR_S(GPRSize, SrcElementSize, Src); - } - else { + } else { Src = _Float_ToGPR_ZS(GPRSize, SrcElementSize, Src); } StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, GPRSize, -1); } -template -void OpDispatchBuilder::CVTFPR_To_GPR<4, true>(OpcodeArgs); -template -void OpDispatchBuilder::CVTFPR_To_GPR<4, false>(OpcodeArgs); +template void OpDispatchBuilder::CVTFPR_To_GPR<4, true>(OpcodeArgs); +template void OpDispatchBuilder::CVTFPR_To_GPR<4, false>(OpcodeArgs); -template -void OpDispatchBuilder::CVTFPR_To_GPR<8, true>(OpcodeArgs); -template -void OpDispatchBuilder::CVTFPR_To_GPR<8, false>(OpcodeArgs); +template void OpDispatchBuilder::CVTFPR_To_GPR<8, true>(OpcodeArgs); +template void OpDispatchBuilder::CVTFPR_To_GPR<8, false>(OpcodeArgs); OrderedNode* OpDispatchBuilder::Vector_CVT_Int_To_FloatImpl(OpcodeArgs, size_t SrcElementSize, bool Widen) { const size_t Size = GetDstSize(Op); - OrderedNode *Src = [&] { + OrderedNode* Src = [&] { if (Widen) { // If loading a vector, use the full size, so we don't // unnecessarily zero extend the vector. 
Otherwise, if @@ -2576,32 +2265,27 @@ OrderedNode* OpDispatchBuilder::Vector_CVT_Int_To_FloatImpl(OpcodeArgs, size_t S template void OpDispatchBuilder::Vector_CVT_Int_To_Float(OpcodeArgs) { - OrderedNode *Result = Vector_CVT_Int_To_FloatImpl(Op, SrcElementSize, Widen); + OrderedNode* Result = Vector_CVT_Int_To_FloatImpl(Op, SrcElementSize, Widen); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::Vector_CVT_Int_To_Float<4, true>(OpcodeArgs); -template -void OpDispatchBuilder::Vector_CVT_Int_To_Float<4, false>(OpcodeArgs); +template void OpDispatchBuilder::Vector_CVT_Int_To_Float<4, true>(OpcodeArgs); +template void OpDispatchBuilder::Vector_CVT_Int_To_Float<4, false>(OpcodeArgs); -template +template void OpDispatchBuilder::AVXVector_CVT_Int_To_Float(OpcodeArgs) { - OrderedNode *Result = Vector_CVT_Int_To_FloatImpl(Op, SrcElementSize, Widen); + OrderedNode* Result = Vector_CVT_Int_To_FloatImpl(Op, SrcElementSize, Widen); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::AVXVector_CVT_Int_To_Float<4, false>(OpcodeArgs); -template -void OpDispatchBuilder::AVXVector_CVT_Int_To_Float<4, true>(OpcodeArgs); +template void OpDispatchBuilder::AVXVector_CVT_Int_To_Float<4, false>(OpcodeArgs); +template void OpDispatchBuilder::AVXVector_CVT_Int_To_Float<4, true>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::Vector_CVT_Float_To_IntImpl(OpcodeArgs, size_t SrcElementSize, - bool Narrow, bool HostRoundingMode) { +OrderedNode* OpDispatchBuilder::Vector_CVT_Float_To_IntImpl(OpcodeArgs, size_t SrcElementSize, bool Narrow, bool HostRoundingMode) { const size_t DstSize = GetDstSize(Op); size_t ElementSize = SrcElementSize; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); if (Narrow) { Src = _Vector_FToF(DstSize, SrcElementSize >> 1, Src, SrcElementSize); @@ -2618,31 +2302,26 @@ OrderedNode* 
OpDispatchBuilder::Vector_CVT_Float_To_IntImpl(OpcodeArgs, size_t S template void OpDispatchBuilder::Vector_CVT_Float_To_Int(OpcodeArgs) { const size_t DstSize = GetDstSize(Op); - OrderedNode *Result = Vector_CVT_Float_To_IntImpl(Op, SrcElementSize, Narrow, HostRoundingMode); + OrderedNode* Result = Vector_CVT_Float_To_IntImpl(Op, SrcElementSize, Narrow, HostRoundingMode); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void OpDispatchBuilder::Vector_CVT_Float_To_Int<4, false, false>(OpcodeArgs); -template -void OpDispatchBuilder::Vector_CVT_Float_To_Int<4, false, true>(OpcodeArgs); -template -void OpDispatchBuilder::Vector_CVT_Float_To_Int<4, true, false>(OpcodeArgs); +template void OpDispatchBuilder::Vector_CVT_Float_To_Int<4, false, false>(OpcodeArgs); +template void OpDispatchBuilder::Vector_CVT_Float_To_Int<4, false, true>(OpcodeArgs); +template void OpDispatchBuilder::Vector_CVT_Float_To_Int<4, true, false>(OpcodeArgs); -template -void OpDispatchBuilder::Vector_CVT_Float_To_Int<8, true, true>(OpcodeArgs); -template -void OpDispatchBuilder::Vector_CVT_Float_To_Int<8, true, false>(OpcodeArgs); +template void OpDispatchBuilder::Vector_CVT_Float_To_Int<8, true, true>(OpcodeArgs); +template void OpDispatchBuilder::Vector_CVT_Float_To_Int<8, true, false>(OpcodeArgs); -template +template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int(OpcodeArgs) { const auto DstSize = GetDstSize(Op); // VCVTPD2DQ/VCVTTPD2DQ only use the bottom lane, even for the 256-bit version. 
const auto Truncate = SrcElementSize == 8 && Narrow; - OrderedNode *Result = Vector_CVT_Float_To_IntImpl(Op, SrcElementSize, Narrow, HostRoundingMode); + OrderedNode* Result = Vector_CVT_Float_To_IntImpl(Op, SrcElementSize, Narrow, HostRoundingMode); if (Truncate) { Result = _VMov(16, Result); @@ -2650,53 +2329,44 @@ void OpDispatchBuilder::AVXVector_CVT_Float_To_Int(OpcodeArgs) { StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); } -template -void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<4, false, false>(OpcodeArgs); -template -void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<4, false, true>(OpcodeArgs); +template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<4, false, false>(OpcodeArgs); +template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<4, false, true>(OpcodeArgs); -template -void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<8, true, false>(OpcodeArgs); -template -void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<8, true, true>(OpcodeArgs); +template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<8, true, false>(OpcodeArgs); +template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int<8, true, true>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::Scalar_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstElementSize, size_t SrcElementSize, - const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op) { +OrderedNode* OpDispatchBuilder::Scalar_CVT_Float_To_FloatImpl( + OpcodeArgs, size_t DstElementSize, size_t SrcElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op) { // In the case of vectors, we can just specify the full vector length, // so that we don't unnecessarily zero-extend the entire vector. // Otherwise, if it's a memory load, then we only want to load its exact size. const auto Src2Size = Src2Op.IsGPR() ? 
16U : SrcElementSize; - OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, 16, Op->Flags); - OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, Src2Size, Op->Flags); + OrderedNode* Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, 16, Op->Flags); + OrderedNode* Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, Src2Size, Op->Flags); - OrderedNode *Converted = _Float_FToF(DstElementSize, SrcElementSize, Src2); + OrderedNode* Converted = _Float_FToF(DstElementSize, SrcElementSize, Src2); return _VInsElement(16, DstElementSize, 0, 0, Src1, Converted); } template void OpDispatchBuilder::Scalar_CVT_Float_To_Float(OpcodeArgs) { - OrderedNode *Result = Scalar_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize, Op->Dest, Op->Src[0]); + OrderedNode* Result = Scalar_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize, Op->Dest, Op->Src[0]); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::Scalar_CVT_Float_To_Float<4, 8>(OpcodeArgs); -template -void OpDispatchBuilder::Scalar_CVT_Float_To_Float<8, 4>(OpcodeArgs); +template void OpDispatchBuilder::Scalar_CVT_Float_To_Float<4, 8>(OpcodeArgs); +template void OpDispatchBuilder::Scalar_CVT_Float_To_Float<8, 4>(OpcodeArgs); -template +template void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float(OpcodeArgs) { - OrderedNode *Result = Scalar_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize, Op->Src[0], Op->Src[1]); + OrderedNode* Result = Scalar_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize, Op->Src[0], Op->Src[1]); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float<4, 8>(OpcodeArgs); -template -void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float<8, 4>(OpcodeArgs); +template void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float<4, 8>(OpcodeArgs); +template void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float<8, 4>(OpcodeArgs); void OpDispatchBuilder::Vector_CVT_Float_To_FloatImpl(OpcodeArgs, 
size_t DstElementSize, size_t SrcElementSize, bool IsAVX) { const auto SrcSize = GetSrcSize(Op); @@ -2704,13 +2374,11 @@ void OpDispatchBuilder::Vector_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstElem const auto IsFloatSrc = SrcElementSize == 4; const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE; - const auto LoadSize = IsFloatSrc && !Op->Src[0].IsGPR() ? - SrcSize / 2 : - SrcSize; + const auto LoadSize = IsFloatSrc && !Op->Src[0].IsGPR() ? SrcSize / 2 : SrcSize; - OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], LoadSize, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], LoadSize, Op->Flags); - OrderedNode *Result{}; + OrderedNode* Result {}; if (DstElementSize > SrcElementSize) { Result = _Vector_FToF(SrcSize, SrcElementSize << 1, Src, SrcElementSize); } else { @@ -2733,22 +2401,18 @@ template void OpDispatchBuilder::Vector_CVT_Float_To_Float(OpcodeArgs) { Vector_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize, false); } -template -void OpDispatchBuilder::Vector_CVT_Float_To_Float<4, 8>(OpcodeArgs); -template -void OpDispatchBuilder::Vector_CVT_Float_To_Float<8, 4>(OpcodeArgs); +template void OpDispatchBuilder::Vector_CVT_Float_To_Float<4, 8>(OpcodeArgs); +template void OpDispatchBuilder::Vector_CVT_Float_To_Float<8, 4>(OpcodeArgs); template void OpDispatchBuilder::AVXVector_CVT_Float_To_Float(OpcodeArgs) { Vector_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize, true); } -template -void OpDispatchBuilder::AVXVector_CVT_Float_To_Float<4, 8>(OpcodeArgs); -template -void OpDispatchBuilder::AVXVector_CVT_Float_To_Float<8, 4>(OpcodeArgs); +template void OpDispatchBuilder::AVXVector_CVT_Float_To_Float<4, 8>(OpcodeArgs); +template void OpDispatchBuilder::AVXVector_CVT_Float_To_Float<8, 4>(OpcodeArgs); void OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, 
Op->Src[0], Op->Flags); // Always 32-bit. size_t ElementSize = 4; @@ -2769,7 +2433,7 @@ void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs) { // unnecessarily zero extend the vector. Otherwise, if // memory, then we want to load the element size exactly. const auto SrcSize = Op->Src[0].IsGPR() ? 16U : GetSrcSize(Op); - OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); size_t ElementSize = SrcElementSize; size_t Size = GetDstSize(Op); @@ -2781,32 +2445,27 @@ void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs) { if constexpr (HostRoundingMode) { Src = _Vector_FToS(Size, ElementSize, Src); - } - else { + } else { Src = _Vector_FToZS(Size, ElementSize, Src); } StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, Size, -1); } -template -void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<4, false, false>(OpcodeArgs); -template -void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<4, false, true>(OpcodeArgs); -template -void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, true, false>(OpcodeArgs); -template -void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, true, true>(OpcodeArgs); +template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<4, false, false>(OpcodeArgs); +template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<4, false, true>(OpcodeArgs); +template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, true, false>(OpcodeArgs); +template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, true, true>(OpcodeArgs); void OpDispatchBuilder::MASKMOVOp(OpcodeArgs) { const auto Size = GetSrcSize(Op); - OrderedNode *MaskSrc = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* MaskSrc = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); // Mask only cares about the top bit of each byte MaskSrc = _VCMPLTZ(Size, 1, 
MaskSrc); // Vector that will overwrite byte elements. - OrderedNode *VectorSrc = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* VectorSrc = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); // RDI source auto MemDest = LoadGPRRegister(X86State::REG_RDI); @@ -2814,7 +2473,7 @@ void OpDispatchBuilder::MASKMOVOp(OpcodeArgs) { // DS prefix by default. MemDest = AppendSegmentOffset(MemDest, Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX); - OrderedNode *XMMReg = _LoadMem(FPRClass, Size, MemDest, 1); + OrderedNode* XMMReg = _LoadMem(FPRClass, Size, MemDest, 1); // If the Mask element high bit is set then overwrite the element with the source, else keep the memory variant XMMReg = _VBSL(Size, MaskSrc, VectorSrc, XMMReg); @@ -2822,26 +2481,24 @@ void OpDispatchBuilder::MASKMOVOp(OpcodeArgs) { } void OpDispatchBuilder::VMASKMOVOpImpl(OpcodeArgs, size_t ElementSize, size_t DataSize, bool IsStore, - const X86Tables::DecodedOperand& MaskOp, - const X86Tables::DecodedOperand& DataOp) { + const X86Tables::DecodedOperand& MaskOp, const X86Tables::DecodedOperand& DataOp) { const auto MakeAddress = [this, Op](const X86Tables::DecodedOperand& Data) { - OrderedNode *BaseAddr = LoadSource_WithOpSize(GPRClass, Op, Data, CTX->GetGPRSize(), Op->Flags, - {.LoadData = false}); + OrderedNode* BaseAddr = LoadSource_WithOpSize(GPRClass, Op, Data, CTX->GetGPRSize(), Op->Flags, {.LoadData = false}); return AppendSegmentOffset(BaseAddr, Op->Flags); }; - OrderedNode *Mask = LoadSource_WithOpSize(FPRClass, Op, MaskOp, DataSize, Op->Flags); + OrderedNode* Mask = LoadSource_WithOpSize(FPRClass, Op, MaskOp, DataSize, Op->Flags); if (IsStore) { - OrderedNode *Data = LoadSource_WithOpSize(FPRClass, Op, DataOp, DataSize, Op->Flags); - OrderedNode *Address = MakeAddress(Op->Dest); + OrderedNode* Data = LoadSource_WithOpSize(FPRClass, Op, DataOp, DataSize, Op->Flags); + OrderedNode* Address = MakeAddress(Op->Dest); _VStoreVectorMasked(DataSize, ElementSize, Mask, Data, Address, 
Invalid(), MEM_OFFSET_SXTX, 1); } else { const auto Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Address = MakeAddress(DataOp); - OrderedNode *Result = _VLoadVectorMasked(DataSize, ElementSize, Mask, Address, Invalid(), MEM_OFFSET_SXTX, 1); + OrderedNode* Address = MakeAddress(DataOp); + OrderedNode* Result = _VLoadVectorMasked(DataSize, ElementSize, Mask, Address, Invalid(), MEM_OFFSET_SXTX, 1); if (Is128Bit) { Result = _VMov(16, Result); @@ -2850,96 +2507,107 @@ void OpDispatchBuilder::VMASKMOVOpImpl(OpcodeArgs, size_t ElementSize, size_t Da } } -template +template void OpDispatchBuilder::VMASKMOVOp(OpcodeArgs) { VMASKMOVOpImpl(Op, ElementSize, GetDstSize(Op), IsStore, Op->Src[0], Op->Src[1]); } -template -void OpDispatchBuilder::VMASKMOVOp<4, false>(OpcodeArgs); -template -void OpDispatchBuilder::VMASKMOVOp<4, true>(OpcodeArgs); -template -void OpDispatchBuilder::VMASKMOVOp<8, false>(OpcodeArgs); -template -void OpDispatchBuilder::VMASKMOVOp<8, true>(OpcodeArgs); +template void OpDispatchBuilder::VMASKMOVOp<4, false>(OpcodeArgs); +template void OpDispatchBuilder::VMASKMOVOp<4, true>(OpcodeArgs); +template void OpDispatchBuilder::VMASKMOVOp<8, false>(OpcodeArgs); +template void OpDispatchBuilder::VMASKMOVOp<8, true>(OpcodeArgs); -template +template void OpDispatchBuilder::VPMASKMOVOp(OpcodeArgs) { VMASKMOVOpImpl(Op, GetSrcSize(Op), GetDstSize(Op), IsStore, Op->Src[0], Op->Src[1]); } -template -void OpDispatchBuilder::VPMASKMOVOp(OpcodeArgs); -template -void OpDispatchBuilder::VPMASKMOVOp(OpcodeArgs); +template void OpDispatchBuilder::VPMASKMOVOp(OpcodeArgs); +template void OpDispatchBuilder::VPMASKMOVOp(OpcodeArgs); void OpDispatchBuilder::MOVBetweenGPR_FPR(OpcodeArgs) { - if (Op->Dest.IsGPR() && - Op->Dest.Data.GPR.GPR >= FEXCore::X86State::REG_XMM_0) { + if (Op->Dest.IsGPR() && Op->Dest.Data.GPR.GPR >= FEXCore::X86State::REG_XMM_0) { if (Op->Src[0].IsGPR()) { // Loading from GPR and moving to Vector. 
- OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], CTX->GetGPRSize(), Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], CTX->GetGPRSize(), Op->Flags); // zext to 128bit auto Converted = _VCastFromGPR(16, GetSrcSize(Op), Src); StoreResult(FPRClass, Op, Op->Dest, Converted, -1); - } - else { + } else { // Loading from Memory as a scalar. Zero extend - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); StoreResult(FPRClass, Op, Op->Dest, Src, -1); } - } - else { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0],Op->Flags); + } else { + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); if (Op->Dest.IsGPR()) { auto ElementSize = GetDstSize(Op); // Extract element from GPR. Zero extending in the process. Src = _VExtractToGPR(GetSrcSize(Op), ElementSize, Src, 0); StoreResult(GPRClass, Op, Op->Dest, Src, -1); - } - else { + } else { // Storing first element to memory. 
- OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); _StoreMem(FPRClass, GetDstSize(Op), Dest, Src, 1); } } } -OrderedNode* OpDispatchBuilder::VFCMPOpImpl(OpcodeArgs, size_t ElementSize, - OrderedNode *Src1, OrderedNode *Src2, uint8_t CompType) { +OrderedNode* OpDispatchBuilder::VFCMPOpImpl(OpcodeArgs, size_t ElementSize, OrderedNode* Src1, OrderedNode* Src2, uint8_t CompType) { const auto Size = GetSrcSize(Op); - OrderedNode *Result{}; + OrderedNode* Result {}; switch (CompType) { - case 0x00: case 0x08: case 0x10: case 0x18: // EQ - Result = _VFCMPEQ(Size, ElementSize, Src1, Src2); - break; - case 0x01: case 0x09: case 0x11: case 0x19: // LT, GT(Swapped operand) - Result = _VFCMPLT(Size, ElementSize, Src1, Src2); - break; - case 0x02: case 0x0A: case 0x12: case 0x1A: // LE, GE(Swapped operand) - Result = _VFCMPLE(Size, ElementSize, Src1, Src2); - break; - case 0x03: case 0x0B: case 0x13: case 0x1B: // Unordered - Result = _VFCMPUNO(Size, ElementSize, Src1, Src2); - break; - case 0x04: case 0x0C: case 0x14: case 0x1C: // NEQ - Result = _VFCMPNEQ(Size, ElementSize, Src1, Src2); - break; - case 0x05: case 0x0D: case 0x15: case 0x1D: // NLT, NGT(Swapped operand) - Result = _VFCMPLT(Size, ElementSize, Src1, Src2); - Result = _VNot(Size, ElementSize, Result); - break; - case 0x06: case 0x0E: case 0x16: case 0x1E: // NLE, NGE(Swapped operand) - Result = _VFCMPLE(Size, ElementSize, Src1, Src2); - Result = _VNot(Size, ElementSize, Result); - break; - case 0x07: case 0x0F: case 0x17: case 0x1F: // Ordered - Result = _VFCMPORD(Size, ElementSize, Src1, Src2); - break; - default: - LOGMAN_MSG_A_FMT("Unknown Comparison type: {}", CompType); - break; + case 0x00: + case 0x08: + case 0x10: + case 0x18: // EQ + Result = _VFCMPEQ(Size, ElementSize, Src1, Src2); + break; + case 0x01: + case 0x09: + case 0x11: + case 0x19: // LT, GT(Swapped operand) + Result 
= _VFCMPLT(Size, ElementSize, Src1, Src2); + break; + case 0x02: + case 0x0A: + case 0x12: + case 0x1A: // LE, GE(Swapped operand) + Result = _VFCMPLE(Size, ElementSize, Src1, Src2); + break; + case 0x03: + case 0x0B: + case 0x13: + case 0x1B: // Unordered + Result = _VFCMPUNO(Size, ElementSize, Src1, Src2); + break; + case 0x04: + case 0x0C: + case 0x14: + case 0x1C: // NEQ + Result = _VFCMPNEQ(Size, ElementSize, Src1, Src2); + break; + case 0x05: + case 0x0D: + case 0x15: + case 0x1D: // NLT, NGT(Swapped operand) + Result = _VFCMPLT(Size, ElementSize, Src1, Src2); + Result = _VNot(Size, ElementSize, Result); + break; + case 0x06: + case 0x0E: + case 0x16: + case 0x1E: // NLE, NGE(Swapped operand) + Result = _VFCMPLE(Size, ElementSize, Src1, Src2); + Result = _VNot(Size, ElementSize, Result); + break; + case 0x07: + case 0x0F: + case 0x17: + case 0x1F: // Ordered + Result = _VFCMPORD(Size, ElementSize, Src1, Src2); + break; + default: LOGMAN_MSG_A_FMT("Unknown Comparison type: {}", CompType); break; } return Result; @@ -2952,8 +2620,8 @@ void OpDispatchBuilder::VFCMPOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); const auto DstSize = GetDstSize(Op); - OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); - OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); + OrderedNode* Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags); const uint8_t CompType = Op->Src[1].Data.Literal.Value; OrderedNode* Result = VFCMPOpImpl(Op, ElementSize, Dest, Src, CompType); @@ -2961,12 +2629,10 @@ void OpDispatchBuilder::VFCMPOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VFCMPOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VFCMPOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VFCMPOp<4>(OpcodeArgs); +template void 
OpDispatchBuilder::VFCMPOp<8>(OpcodeArgs); -template +template void OpDispatchBuilder::AVXVFCMPOp(OpcodeArgs) { // No need for zero-extending in the scalar case, since // all we need is an insert at the end of the operation. @@ -2976,20 +2642,18 @@ void OpDispatchBuilder::AVXVFCMPOp(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[2].IsLiteral(), "Src[2] needs to be literal"); const uint8_t CompType = Op->Src[2].Data.Literal.Value; - OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], DstSize, Op->Flags); - OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], SrcSize, Op->Flags); - OrderedNode *Result = VFCMPOpImpl(Op, ElementSize, Src1, Src2, CompType); + OrderedNode* Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], DstSize, Op->Flags); + OrderedNode* Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], SrcSize, Op->Flags); + OrderedNode* Result = VFCMPOpImpl(Op, ElementSize, Src1, Src2, CompType); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::AVXVFCMPOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::AVXVFCMPOp<8>(OpcodeArgs); +template void OpDispatchBuilder::AVXVFCMPOp<4>(OpcodeArgs); +template void OpDispatchBuilder::AVXVFCMPOp<8>(OpcodeArgs); void OpDispatchBuilder::FXSaveOp(OpcodeArgs) { - OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); Mem = AppendSegmentOffset(Mem, Op->Flags); SaveX87State(Op, Mem); @@ -3003,27 +2667,25 @@ void OpDispatchBuilder::XSaveOp(OpcodeArgs) { void OpDispatchBuilder::XSaveOpImpl(OpcodeArgs) { const auto XSaveBase = [this, Op] { - OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); return AppendSegmentOffset(Mem, Op->Flags); }; // NOTE: Mask should be EAX and EDX concatenated, but we only need to test // for features that 
are in the lower 32 bits, so EAX only is sufficient. - OrderedNode *Mask = LoadGPRRegister(X86State::REG_RAX); - OrderedNode *Base = XSaveBase(); + OrderedNode* Mask = LoadGPRRegister(X86State::REG_RAX); + OrderedNode* Base = XSaveBase(); const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize()); - const auto StoreIfFlagSet = [&](uint32_t BitIndex, auto fn, uint32_t FieldSize = 1){ - OrderedNode *BitFlag = _Bfe(OpSize, FieldSize, BitIndex, Mask); + const auto StoreIfFlagSet = [&](uint32_t BitIndex, auto fn, uint32_t FieldSize = 1) { + OrderedNode* BitFlag = _Bfe(OpSize, FieldSize, BitIndex, Mask); auto CondJump_ = CondJump(BitFlag, {COND_NEQ}); auto StoreBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); SetTrueJumpTarget(CondJump_, StoreBlock); SetCurrentCodeBlock(StoreBlock); StartNewBlock(); - { - fn(); - } + { fn(); } auto Jump_ = Jump(); auto NextJumpTarget = CreateNewCodeBlockAfter(StoreBlock); SetJumpTarget(Jump_, NextJumpTarget); @@ -3041,22 +2703,22 @@ void OpDispatchBuilder::XSaveOpImpl(OpcodeArgs) { StoreIfFlagSet(1, [this, Base] { SaveSSEState(Base); }); } // AVX - if (CTX->HostFeatures.SupportsAVX) - { + if (CTX->HostFeatures.SupportsAVX) { StoreIfFlagSet(2, [this, Base] { SaveAVXState(Base); }); } // We need to save MXCSR and MXCSR_MASK if either SSE or AVX are requested to be saved { - StoreIfFlagSet(1, [this, Base] { SaveMXCSRState(Base); }, 2); + StoreIfFlagSet( + 1, [this, Base] { SaveMXCSRState(Base); }, 2); } // Update XSTATE_BV region of the XSAVE header { - OrderedNode *HeaderOffset = _Add(OpSize, Base, _Constant(512)); + OrderedNode* HeaderOffset = _Add(OpSize, Base, _Constant(512)); // NOTE: We currently only support the first 3 bits (x87, SSE, and AVX) - OrderedNode *RequestedFeatures = _Bfe(OpSize, 3, 0, Mask); + OrderedNode* RequestedFeatures = _Bfe(OpSize, 3, 0, Mask); // XSTATE_BV section of the header is 8 bytes in size, but we only really // care about setting at most 3 bits in the first byte. We zero out the rest. 
@@ -3064,7 +2726,7 @@ void OpDispatchBuilder::XSaveOpImpl(OpcodeArgs) { } } -void OpDispatchBuilder::SaveX87State(OpcodeArgs, OrderedNode *MemBase) { +void OpDispatchBuilder::SaveX87State(OpcodeArgs, OrderedNode* MemBase) { const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize()); // Saves 512bytes to the memory location provided // Header changes depending on if REX.W is set or not @@ -3073,8 +2735,7 @@ void OpDispatchBuilder::SaveX87State(OpcodeArgs, OrderedNode *MemBase) { // ------------------------------------------ // 00 | FCW | FSW | FTW | | FOP | FIP | // 16 | FDP | MXCSR | MXCSR_MASK| - } - else { + } else { // BYTE | 0 1 | 2 3 | 4 | 5 | 6 7 | 8 9 | a b | c d | e f | // ------------------------------------------ // 00 | FCW | FSW | FTW | | FOP | FIP[31:0] | FCS | | @@ -3087,13 +2748,13 @@ void OpDispatchBuilder::SaveX87State(OpcodeArgs, OrderedNode *MemBase) { } { - OrderedNode *MemLocation = _Add(OpSize, MemBase, _Constant(2)); + OrderedNode* MemLocation = _Add(OpSize, MemBase, _Constant(2)); _StoreMem(GPRClass, 2, MemLocation, ReconstructFSW(), 2); } { // Abridged FTW - OrderedNode *MemLocation = _Add(OpSize, MemBase, _Constant(4)); + OrderedNode* MemLocation = _Add(OpSize, MemBase, _Constant(4)); auto AbridgedFTW = _LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); _StoreMem(GPRClass, 1, MemLocation, AbridgedFTW, 2); } @@ -3143,67 +2804,67 @@ void OpDispatchBuilder::SaveX87State(OpcodeArgs, OrderedNode *MemBase) { // If OSFXSR bit in CR4 is not set than FXSAVE /may/ not save the XMM registers // This is implementation dependent for (uint32_t i = 0; i < Core::CPUState::NUM_MMS; ++i) { - OrderedNode *MMReg = _LoadContext(16, FPRClass, offsetof(FEXCore::Core::CPUState, mm[i])); - OrderedNode *MemLocation = _Add(OpSize, MemBase, _Constant(i * 16 + 32)); + OrderedNode* MMReg = _LoadContext(16, FPRClass, offsetof(FEXCore::Core::CPUState, mm[i])); + OrderedNode* MemLocation = _Add(OpSize, MemBase, _Constant(i * 16 + 32)); 
_StoreMem(FPRClass, 16, MemLocation, MMReg, 16); } } -void OpDispatchBuilder::SaveSSEState(OrderedNode *MemBase) { +void OpDispatchBuilder::SaveSSEState(OrderedNode* MemBase) { const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize()); const auto NumRegs = CTX->Config.Is64BitMode ? 16U : 8U; for (uint32_t i = 0; i < NumRegs; ++i) { - OrderedNode *XMMReg = LoadXMMRegister(i); - OrderedNode *MemLocation = _Add(OpSize, MemBase, _Constant(i * 16 + 160)); + OrderedNode* XMMReg = LoadXMMRegister(i); + OrderedNode* MemLocation = _Add(OpSize, MemBase, _Constant(i * 16 + 160)); _StoreMem(FPRClass, 16, MemLocation, XMMReg, 16); } } -void OpDispatchBuilder::SaveMXCSRState(OrderedNode *MemBase) { +void OpDispatchBuilder::SaveMXCSRState(OrderedNode* MemBase) { const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize()); - OrderedNode *MXCSR = GetMXCSR(); - OrderedNode *MXCSRLocation = _Add(OpSize, MemBase, _Constant(24)); + OrderedNode* MXCSR = GetMXCSR(); + OrderedNode* MXCSRLocation = _Add(OpSize, MemBase, _Constant(24)); _StoreMem(GPRClass, 4, MXCSRLocation, MXCSR, 4); // Store the mask for all bits. - OrderedNode *MXCSRMaskLocation = _Add(OpSize, MXCSRLocation, _Constant(4)); + OrderedNode* MXCSRMaskLocation = _Add(OpSize, MXCSRLocation, _Constant(4)); _StoreMem(GPRClass, 4, MXCSRMaskLocation, _Constant(0xFFFF), 4); } -void OpDispatchBuilder::SaveAVXState(OrderedNode *MemBase) { +void OpDispatchBuilder::SaveAVXState(OrderedNode* MemBase) { const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize()); const auto NumRegs = CTX->Config.Is64BitMode ? 
16U : 8U; for (uint32_t i = 0; i < NumRegs; ++i) { - OrderedNode *Upper = _VDupElement(32, 16, LoadXMMRegister(i), 1); - OrderedNode *MemLocation = _Add(OpSize, MemBase, _Constant(i * 16 + 576)); + OrderedNode* Upper = _VDupElement(32, 16, LoadXMMRegister(i), 1); + OrderedNode* MemLocation = _Add(OpSize, MemBase, _Constant(i * 16 + 576)); _StoreMem(FPRClass, 16, MemLocation, Upper, 16); } } -OrderedNode *OpDispatchBuilder::GetMXCSR() { +OrderedNode* OpDispatchBuilder::GetMXCSR() { // Default MXCSR Value - OrderedNode *MXCSR = _Constant(0x1F80); - OrderedNode *RoundingMode = _GetRoundingMode(); + OrderedNode* MXCSR = _Constant(0x1F80); + OrderedNode* RoundingMode = _GetRoundingMode(); return _Bfi(OpSize::i32Bit, 3, 13, MXCSR, RoundingMode); } void OpDispatchBuilder::FXRStoreOp(OpcodeArgs) { const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize()); - OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); + OrderedNode* Mem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); Mem = AppendSegmentOffset(Mem, Op->Flags); RestoreX87State(Mem); RestoreSSEState(Mem); - OrderedNode *MXCSRLocation = _Add(OpSize, Mem, _Constant(24)); - OrderedNode *MXCSR = _LoadMem(GPRClass, 4, MXCSRLocation, 4); + OrderedNode* MXCSRLocation = _Add(OpSize, Mem, _Constant(24)); + OrderedNode* MXCSR = _LoadMem(GPRClass, 4, MXCSRLocation, 4); RestoreMXCSRState(MXCSR); } @@ -3211,29 +2872,27 @@ void OpDispatchBuilder::XRstorOpImpl(OpcodeArgs) { const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize()); const auto XSaveBase = [this, Op] { - OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); return AppendSegmentOffset(Mem, Op->Flags); }; // Set up base address for the XSAVE region to restore from, and also read the // XSTATE_BV bit flags out of the XSTATE header. 
- OrderedNode *Base = XSaveBase(); - OrderedNode *Mask = _LoadMem(GPRClass, 8, _Add(OpSize, Base, _Constant(512)), 8); + OrderedNode* Base = XSaveBase(); + OrderedNode* Mask = _LoadMem(GPRClass, 8, _Add(OpSize, Base, _Constant(512)), 8); // If a bit in our XSTATE_BV is set, then we restore from that region of the XSAVE area, // otherwise, if not set, then we need to set the relevant data the bit corresponds to // to it's defined initial configuration. - const auto RestoreIfFlagSetOrDefault = [&](uint32_t BitIndex, auto restore_fn, auto default_fn, uint32_t FieldSize = 1){ - OrderedNode *BitFlag = _Bfe(OpSize, FieldSize, BitIndex, Mask); + const auto RestoreIfFlagSetOrDefault = [&](uint32_t BitIndex, auto restore_fn, auto default_fn, uint32_t FieldSize = 1) { + OrderedNode* BitFlag = _Bfe(OpSize, FieldSize, BitIndex, Mask); auto CondJump_ = CondJump(BitFlag, {COND_NEQ}); auto RestoreBlock = CreateNewCodeBlockAfter(GetCurrentBlock()); SetTrueJumpTarget(CondJump_, RestoreBlock); SetCurrentCodeBlock(RestoreBlock); StartNewBlock(); - { - restore_fn(); - } + { restore_fn(); } auto RestoreExitJump = Jump(); auto DefaultBlock = CreateNewCodeBlockAfter(RestoreBlock); auto ExitBlock = CreateNewCodeBlockAfter(DefaultBlock); @@ -3241,9 +2900,7 @@ void OpDispatchBuilder::XRstorOpImpl(OpcodeArgs) { SetFalseJumpTarget(CondJump_, DefaultBlock); SetCurrentCodeBlock(DefaultBlock); StartNewBlock(); - { - default_fn(); - } + { default_fn(); } auto DefaultExitJump = Jump(); SetJumpTarget(DefaultExitJump, ExitBlock); SetCurrentCodeBlock(ExitBlock); @@ -3252,88 +2909,85 @@ void OpDispatchBuilder::XRstorOpImpl(OpcodeArgs) { // x87 { - RestoreIfFlagSetOrDefault(0, - [this, Base] { RestoreX87State(Base); }, - [this, Op] { DefaultX87State(Op); }); + RestoreIfFlagSetOrDefault( + 0, [this, Base] { RestoreX87State(Base); }, [this, Op] { DefaultX87State(Op); }); } // SSE { - RestoreIfFlagSetOrDefault(1, - [this, Base] { RestoreSSEState(Base); }, - [this] { DefaultSSEState(); }); + 
RestoreIfFlagSetOrDefault( + 1, [this, Base] { RestoreSSEState(Base); }, [this] { DefaultSSEState(); }); } // AVX - if (CTX->HostFeatures.SupportsAVX) - { - RestoreIfFlagSetOrDefault(2, - [this, Base] { RestoreAVXState(Base); }, - [this] { DefaultAVXState(); }); + if (CTX->HostFeatures.SupportsAVX) { + RestoreIfFlagSetOrDefault( + 2, [this, Base] { RestoreAVXState(Base); }, [this] { DefaultAVXState(); }); } { // We need to restore the MXCSR if either SSE or AVX are requested to be saved - RestoreIfFlagSetOrDefault(1, - [this, Base, OpSize] { - OrderedNode *MXCSRLocation = _Add(OpSize, Base, _Constant(24)); - OrderedNode *MXCSR = _LoadMem(GPRClass, 4, MXCSRLocation, 4); - RestoreMXCSRState(MXCSR); - }, - [] { /* Intentionally do nothing*/ }, 2); + RestoreIfFlagSetOrDefault( + 1, + [this, Base, OpSize] { + OrderedNode* MXCSRLocation = _Add(OpSize, Base, _Constant(24)); + OrderedNode* MXCSR = _LoadMem(GPRClass, 4, MXCSRLocation, 4); + RestoreMXCSRState(MXCSR); + }, + [] { /* Intentionally do nothing*/ }, 2); } } -void OpDispatchBuilder::RestoreX87State(OrderedNode *MemBase) { +void OpDispatchBuilder::RestoreX87State(OrderedNode* MemBase) { const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize()); auto NewFCW = _LoadMem(GPRClass, 2, MemBase, 2); _StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW)); { - OrderedNode *MemLocation = _Add(OpSize, MemBase, _Constant(2)); + OrderedNode* MemLocation = _Add(OpSize, MemBase, _Constant(2)); auto NewFSW = _LoadMem(GPRClass, 2, MemLocation, 2); ReconstructX87StateFromFSW(NewFSW); } { // Abridged FTW - OrderedNode *MemLocation = _Add(OpSize, MemBase, _Constant(4)); + OrderedNode* MemLocation = _Add(OpSize, MemBase, _Constant(4)); auto NewAbridgedFTW = _LoadMem(GPRClass, 1, MemLocation, 2); _StoreContext(1, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); } for (uint32_t i = 0; i < Core::CPUState::NUM_MMS; ++i) { - OrderedNode *MemLocation = _Add(OpSize, MemBase, _Constant(i * 16 + 32)); 
+ OrderedNode* MemLocation = _Add(OpSize, MemBase, _Constant(i * 16 + 32)); auto MMReg = _LoadMem(FPRClass, 16, MemLocation, 16); _StoreContext(16, FPRClass, MMReg, offsetof(FEXCore::Core::CPUState, mm[i])); } } -void OpDispatchBuilder::RestoreSSEState(OrderedNode *MemBase) { +void OpDispatchBuilder::RestoreSSEState(OrderedNode* MemBase) { const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize()); const auto NumRegs = CTX->Config.Is64BitMode ? 16U : 8U; for (uint32_t i = 0; i < NumRegs; ++i) { - OrderedNode *MemLocation = _Add(OpSize, MemBase, _Constant(i * 16 + 160)); - OrderedNode *XMMReg = _LoadMem(FPRClass, 16, MemLocation, 16); + OrderedNode* MemLocation = _Add(OpSize, MemBase, _Constant(i * 16 + 160)); + OrderedNode* XMMReg = _LoadMem(FPRClass, 16, MemLocation, 16); StoreXMMRegister(i, XMMReg); } } -void OpDispatchBuilder::RestoreMXCSRState(OrderedNode *MXCSR) { +void OpDispatchBuilder::RestoreMXCSRState(OrderedNode* MXCSR) { // We only support the rounding mode and FTZ bit being set - OrderedNode *RoundingMode = _Bfe(OpSize::i32Bit, 3, 13, MXCSR); + OrderedNode* RoundingMode = _Bfe(OpSize::i32Bit, 3, 13, MXCSR); _SetRoundingMode(RoundingMode); } -void OpDispatchBuilder::RestoreAVXState(OrderedNode *MemBase) { +void OpDispatchBuilder::RestoreAVXState(OrderedNode* MemBase) { const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize()); const auto NumRegs = CTX->Config.Is64BitMode ? 
16U : 8U; for (uint32_t i = 0; i < NumRegs; ++i) { - OrderedNode *XMMReg = LoadXMMRegister(i); - OrderedNode *MemLocation = _Add(OpSize, MemBase, _Constant(i * 16 + 576)); - OrderedNode *YMMHReg = _LoadMem(FPRClass, 16, MemLocation, 16); - OrderedNode *YMM = _VInsElement(32, 16, 1, 0, XMMReg, YMMHReg); + OrderedNode* XMMReg = LoadXMMRegister(i); + OrderedNode* MemLocation = _Add(OpSize, MemBase, _Constant(i * 16 + 576)); + OrderedNode* YMMHReg = _LoadMem(FPRClass, 16, MemLocation, 16); + OrderedNode* YMM = _VInsElement(32, 16, 1, 0, XMMReg, YMMHReg); StoreXMMRegister(i, YMM); } } @@ -3345,7 +2999,7 @@ void OpDispatchBuilder::DefaultX87State(OpcodeArgs) { // On top of resetting the flags to a default state, we also need to clear // all of the ST0-7/MM0-7 registers to zero. - OrderedNode *ZeroVector = LoadAndCacheNamedVectorConstant(Core::CPUState::MM_REG_SIZE, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); + OrderedNode* ZeroVector = LoadAndCacheNamedVectorConstant(Core::CPUState::MM_REG_SIZE, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); for (uint32_t i = 0; i < Core::CPUState::NUM_MMS; ++i) { _StoreContext(16, FPRClass, ZeroVector, offsetof(FEXCore::Core::CPUState, mm[i])); } @@ -3354,7 +3008,7 @@ void OpDispatchBuilder::DefaultX87State(OpcodeArgs) { void OpDispatchBuilder::DefaultSSEState() { const auto NumRegs = CTX->Config.Is64BitMode ? 16U : 8U; - OrderedNode *ZeroVector = LoadAndCacheNamedVectorConstant(Core::CPUState::XMM_SSE_REG_SIZE, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); + OrderedNode* ZeroVector = LoadAndCacheNamedVectorConstant(Core::CPUState::XMM_SSE_REG_SIZE, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); for (uint32_t i = 0; i < NumRegs; ++i) { StoreXMMRegister(i, ZeroVector); } @@ -3364,67 +3018,64 @@ void OpDispatchBuilder::DefaultAVXState() { const auto NumRegs = CTX->Config.Is64BitMode ? 
16U : 8U; for (uint32_t i = 0; i < NumRegs; i++) { - OrderedNode* Reg = LoadXMMRegister(i); - OrderedNode* Dst = _VMov(16, Reg); - StoreXMMRegister(i, Dst); - } + OrderedNode* Reg = LoadXMMRegister(i); + OrderedNode* Dst = _VMov(16, Reg); + StoreXMMRegister(i, Dst); + } } -OrderedNode* OpDispatchBuilder::PALIGNROpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, - const X86Tables::DecodedOperand& Src2, - const X86Tables::DecodedOperand& Imm, - bool IsAVX) { +OrderedNode* OpDispatchBuilder::PALIGNROpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, + const X86Tables::DecodedOperand& Imm, bool IsAVX) { LOGMAN_THROW_A_FMT(Imm.IsLiteral(), "Imm needs to be a literal"); // For the 256-bit case we handle it as pairs of 128-bit halves. const auto DstSize = GetDstSize(Op); - const auto SanitizedDstSize = std::min(DstSize, uint8_t{16}); + const auto SanitizedDstSize = std::min(DstSize, uint8_t {16}); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto Index = Imm.Data.Literal.Value; - OrderedNode *Src2Node = LoadSource(FPRClass, Op, Src2, Op->Flags); + OrderedNode* Src2Node = LoadSource(FPRClass, Op, Src2, Op->Flags); if (Index == 0) { if (IsAVX && !Is256Bit) { // 128-bit AVX needs to zero the upper bits. 
return _VMov(16, Src2Node); - } - else { + } else { return Src2Node; } } - OrderedNode *Src1Node = LoadSource(FPRClass, Op, Src1, Op->Flags); + OrderedNode* Src1Node = LoadSource(FPRClass, Op, Src1, Op->Flags); if (Index >= (SanitizedDstSize * 2)) { // If the immediate is greater than both vectors combined then it zeroes the vector return LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); } - OrderedNode *Low = _VExtr(SanitizedDstSize, 1, Src1Node, Src2Node, Index); + OrderedNode* Low = _VExtr(SanitizedDstSize, 1, Src1Node, Src2Node, Index); if (!Is256Bit) { return Low; } - OrderedNode *HighSrc1 = _VInsElement(DstSize, 16, 0, 1, Src1Node, Src1Node); - OrderedNode *HighSrc2 = _VInsElement(DstSize, 16, 0, 1, Src2Node, Src2Node); - OrderedNode *High = _VExtr(SanitizedDstSize, 1, HighSrc1, HighSrc2, Index); + OrderedNode* HighSrc1 = _VInsElement(DstSize, 16, 0, 1, Src1Node, Src1Node); + OrderedNode* HighSrc2 = _VInsElement(DstSize, 16, 0, 1, Src2Node, Src2Node); + OrderedNode* High = _VExtr(SanitizedDstSize, 1, HighSrc1, HighSrc2, Index); return _VInsElement(DstSize, 16, 1, 0, Low, High); } void OpDispatchBuilder::PAlignrOp(OpcodeArgs) { - OrderedNode *Result = PALIGNROpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1], false); + OrderedNode* Result = PALIGNROpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1], false); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::VPALIGNROp(OpcodeArgs) { - OrderedNode *Result = PALIGNROpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2], true); + OrderedNode* Result = PALIGNROpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2], true); StoreResult(FPRClass, Op, Result, -1); } template void OpDispatchBuilder::UCOMISxOp(OpcodeArgs) { const auto SrcSize = Op->Src[0].IsGPR() ? 
GetGuestVectorLength() : GetSrcSize(Op); - OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, GetGuestVectorLength(), Op->Flags); - OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); + OrderedNode* Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, GetGuestVectorLength(), Op->Flags); + OrderedNode* Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); HandleNZCVWrite(); _FCmp(ElementSize, Src1, Src2); @@ -3436,13 +3087,11 @@ void OpDispatchBuilder::UCOMISxOp(OpcodeArgs) { SetRFLAG(_Constant(0)); } -template -void OpDispatchBuilder::UCOMISxOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::UCOMISxOp<8>(OpcodeArgs); +template void OpDispatchBuilder::UCOMISxOp<4>(OpcodeArgs); +template void OpDispatchBuilder::UCOMISxOp<8>(OpcodeArgs); void OpDispatchBuilder::LDMXCSR(OpcodeArgs) { - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); RestoreMXCSRState(Dest); } @@ -3450,8 +3099,7 @@ void OpDispatchBuilder::STMXCSR(OpcodeArgs) { StoreResult(GPRClass, Op, GetMXCSR(), -1); } -OrderedNode* OpDispatchBuilder::PACKUSOpImpl(OpcodeArgs, size_t ElementSize, - OrderedNode *Src1, OrderedNode *Src2) { +OrderedNode* OpDispatchBuilder::PACKUSOpImpl(OpcodeArgs, size_t ElementSize, OrderedNode* Src1, OrderedNode* Src2) { const auto Size = GetSrcSize(Op); return _VSQXTUNPair(Size, ElementSize, Src1, Src2); @@ -3459,26 +3107,24 @@ OrderedNode* OpDispatchBuilder::PACKUSOpImpl(OpcodeArgs, size_t ElementSize, template void OpDispatchBuilder::PACKUSOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = PACKUSOpImpl(Op, ElementSize, Dest, Src); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = 
PACKUSOpImpl(Op, ElementSize, Dest, Src); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PACKUSOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::PACKUSOp<4>(OpcodeArgs); +template void OpDispatchBuilder::PACKUSOp<2>(OpcodeArgs); +template void OpDispatchBuilder::PACKUSOp<4>(OpcodeArgs); template void OpDispatchBuilder::VPACKUSOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - OrderedNode *Result = PACKUSOpImpl(Op, ElementSize, Src1, Src2); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Result = PACKUSOpImpl(Op, ElementSize, Src1, Src2); if (Is256Bit) { // We do a little cheeky 64-bit swapping to interleave the result. @@ -3488,39 +3134,34 @@ void OpDispatchBuilder::VPACKUSOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPACKUSOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::VPACKUSOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPACKUSOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VPACKUSOp<4>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::PACKSSOpImpl(OpcodeArgs, size_t ElementSize, - OrderedNode *Src1, OrderedNode *Src2) { +OrderedNode* OpDispatchBuilder::PACKSSOpImpl(OpcodeArgs, size_t ElementSize, OrderedNode* Src1, OrderedNode* Src2) { const auto Size = GetSrcSize(Op); return _VSQXTNPair(Size, ElementSize, Src1, Src2); } template void OpDispatchBuilder::PACKSSOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = PACKSSOpImpl(Op, ElementSize, Dest, Src); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + 
OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = PACKSSOpImpl(Op, ElementSize, Dest, Src); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PACKSSOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::PACKSSOp<4>(OpcodeArgs); +template void OpDispatchBuilder::PACKSSOp<2>(OpcodeArgs); +template void OpDispatchBuilder::PACKSSOp<4>(OpcodeArgs); template void OpDispatchBuilder::VPACKSSOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - OrderedNode *Result = PACKSSOpImpl(Op, ElementSize, Src1, Src2); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Result = PACKSSOpImpl(Op, ElementSize, Src1, Src2); if (Is256Bit) { // We do a little cheeky 64-bit swapping to interleave the result. 
@@ -3530,13 +3171,10 @@ void OpDispatchBuilder::VPACKSSOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPACKSSOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::VPACKSSOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPACKSSOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VPACKSSOp<4>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::PMULLOpImpl(OpcodeArgs, size_t ElementSize, bool Signed, - OrderedNode *Src1, OrderedNode *Src2) { +OrderedNode* OpDispatchBuilder::PMULLOpImpl(OpcodeArgs, size_t ElementSize, bool Signed, OrderedNode* Src1, OrderedNode* Src2) { const auto Size = GetSrcSize(Op); if (Size == 8) { @@ -3559,41 +3197,35 @@ OrderedNode* OpDispatchBuilder::PMULLOpImpl(OpcodeArgs, size_t ElementSize, bool template void OpDispatchBuilder::PMULLOp(OpcodeArgs) { - static_assert(ElementSize == sizeof(uint32_t), - "Currently only handles 32-bit -> 64-bit"); + static_assert(ElementSize == sizeof(uint32_t), "Currently only handles 32-bit -> 64-bit"); - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Res = PMULLOpImpl(Op, ElementSize, Signed, Src1, Src2); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Res = PMULLOpImpl(Op, ElementSize, Signed, Src1, Src2); StoreResult(FPRClass, Op, Res, -1); } -template -void OpDispatchBuilder::PMULLOp<4, false>(OpcodeArgs); -template -void OpDispatchBuilder::PMULLOp<4, true>(OpcodeArgs); +template void OpDispatchBuilder::PMULLOp<4, false>(OpcodeArgs); +template void OpDispatchBuilder::PMULLOp<4, true>(OpcodeArgs); -template +template void OpDispatchBuilder::VPMULLOp(OpcodeArgs) { - static_assert(ElementSize == sizeof(uint32_t), - "Currently only handles 32-bit -> 64-bit"); + static_assert(ElementSize == sizeof(uint32_t), "Currently only handles 32-bit -> 64-bit"); - 
OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - OrderedNode *Result = PMULLOpImpl(Op, ElementSize, Signed, Src1, Src2); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Result = PMULLOpImpl(Op, ElementSize, Signed, Src1, Src2); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPMULLOp<4, false>(OpcodeArgs); -template -void OpDispatchBuilder::VPMULLOp<4, true>(OpcodeArgs); +template void OpDispatchBuilder::VPMULLOp<4, false>(OpcodeArgs); +template void OpDispatchBuilder::VPMULLOp<4, true>(OpcodeArgs); template void OpDispatchBuilder::MOVQ2DQ(OpcodeArgs) { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); // This instruction is a bit special in that if the source is MMX then it zexts to 128bit if constexpr (ToXMM) { @@ -3601,33 +3233,27 @@ void OpDispatchBuilder::MOVQ2DQ(OpcodeArgs) { Src = _VMov(16, Src); StoreXMMRegister(Index, Src); - } - else { + } else { // This is simple, just store the result StoreResult(FPRClass, Op, Src, -1); } } -template -void OpDispatchBuilder::MOVQ2DQ(OpcodeArgs); -template -void OpDispatchBuilder::MOVQ2DQ(OpcodeArgs); +template void OpDispatchBuilder::MOVQ2DQ(OpcodeArgs); +template void OpDispatchBuilder::MOVQ2DQ(OpcodeArgs); -OrderedNode* OpDispatchBuilder::ADDSUBPOpImpl(OpcodeArgs, size_t ElementSize, - OrderedNode *Src1, OrderedNode *Src2) { +OrderedNode* OpDispatchBuilder::ADDSUBPOpImpl(OpcodeArgs, size_t ElementSize, OrderedNode* Src1, OrderedNode* Src2) { const auto Size = GetSrcSize(Op); if (CTX->HostFeatures.SupportsFCMA) { if (ElementSize == 4) { auto Swizzle = _VRev64(Size, 4, Src2); return _VFCADD(Size, ElementSize, Src1, Swizzle, 90); - } - else { + } else { auto Swizzle = _VExtr(Size, 1, Src2, Src2, 8); return 
_VFCADD(Size, ElementSize, Src1, Swizzle, 90); } - } - else { + } else { auto ConstantEOR = LoadAndCacheNamedVectorConstant(Size, ElementSize == 4 ? NAMED_VECTOR_PADDSUBPS_INVERT : NAMED_VECTOR_PADDSUBPD_INVERT); auto InvertedSource = _VXor(Size, ElementSize, Src2, ConstantEOR); return _VFAdd(Size, ElementSize, Src1, InvertedSource); @@ -3636,37 +3262,33 @@ OrderedNode* OpDispatchBuilder::ADDSUBPOpImpl(OpcodeArgs, size_t ElementSize, template void OpDispatchBuilder::ADDSUBPOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = ADDSUBPOpImpl(Op, ElementSize, Dest, Src); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = ADDSUBPOpImpl(Op, ElementSize, Dest, Src); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::ADDSUBPOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::ADDSUBPOp<8>(OpcodeArgs); +template void OpDispatchBuilder::ADDSUBPOp<4>(OpcodeArgs); +template void OpDispatchBuilder::ADDSUBPOp<8>(OpcodeArgs); template void OpDispatchBuilder::VADDSUBPOp(OpcodeArgs) { - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - OrderedNode *Result = ADDSUBPOpImpl(Op, ElementSize, Src1, Src2); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Result = ADDSUBPOpImpl(Op, ElementSize, Src1, Src2); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VADDSUBPOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VADDSUBPOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VADDSUBPOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VADDSUBPOp<8>(OpcodeArgs); void OpDispatchBuilder::PFNACCOp(OpcodeArgs) 
{ auto Size = GetSrcSize(Op); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); auto DestUnzip = _VUnZip(Size, 4, Dest, Src); auto SrcUnzip = _VUnZip2(Size, 4, Dest, Src); @@ -3678,11 +3300,11 @@ void OpDispatchBuilder::PFNACCOp(OpcodeArgs) { void OpDispatchBuilder::PFPNACCOp(OpcodeArgs) { auto Size = GetSrcSize(Op); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *ResAdd{}; - OrderedNode *ResSub{}; + OrderedNode* ResAdd {}; + OrderedNode* ResSub {}; auto UpperSubDest = _VDupElement(Size, 4, Dest, 1); ResSub = _VFSub(4, 4, Dest, UpperSubDest); @@ -3695,14 +3317,14 @@ void OpDispatchBuilder::PFPNACCOp(OpcodeArgs) { void OpDispatchBuilder::PSWAPDOp(OpcodeArgs) { auto Size = GetSrcSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); auto Result = _VRev64(Size, 4, Src); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::PI2FWOp(OpcodeArgs) { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); size_t Size = GetDstSize(Op); @@ -3720,7 +3342,7 @@ void OpDispatchBuilder::PI2FWOp(OpcodeArgs) { } void OpDispatchBuilder::PF2IWOp(OpcodeArgs) { - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); size_t Size = GetDstSize(Op); @@ -3739,17 +3361,17 @@ void OpDispatchBuilder::PF2IWOp(OpcodeArgs) { void 
OpDispatchBuilder::PMULHRWOp(OpcodeArgs) { auto Size = GetSrcSize(Op); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Res{}; + OrderedNode* Res {}; // Implementation is more efficient for 8byte registers // Multiplies 4 16bit values in to 4 32bit values Res = _VSMull(Size * 2, 2, Dest, Src); // Load 0x0000_8000 in to each 32-bit element. - OrderedNode *VConstant = _VectorImm(16, 4, 0x80, 8); + OrderedNode* VConstant = _VectorImm(16, 4, 0x80, 8); Res = _VAdd(Size * 2, 4, Res, VConstant); @@ -3762,39 +3384,33 @@ void OpDispatchBuilder::PMULHRWOp(OpcodeArgs) { template void OpDispatchBuilder::VPFCMPOp(OpcodeArgs) { auto Size = GetSrcSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, GetDstSize(Op), Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, GetDstSize(Op), Op->Flags); - OrderedNode *Result{}; + OrderedNode* Result {}; // This maps 1:1 to an AArch64 NEON Op - //auto ALUOp = _VCMPGT(Size, 4, Dest, Src); + // auto ALUOp = _VCMPGT(Size, 4, Dest, Src); switch (CompType) { - case 0x00: // EQ - Result = _VFCMPEQ(Size, 4, Dest, Src); + case 0x00: // EQ + Result = _VFCMPEQ(Size, 4, Dest, Src); break; - case 0x01: // GE(Swapped operand) - Result = _VFCMPLE(Size, 4, Src, Dest); + case 0x01: // GE(Swapped operand) + Result = _VFCMPLE(Size, 4, Src, Dest); break; - case 0x02: // GT - Result = _VFCMPGT(Size, 4, Dest, Src); - break; - default: - LOGMAN_MSG_A_FMT("Unknown Comparison type: {}", CompType); + case 0x02: // GT + Result = _VFCMPGT(Size, 4, Dest, Src); break; + default: LOGMAN_MSG_A_FMT("Unknown Comparison type: {}", CompType); 
break; } StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPFCMPOp<0>(OpcodeArgs); -template -void OpDispatchBuilder::VPFCMPOp<1>(OpcodeArgs); -template -void OpDispatchBuilder::VPFCMPOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VPFCMPOp<0>(OpcodeArgs); +template void OpDispatchBuilder::VPFCMPOp<1>(OpcodeArgs); +template void OpDispatchBuilder::VPFCMPOp<2>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::PMADDWDOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, - const X86Tables::DecodedOperand& Src2) { +OrderedNode* OpDispatchBuilder::PMADDWDOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2) { // This is a pretty curious operation // Does two MADD operations across 4 16bit signed integers and accumulates to 32bit integers in the destination // @@ -3805,8 +3421,8 @@ OrderedNode* OpDispatchBuilder::PMADDWDOpImpl(OpcodeArgs, const X86Tables::Decod auto Size = GetSrcSize(Op); - OrderedNode *Src1Node = LoadSource(FPRClass, Op, Src1, Op->Flags); - OrderedNode *Src2Node = LoadSource(FPRClass, Op, Src2, Op->Flags); + OrderedNode* Src1Node = LoadSource(FPRClass, Op, Src1, Op->Flags); + OrderedNode* Src2Node = LoadSource(FPRClass, Op, Src2, Op->Flags); if (Size == 8) { // MMX implementation can be slightly more optimal @@ -3823,30 +3439,29 @@ OrderedNode* OpDispatchBuilder::PMADDWDOpImpl(OpcodeArgs, const X86Tables::Decod } void OpDispatchBuilder::PMADDWD(OpcodeArgs) { - OrderedNode *Result = PMADDWDOpImpl(Op, Op->Dest, Op->Src[0]); + OrderedNode* Result = PMADDWDOpImpl(Op, Op->Dest, Op->Src[0]); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::VPMADDWDOp(OpcodeArgs) { - OrderedNode *Result = PMADDWDOpImpl(Op, Op->Src[0], Op->Src[1]); + OrderedNode* Result = PMADDWDOpImpl(Op, Op->Src[0], Op->Src[1]); StoreResult(FPRClass, Op, Result, -1); } -OrderedNode* OpDispatchBuilder::PMADDUBSWOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& 
Src2Op) { +OrderedNode* OpDispatchBuilder::PMADDUBSWOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op) { const auto Size = GetSrcSize(Op); - OrderedNode *Src1 = LoadSource(FPRClass, Op, Src1Op, Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Src1Op, Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); if (Size == 8) { // 64bit is more efficient // Src1 is unsigned - auto Src1_16b = _VUXTL(Size * 2, 1, Src1); // [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] + auto Src1_16b = _VUXTL(Size * 2, 1, Src1); // [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] // Src2 is signed - auto Src2_16b = _VSXTL(Size * 2, 1, Src2); // [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] + auto Src2_16b = _VSXTL(Size * 2, 1, Src2); // [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] auto ResMul_L = _VSMull(Size * 2, 2, Src1_16b, Src2_16b); auto ResMul_H = _VSMull2(Size * 2, 2, Src1_16b, Src2_16b); @@ -3864,13 +3479,13 @@ OrderedNode* OpDispatchBuilder::PMADDUBSWOpImpl(OpcodeArgs, const X86Tables::Dec // Requires implementing IR ops for BIC (vector, immediate) although. 
// Src1 is unsigned - auto Src1_16b_L = _VUXTL(Size, 1, Src1); // [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] - auto Src2_16b_L = _VSXTL(Size, 1, Src2); // [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] - auto ResMul_L = _VMul(Size, 2, Src1_16b_L, Src2_16b_L); + auto Src1_16b_L = _VUXTL(Size, 1, Src1); // [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] + auto Src2_16b_L = _VSXTL(Size, 1, Src2); // [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] + auto ResMul_L = _VMul(Size, 2, Src1_16b_L, Src2_16b_L); // Src2 is signed - auto Src1_16b_H = _VUXTL2(Size, 1, Src1); // Offset to +64bits [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] - auto Src2_16b_H = _VSXTL2(Size, 1, Src2); // Offset to +64bits [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] + auto Src1_16b_H = _VUXTL2(Size, 1, Src1); // Offset to +64bits [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] + auto Src2_16b_H = _VSXTL2(Size, 1, Src2); // Offset to +64bits [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] auto ResMul_L_H = _VMul(Size, 2, Src1_16b_H, Src2_16b_H); auto TmpZip1 = _VUnZip(Size, 2, ResMul_L, ResMul_L_H); @@ -3880,48 +3495,44 @@ OrderedNode* OpDispatchBuilder::PMADDUBSWOpImpl(OpcodeArgs, const X86Tables::Dec } void OpDispatchBuilder::PMADDUBSW(OpcodeArgs) { - OrderedNode * Result = PMADDUBSWOpImpl(Op, Op->Dest, Op->Src[0]); + OrderedNode* Result = PMADDUBSWOpImpl(Op, Op->Dest, Op->Src[0]); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::VPMADDUBSWOp(OpcodeArgs) { - OrderedNode * Result = PMADDUBSWOpImpl(Op, Op->Src[0], Op->Src[1]); + OrderedNode* Result = PMADDUBSWOpImpl(Op, Op->Src[0], Op->Src[1]); StoreResult(FPRClass, Op, Result, -1); } -OrderedNode* OpDispatchBuilder::PMULHWOpImpl(OpcodeArgs, bool Signed, - OrderedNode *Src1, OrderedNode *Src2) { +OrderedNode* OpDispatchBuilder::PMULHWOpImpl(OpcodeArgs, 
bool Signed, OrderedNode* Src1, OrderedNode* Src2) { const auto Size = GetSrcSize(Op); if (Signed) { return _VSMulH(Size, 2, Src1, Src2); - } - else { + } else { return _VUMulH(Size, 2, Src1, Src2); } } template void OpDispatchBuilder::PMULHW(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = PMULHWOpImpl(Op, Signed, Dest, Src); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = PMULHWOpImpl(Op, Signed, Dest, Src); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PMULHW(OpcodeArgs); -template -void OpDispatchBuilder::PMULHW(OpcodeArgs); +template void OpDispatchBuilder::PMULHW(OpcodeArgs); +template void OpDispatchBuilder::PMULHW(OpcodeArgs); -template +template void OpDispatchBuilder::VPMULHWOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - OrderedNode *Result = PMULHWOpImpl(Op, Signed, Dest, Src); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Result = PMULHWOpImpl(Op, Signed, Dest, Src); if (Is128Bit) { Result = _VMov(16, Result); @@ -3929,15 +3540,13 @@ void OpDispatchBuilder::VPMULHWOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPMULHWOp(OpcodeArgs); -template -void OpDispatchBuilder::VPMULHWOp(OpcodeArgs); +template void OpDispatchBuilder::VPMULHWOp(OpcodeArgs); +template void OpDispatchBuilder::VPMULHWOp(OpcodeArgs); -OrderedNode* OpDispatchBuilder::PMULHRSWOpImpl(OpcodeArgs, OrderedNode *Src1, OrderedNode *Src2) { +OrderedNode* 
OpDispatchBuilder::PMULHRSWOpImpl(OpcodeArgs, OrderedNode* Src1, OrderedNode* Src2) { const auto Size = GetSrcSize(Op); - OrderedNode *Res{}; + OrderedNode* Res {}; if (Size == 8) { // Implementation is more efficient for 8byte registers Res = _VSMull(Size * 2, 2, Src1, Src2); @@ -3947,8 +3556,8 @@ OrderedNode* OpDispatchBuilder::PMULHRSWOpImpl(OpcodeArgs, OrderedNode *Src1, Or return _VUShrNI(Size * 2, 4, Res, 1); } else { // 128-bit and 256-bit are less efficient - OrderedNode *ResultLow; - OrderedNode *ResultHigh; + OrderedNode* ResultLow; + OrderedNode* ResultHigh; ResultLow = _VSMull(Size, 2, Src1, Src2); ResultHigh = _VSMull2(Size, 2, Src1, Src2); @@ -3967,28 +3576,27 @@ OrderedNode* OpDispatchBuilder::PMULHRSWOpImpl(OpcodeArgs, OrderedNode *Src1, Or } void OpDispatchBuilder::PMULHRSW(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = PMULHRSWOpImpl(Op, Dest, Src); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = PMULHRSWOpImpl(Op, Dest, Src); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::VPMULHRSWOp(OpcodeArgs) { - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - OrderedNode *Result = PMULHRSWOpImpl(Op, Dest, Src); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Result = PMULHRSWOpImpl(Op, Dest, Src); StoreResult(FPRClass, Op, Result, -1); } -OrderedNode* OpDispatchBuilder::HSUBPOpImpl(OpcodeArgs, size_t ElementSize, - const X86Tables::DecodedOperand& Src1Op, +OrderedNode* OpDispatchBuilder::HSUBPOpImpl(OpcodeArgs, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& 
Src2Op) { const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src1 = LoadSource(FPRClass, Op, Src1Op, Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Src1Op, Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); auto Even = _VUnZip(SrcSize, ElementSize, Src1, Src2); auto Odd = _VUnZip2(SrcSize, ElementSize, Src1, Src2); @@ -3997,22 +3605,20 @@ OrderedNode* OpDispatchBuilder::HSUBPOpImpl(OpcodeArgs, size_t ElementSize, template void OpDispatchBuilder::HSUBP(OpcodeArgs) { - OrderedNode *Result = HSUBPOpImpl(Op, ElementSize, Op->Dest, Op->Src[0]); + OrderedNode* Result = HSUBPOpImpl(Op, ElementSize, Op->Dest, Op->Src[0]); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::HSUBP<4>(OpcodeArgs); -template -void OpDispatchBuilder::HSUBP<8>(OpcodeArgs); +template void OpDispatchBuilder::HSUBP<4>(OpcodeArgs); +template void OpDispatchBuilder::HSUBP<8>(OpcodeArgs); -template +template void OpDispatchBuilder::VHSUBPOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; - OrderedNode *Result = HSUBPOpImpl(Op, ElementSize, Op->Src[0], Op->Src[1]); - OrderedNode *Dest = Result; + OrderedNode* Result = HSUBPOpImpl(Op, ElementSize, Op->Src[0], Op->Src[1]); + OrderedNode* Dest = Result; if (Is256Bit) { Dest = _VInsElement(DstSize, 8, 1, 2, Result, Result); Dest = _VInsElement(DstSize, 8, 2, 1, Dest, Result); @@ -4021,17 +3627,15 @@ void OpDispatchBuilder::VHSUBPOp(OpcodeArgs) { StoreResult(FPRClass, Op, Dest, -1); } -template -void OpDispatchBuilder::VHSUBPOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VHSUBPOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VHSUBPOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VHSUBPOp<8>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::PHSUBOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, - const X86Tables::DecodedOperand& Src2, size_t 
ElementSize) { +OrderedNode* +OpDispatchBuilder::PHSUBOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, size_t ElementSize) { const auto Size = GetSrcSize(Op); - OrderedNode *Src1V = LoadSource(FPRClass, Op, Src1, Op->Flags); - OrderedNode *Src2V = LoadSource(FPRClass, Op, Src2, Op->Flags); + OrderedNode* Src1V = LoadSource(FPRClass, Op, Src1, Op->Flags); + OrderedNode* Src2V = LoadSource(FPRClass, Op, Src2, Op->Flags); auto Even = _VUnZip(Size, ElementSize, Src1V, Src2V); auto Odd = _VUnZip2(Size, ElementSize, Src1V, Src2V); @@ -4040,40 +3644,35 @@ OrderedNode* OpDispatchBuilder::PHSUBOpImpl(OpcodeArgs, const X86Tables::Decoded template void OpDispatchBuilder::PHSUB(OpcodeArgs) { - OrderedNode *Result = PHSUBOpImpl(Op, Op->Dest, Op->Src[0], ElementSize); + OrderedNode* Result = PHSUBOpImpl(Op, Op->Dest, Op->Src[0], ElementSize); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::PHSUB<2>(OpcodeArgs); -template -void OpDispatchBuilder::PHSUB<4>(OpcodeArgs); +template void OpDispatchBuilder::PHSUB<2>(OpcodeArgs); +template void OpDispatchBuilder::PHSUB<4>(OpcodeArgs); -template +template void OpDispatchBuilder::VPHSUBOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; - OrderedNode *Result = PHSUBOpImpl(Op, Op->Src[0], Op->Src[1], ElementSize); + OrderedNode* Result = PHSUBOpImpl(Op, Op->Src[0], Op->Src[1], ElementSize); if (Is256Bit) { - OrderedNode *Inserted = _VInsElement(DstSize, 8, 1, 2, Result, Result); + OrderedNode* Inserted = _VInsElement(DstSize, 8, 1, 2, Result, Result); Result = _VInsElement(DstSize, 8, 2, 1, Inserted, Result); } StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPHSUBOp<2>(OpcodeArgs); -template -void OpDispatchBuilder::VPHSUBOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPHSUBOp<2>(OpcodeArgs); +template void OpDispatchBuilder::VPHSUBOp<4>(OpcodeArgs); 
-OrderedNode* OpDispatchBuilder::PHADDSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op) { +OrderedNode* OpDispatchBuilder::PHADDSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op) { const auto Size = GetSrcSize(Op); const uint8_t ElementSize = 2; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Src1Op, Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Src1Op, Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); auto Even = _VUnZip(Size, ElementSize, Src1, Src2); auto Odd = _VUnZip2(Size, ElementSize, Src1, Src2); @@ -4083,7 +3682,7 @@ OrderedNode* OpDispatchBuilder::PHADDSOpImpl(OpcodeArgs, const X86Tables::Decode } void OpDispatchBuilder::PHADDS(OpcodeArgs) { - OrderedNode *Result = PHADDSOpImpl(Op, Op->Dest, Op->Src[0]); + OrderedNode* Result = PHADDSOpImpl(Op, Op->Dest, Op->Src[0]); StoreResult(FPRClass, Op, Result, -1); } @@ -4091,8 +3690,8 @@ void OpDispatchBuilder::VPHADDSWOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; - OrderedNode *Result = PHADDSOpImpl(Op, Op->Src[0], Op->Src[1]); - OrderedNode *Dest = Result; + OrderedNode* Result = PHADDSOpImpl(Op, Op->Src[0], Op->Src[1]); + OrderedNode* Dest = Result; if (Is256Bit) { Dest = _VInsElement(SrcSize, 8, 1, 2, Result, Result); @@ -4102,13 +3701,12 @@ void OpDispatchBuilder::VPHADDSWOp(OpcodeArgs) { StoreResult(FPRClass, Op, Dest, -1); } -OrderedNode* OpDispatchBuilder::PHSUBSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op) { +OrderedNode* OpDispatchBuilder::PHSUBSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op) { const auto Size = GetSrcSize(Op); const uint8_t ElementSize = 2; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Src1Op, 
Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Src1Op, Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); auto Even = _VUnZip(Size, ElementSize, Src1, Src2); auto Odd = _VUnZip2(Size, ElementSize, Src1, Src2); @@ -4118,7 +3716,7 @@ OrderedNode* OpDispatchBuilder::PHSUBSOpImpl(OpcodeArgs, const X86Tables::Decode } void OpDispatchBuilder::PHSUBS(OpcodeArgs) { - OrderedNode *Result = PHSUBSOpImpl(Op, Op->Dest, Op->Src[0]); + OrderedNode* Result = PHSUBSOpImpl(Op, Op->Dest, Op->Src[0]); StoreResult(FPRClass, Op, Result, -1); } @@ -4126,8 +3724,8 @@ void OpDispatchBuilder::VPHSUBSWOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; - OrderedNode *Result = PHSUBSOpImpl(Op, Op->Src[0], Op->Src[1]); - OrderedNode *Dest = Result; + OrderedNode* Result = PHSUBSOpImpl(Op, Op->Src[0], Op->Src[1]); + OrderedNode* Dest = Result; if (Is256Bit) { Dest = _VInsElement(DstSize, 8, 1, 2, Result, Result); Dest = _VInsElement(DstSize, 8, 2, 1, Dest, Result); @@ -4136,9 +3734,7 @@ void OpDispatchBuilder::VPHSUBSWOp(OpcodeArgs) { StoreResult(FPRClass, Op, Dest, -1); } -OrderedNode* OpDispatchBuilder::PSADBWOpImpl(OpcodeArgs, - const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op) { +OrderedNode* OpDispatchBuilder::PSADBWOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op) { // The documentation is actually incorrect in how this instruction operates // It strongly implies that the `abs(dest[i] - src[i])` operates in 8bit space // but it actually operates in more than 8bit space @@ -4147,8 +3743,8 @@ OrderedNode* OpDispatchBuilder::PSADBWOpImpl(OpcodeArgs, const auto Size = GetSrcSize(Op); const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Src1Op, Op->Flags); - OrderedNode *Src2 = 
LoadSource(FPRClass, Op, Src2Op, Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Src1Op, Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); if (Size == 8) { auto AbsResult = _VUABDL(Size * 2, 1, Src1, Src2); @@ -4160,39 +3756,38 @@ OrderedNode* OpDispatchBuilder::PSADBWOpImpl(OpcodeArgs, auto AbsResult_Low = _VUABDL(Size, 1, Src1, Src2); auto AbsResult_High = _VUABDL2(Size, 1, Src1, Src2); - OrderedNode *Result_Low = _VAddV(16, 2, AbsResult_Low); - OrderedNode *Result_High = _VAddV(16, 2, AbsResult_High); + OrderedNode* Result_Low = _VAddV(16, 2, AbsResult_Low); + OrderedNode* Result_High = _VAddV(16, 2, AbsResult_High); auto Low = _VZip(Size, 8, Result_Low, Result_High); if (Is128Bit) { return Low; } - OrderedNode *HighSrc1 = _VDupElement(Size, 16, AbsResult_Low, 1); - OrderedNode *HighSrc2 = _VDupElement(Size, 16, AbsResult_High, 1); + OrderedNode* HighSrc1 = _VDupElement(Size, 16, AbsResult_Low, 1); + OrderedNode* HighSrc2 = _VDupElement(Size, 16, AbsResult_High, 1); - OrderedNode *HighResult_Low = _VAddV(16, 2, HighSrc1); - OrderedNode *HighResult_High = _VAddV(16, 2, HighSrc2); + OrderedNode* HighResult_Low = _VAddV(16, 2, HighSrc1); + OrderedNode* HighResult_High = _VAddV(16, 2, HighSrc2); - OrderedNode *High = _VInsElement(Size, 8, 1, 0, HighResult_Low, HighResult_High); - OrderedNode *Full = _VInsElement(Size, 16, 1, 0, Low, High); + OrderedNode* High = _VInsElement(Size, 8, 1, 0, HighResult_Low, HighResult_High); + OrderedNode* Full = _VInsElement(Size, 16, 1, 0, Low, High); - OrderedNode *Tmp = _VInsElement(Size, 8, 2, 1, Full, Full); + OrderedNode* Tmp = _VInsElement(Size, 8, 2, 1, Full, Full); return _VInsElement(Size, 8, 1, 2, Tmp, Full); } void OpDispatchBuilder::PSADBW(OpcodeArgs) { - OrderedNode *Result = PSADBWOpImpl(Op, Op->Dest, Op->Src[0]); + OrderedNode* Result = PSADBWOpImpl(Op, Op->Dest, Op->Src[0]); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::VPSADBWOp(OpcodeArgs) { - OrderedNode 
*Result = PSADBWOpImpl(Op, Op->Src[0], Op->Src[1]); + OrderedNode* Result = PSADBWOpImpl(Op, Op->Src[0], Op->Src[1]); StoreResult(FPRClass, Op, Result, -1); } -OrderedNode* OpDispatchBuilder::ExtendVectorElementsImpl(OpcodeArgs, size_t ElementSize, - size_t DstElementSize, bool Signed) { +OrderedNode* OpDispatchBuilder::ExtendVectorElementsImpl(OpcodeArgs, size_t ElementSize, size_t DstElementSize, bool Signed) { const auto DstSize = GetDstSize(Op); const auto GetSrc = [&] { @@ -4208,12 +3803,10 @@ OrderedNode* OpDispatchBuilder::ExtendVectorElementsImpl(OpcodeArgs, size_t Elem } }; - OrderedNode *Src = GetSrc(); - OrderedNode *Result{Src}; + OrderedNode* Src = GetSrc(); + OrderedNode* Result {Src}; - for (size_t CurrentElementSize = ElementSize; - CurrentElementSize != DstElementSize; - CurrentElementSize <<= 1) { + for (size_t CurrentElementSize = ElementSize; CurrentElementSize != DstElementSize; CurrentElementSize <<= 1) { if (Signed) { Result = _VSXTL(DstSize, CurrentElementSize, Result); } else { @@ -4224,40 +3817,27 @@ OrderedNode* OpDispatchBuilder::ExtendVectorElementsImpl(OpcodeArgs, size_t Elem return Result; } -template +template void OpDispatchBuilder::ExtendVectorElements(OpcodeArgs) { - OrderedNode *Result = ExtendVectorElementsImpl(Op, ElementSize, DstElementSize, Signed); + OrderedNode* Result = ExtendVectorElementsImpl(Op, ElementSize, DstElementSize, Signed); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::ExtendVectorElements<1, 2, false>(OpcodeArgs); -template -void OpDispatchBuilder::ExtendVectorElements<1, 4, false>(OpcodeArgs); -template -void OpDispatchBuilder::ExtendVectorElements<1, 8, false>(OpcodeArgs); -template -void OpDispatchBuilder::ExtendVectorElements<2, 4, false>(OpcodeArgs); -template -void OpDispatchBuilder::ExtendVectorElements<2, 8, false>(OpcodeArgs); -template -void OpDispatchBuilder::ExtendVectorElements<4, 8, false>(OpcodeArgs); - -template -void OpDispatchBuilder::ExtendVectorElements<1, 2, 
true>(OpcodeArgs); -template -void OpDispatchBuilder::ExtendVectorElements<1, 4, true>(OpcodeArgs); -template -void OpDispatchBuilder::ExtendVectorElements<1, 8, true>(OpcodeArgs); -template -void OpDispatchBuilder::ExtendVectorElements<2, 4, true>(OpcodeArgs); -template -void OpDispatchBuilder::ExtendVectorElements<2, 8, true>(OpcodeArgs); -template -void OpDispatchBuilder::ExtendVectorElements<4, 8, true>(OpcodeArgs); - -OrderedNode* OpDispatchBuilder::VectorRoundImpl(OpcodeArgs, size_t ElementSize, - OrderedNode *Src, uint64_t Mode) { +template void OpDispatchBuilder::ExtendVectorElements<1, 2, false>(OpcodeArgs); +template void OpDispatchBuilder::ExtendVectorElements<1, 4, false>(OpcodeArgs); +template void OpDispatchBuilder::ExtendVectorElements<1, 8, false>(OpcodeArgs); +template void OpDispatchBuilder::ExtendVectorElements<2, 4, false>(OpcodeArgs); +template void OpDispatchBuilder::ExtendVectorElements<2, 8, false>(OpcodeArgs); +template void OpDispatchBuilder::ExtendVectorElements<4, 8, false>(OpcodeArgs); + +template void OpDispatchBuilder::ExtendVectorElements<1, 2, true>(OpcodeArgs); +template void OpDispatchBuilder::ExtendVectorElements<1, 4, true>(OpcodeArgs); +template void OpDispatchBuilder::ExtendVectorElements<1, 8, true>(OpcodeArgs); +template void OpDispatchBuilder::ExtendVectorElements<2, 4, true>(OpcodeArgs); +template void OpDispatchBuilder::ExtendVectorElements<2, 8, true>(OpcodeArgs); +template void OpDispatchBuilder::ExtendVectorElements<4, 8, true>(OpcodeArgs); + +OrderedNode* OpDispatchBuilder::VectorRoundImpl(OpcodeArgs, size_t ElementSize, OrderedNode* Src, uint64_t Mode) { const auto Size = GetDstSize(Op); const uint64_t RoundControlSource = (Mode >> 2) & 1; uint64_t RoundControl = Mode & 0b11; @@ -4267,10 +3847,7 @@ OrderedNode* OpDispatchBuilder::VectorRoundImpl(OpcodeArgs, size_t ElementSize, } static constexpr std::array SourceModes = { - FEXCore::IR::Round_Nearest, - FEXCore::IR::Round_Negative_Infinity, - 
FEXCore::IR::Round_Positive_Infinity, - FEXCore::IR::Round_Towards_Zero, + FEXCore::IR::Round_Nearest, FEXCore::IR::Round_Negative_Infinity, FEXCore::IR::Round_Positive_Infinity, FEXCore::IR::Round_Towards_Zero, FEXCore::IR::Round_Host, }; @@ -4283,7 +3860,7 @@ void OpDispatchBuilder::VectorRound(OpcodeArgs) { // No need to zero extend the vector in the event we have a // scalar source, especially since it's only inserted into another vector. const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); const uint64_t Mode = Op->Src[1].Data.Literal.Value; @@ -4293,12 +3870,10 @@ void OpDispatchBuilder::VectorRound(OpcodeArgs) { StoreResult(FPRClass, Op, Src, -1); } -template -void OpDispatchBuilder::VectorRound<4>(OpcodeArgs); -template -void OpDispatchBuilder::VectorRound<8>(OpcodeArgs); +template void OpDispatchBuilder::VectorRound<4>(OpcodeArgs); +template void OpDispatchBuilder::VectorRound<8>(OpcodeArgs); -template +template void OpDispatchBuilder::AVXVectorRound(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); const auto Mode = Op->Src[1].Data.Literal.Value; @@ -4307,16 +3882,14 @@ void OpDispatchBuilder::AVXVectorRound(OpcodeArgs) { // scalar source, especially since it's only inserted into another vector. 
const auto SrcSize = GetSrcSize(Op); - OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); - OrderedNode *Result = VectorRoundImpl(Op, ElementSize, Src, Mode); + OrderedNode* Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); + OrderedNode* Result = VectorRoundImpl(Op, ElementSize, Src, Mode); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::AVXVectorRound<4>(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorRound<8>(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorRound<4>(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorRound<8>(OpcodeArgs); template void OpDispatchBuilder::VectorBlend(OpcodeArgs) { @@ -4325,200 +3898,196 @@ void OpDispatchBuilder::VectorBlend(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); uint8_t Select = Op->Src[1].Data.Literal.Value; - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); if constexpr (ElementSize == 4) { Select &= 0b1111; switch (Select) { - case 0b0000: - // No-op - return; - case 0b0001: - // Dest[31:0] = Src[31:0] - Dest = _VInsElement(DstSize, ElementSize, 0, 0, Dest, Src); - break; - case 0b0010: - // Dest[63:32] = Src[63:32] - Dest = _VInsElement(DstSize, ElementSize, 1, 1, Dest, Src); - break; - case 0b0011: - // Dest[31:0] = Src[31:0] - // Dest[63:32] = Src[63:32] - Dest = _VInsElement(DstSize, 8, 0, 0, Dest, Src); - break; - case 0b0100: - // Dest[95:64] = Src[95:64] - Dest = _VInsElement(DstSize, ElementSize, 2, 2, Dest, Src); - break; - case 0b0101: { - // Dest[31:0] = Src[31:0] - // Dest[63:32] = Dest[63:32] - // Dest[95:64] = Src[95:64] - // Dest[127:96] = Dest[127:96] - // Rotate the elements of the incoming source so they end up in the correct 
location. - // Then trn2 keeps the destination results in the expected location. - auto Temp = _VRev64(DstSize, 4, Src); - Dest = _VTrn2(DstSize, ElementSize, Temp, Dest); - break; - } - case 0b0110: { - // Dest[63:32] = Src[63:32] - // Dest[95:64] = Src[95:64] - auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_0110B); - Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); - break; - } - case 0b0111: { - // Dest[31:0] = Src[31:0] - // Dest[63:32] = Src[63:32] - // Dest[95:64] = Src[95:64] - auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_0111B); - Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); - break; - } - case 0b1000: - // Dest[127:96] = Src[127:96] - Dest = _VInsElement(DstSize, ElementSize, 3, 3, Dest, Src); - break; - case 0b1001: { - // Dest[31:0] = Src[31:0] - // Dest[127:96] = Src[127:96] - auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_1001B); - Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); - break; - } - case 0b1010: { - // Dest[31:0] = Dest[31:0] - // Dest[63:32] = Src[63:32] - // Dest[95:64] = Dest[95:64] - // Dest[127:96] = Src[127:96] - // Rotate the elements of the incoming destination so they end up in the correct location. - // Then trn2 keeps the source results in the expected location. 
- auto Temp = _VRev64(DstSize, 4, Dest); - Dest = _VTrn2(DstSize, ElementSize, Temp, Src); - break; - } - case 0b1011: { - // Dest[31:0] = Src[31:0] - // Dest[63:32] = Src[63:32] - // Dest[95:64] = Src[95:64] - auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_1011B); - Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); - break; - } - case 0b1100: - // Dest[95:64] = Src[95:64] - // Dest[127:96] = Src[127:96] - Dest = _VInsElement(DstSize, 8, 1, 1, Dest, Src); - break; - case 0b1101: { - // Dest[31:0] = Src[31:0] - // Dest[95:64] = Src[95:64] - // Dest[127:96] = Src[127:96] - auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_1101B); - Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); - break; - } - case 0b1110: { - // Dest[63:32] = Src[63:32] - // Dest[95:64] = Src[95:64] - // Dest[127:96] = Src[127:96] - auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_1110B); - Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); - break; - } - case 0b1111: - // Copy - Dest = Src; - break; - default: break; + case 0b0000: + // No-op + return; + case 0b0001: + // Dest[31:0] = Src[31:0] + Dest = _VInsElement(DstSize, ElementSize, 0, 0, Dest, Src); + break; + case 0b0010: + // Dest[63:32] = Src[63:32] + Dest = _VInsElement(DstSize, ElementSize, 1, 1, Dest, Src); + break; + case 0b0011: + // Dest[31:0] = Src[31:0] + // Dest[63:32] = Src[63:32] + Dest = _VInsElement(DstSize, 8, 0, 0, Dest, Src); + break; + case 0b0100: + // Dest[95:64] = Src[95:64] + Dest = _VInsElement(DstSize, ElementSize, 2, 2, Dest, Src); + break; + case 0b0101: { + // Dest[31:0] = Src[31:0] + // Dest[63:32] = Dest[63:32] + // Dest[95:64] = Src[95:64] + // Dest[127:96] = Dest[127:96] + // Rotate the elements of the incoming source so they end up in the correct location. 
+ // Then trn2 keeps the destination results in the expected location. + auto Temp = _VRev64(DstSize, 4, Src); + Dest = _VTrn2(DstSize, ElementSize, Temp, Dest); + break; } - } - else if constexpr (ElementSize == 8) { + case 0b0110: { + // Dest[63:32] = Src[63:32] + // Dest[95:64] = Src[95:64] + auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_0110B); + Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); + break; + } + case 0b0111: { + // Dest[31:0] = Src[31:0] + // Dest[63:32] = Src[63:32] + // Dest[95:64] = Src[95:64] + auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_0111B); + Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); + break; + } + case 0b1000: + // Dest[127:96] = Src[127:96] + Dest = _VInsElement(DstSize, ElementSize, 3, 3, Dest, Src); + break; + case 0b1001: { + // Dest[31:0] = Src[31:0] + // Dest[127:96] = Src[127:96] + auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_1001B); + Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); + break; + } + case 0b1010: { + // Dest[31:0] = Dest[31:0] + // Dest[63:32] = Src[63:32] + // Dest[95:64] = Dest[95:64] + // Dest[127:96] = Src[127:96] + // Rotate the elements of the incoming destination so they end up in the correct location. + // Then trn2 keeps the source results in the expected location. 
+ auto Temp = _VRev64(DstSize, 4, Dest); + Dest = _VTrn2(DstSize, ElementSize, Temp, Src); + break; + } + case 0b1011: { + // Dest[31:0] = Src[31:0] + // Dest[63:32] = Src[63:32] + // Dest[95:64] = Src[95:64] + auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_1011B); + Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); + break; + } + case 0b1100: + // Dest[95:64] = Src[95:64] + // Dest[127:96] = Src[127:96] + Dest = _VInsElement(DstSize, 8, 1, 1, Dest, Src); + break; + case 0b1101: { + // Dest[31:0] = Src[31:0] + // Dest[95:64] = Src[95:64] + // Dest[127:96] = Src[127:96] + auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_1101B); + Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); + break; + } + case 0b1110: { + // Dest[63:32] = Src[63:32] + // Dest[95:64] = Src[95:64] + // Dest[127:96] = Src[127:96] + auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_1110B); + Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); + break; + } + case 0b1111: + // Copy + Dest = Src; + break; + default: break; + } + } else if constexpr (ElementSize == 8) { Select &= 0b11; switch (Select) { - case 0b00: - // No-op - return; - case 0b01: - // Dest[63:0] = Src[63:0] - Dest = _VInsElement(DstSize, ElementSize, 0, 0, Dest, Src); - break; - case 0b10: - // Dest[127:64] = Src[127:64] - Dest = _VInsElement(DstSize, ElementSize, 1, 1, Dest, Src); - break; - case 0b11: - // Copy - Dest = Src; - break; + case 0b00: + // No-op + return; + case 0b01: + // Dest[63:0] = Src[63:0] + Dest = _VInsElement(DstSize, ElementSize, 0, 0, Dest, Src); + break; + case 0b10: + // Dest[127:64] = Src[127:64] + Dest = _VInsElement(DstSize, ElementSize, 1, 1, Dest, Src); + break; + case 0b11: + // Copy + Dest = Src; + break; } - } - else { + } else { // TODO: There are some of these swizzles that can be more 
optimal. // NamedConstant + VTBX1 is quite quick already. // Implement more if it becomes relevant. switch (Select) { - case 0b0000'0000: - // No-op - return; - case 0b0000'0001: - case 0b0000'0010: - case 0b0000'0100: - case 0b0000'1000: - case 0b0001'0000: - case 0b0010'0000: - case 0b0100'0000: - case 0b1000'0000: { - // Single 16-bit element insert. - const auto Element = FEXCore::ilog2(Select); - Dest = _VInsElement(DstSize, ElementSize, Element, Element, Dest, Src); - break; - } - case 0b0000'0011: - case 0b0000'1100: - case 0b0011'0000: - case 0b1100'0000: { - // Single 32-bit element insert. - const auto Element = std::countr_zero(Select) / 2; - Dest = _VInsElement(DstSize, 4, Element, Element, Dest, Src); - break; - } - case 0b0000'1111: - case 0b1111'0000: { - // Single 64-bit element insert. - const auto Element = std::countr_zero(Select) / 4; - Dest = _VInsElement(DstSize, 8, Element, Element, Dest, Src); - break; - } - case 0b1111'1111: - // Copy - Dest = Src; - break; - default: { - auto ConstantSwizzle = LoadAndCacheIndexedNamedVectorConstant(DstSize, FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PBLENDW, Select * 16); - Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); - break; - } + case 0b0000'0000: + // No-op + return; + case 0b0000'0001: + case 0b0000'0010: + case 0b0000'0100: + case 0b0000'1000: + case 0b0001'0000: + case 0b0010'0000: + case 0b0100'0000: + case 0b1000'0000: { + // Single 16-bit element insert. + const auto Element = FEXCore::ilog2(Select); + Dest = _VInsElement(DstSize, ElementSize, Element, Element, Dest, Src); + break; + } + case 0b0000'0011: + case 0b0000'1100: + case 0b0011'0000: + case 0b1100'0000: { + // Single 32-bit element insert. + const auto Element = std::countr_zero(Select) / 2; + Dest = _VInsElement(DstSize, 4, Element, Element, Dest, Src); + break; + } + case 0b0000'1111: + case 0b1111'0000: { + // Single 64-bit element insert. 
+ const auto Element = std::countr_zero(Select) / 4; + Dest = _VInsElement(DstSize, 8, Element, Element, Dest, Src); + break; + } + case 0b1111'1111: + // Copy + Dest = Src; + break; + default: { + auto ConstantSwizzle = + LoadAndCacheIndexedNamedVectorConstant(DstSize, FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PBLENDW, Select * 16); + Dest = _VTBX1(DstSize, Dest, Src, ConstantSwizzle); + break; + } } } StoreResult(FPRClass, Op, Dest, -1); } -template -void OpDispatchBuilder::VectorBlend<2>(OpcodeArgs); -template -void OpDispatchBuilder::VectorBlend<4>(OpcodeArgs); -template -void OpDispatchBuilder::VectorBlend<8>(OpcodeArgs); +template void OpDispatchBuilder::VectorBlend<2>(OpcodeArgs); +template void OpDispatchBuilder::VectorBlend<4>(OpcodeArgs); +template void OpDispatchBuilder::VectorBlend<8>(OpcodeArgs); template void OpDispatchBuilder::VectorVariableBlend(OpcodeArgs) { auto Size = GetSrcSize(Op); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); auto Mask = LoadXMMRegister(0); @@ -4533,37 +4102,31 @@ void OpDispatchBuilder::VectorVariableBlend(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VectorVariableBlend<1>(OpcodeArgs); -template -void OpDispatchBuilder::VectorVariableBlend<4>(OpcodeArgs); -template -void OpDispatchBuilder::VectorVariableBlend<8>(OpcodeArgs); +template void OpDispatchBuilder::VectorVariableBlend<1>(OpcodeArgs); +template void OpDispatchBuilder::VectorVariableBlend<4>(OpcodeArgs); +template void OpDispatchBuilder::VectorVariableBlend<8>(OpcodeArgs); -template +template void OpDispatchBuilder::AVXVectorVariableBlend(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); constexpr auto ElementSizeBits = ElementSize * 8; - OrderedNode *Src1 = 
LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[2].IsLiteral(), "Src[2] needs to be literal here"); const auto Src3Selector = Op->Src[2].Data.Literal.Value; // Mask register is encoded within bits [7:4] of the selector - OrderedNode *Mask = LoadXMMRegister((Src3Selector >> 4) & 0b1111); + OrderedNode* Mask = LoadXMMRegister((Src3Selector >> 4) & 0b1111); - OrderedNode *Shifted = _VSShrI(SrcSize, ElementSize, Mask, ElementSizeBits - 1); - OrderedNode *Result = _VBSL(SrcSize, Shifted, Src2, Src1); + OrderedNode* Shifted = _VSShrI(SrcSize, ElementSize, Mask, ElementSizeBits - 1); + OrderedNode* Result = _VBSL(SrcSize, Shifted, Src2, Src1); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::AVXVectorVariableBlend<1>(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorVariableBlend<4>(OpcodeArgs); -template -void OpDispatchBuilder::AVXVectorVariableBlend<8>(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorVariableBlend<1>(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorVariableBlend<4>(OpcodeArgs); +template void OpDispatchBuilder::AVXVectorVariableBlend<8>(OpcodeArgs); void OpDispatchBuilder::PTestOp(OpcodeArgs) { // Invalidate deferred flags early @@ -4571,11 +4134,11 @@ void OpDispatchBuilder::PTestOp(OpcodeArgs) { auto Size = GetSrcSize(Op); - OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Test1 = _VAnd(Size, 1, Dest, Src); - OrderedNode *Test2 = _VBic(Size, 1, Src, Dest); + OrderedNode* Test1 = _VAnd(Size, 1, Dest, Src); + OrderedNode* Test2 = 
_VBic(Size, 1, Src, Dest); // Element size must be less than 32-bit for the sign bit tricks. Test1 = _VUMaxV(Size, 2, Test1); @@ -4587,8 +4150,7 @@ void OpDispatchBuilder::PTestOp(OpcodeArgs) { auto ZeroConst = _Constant(0); auto OneConst = _Constant(1); - Test2 = _Select(FEXCore::IR::COND_EQ, - Test2, ZeroConst, OneConst, ZeroConst); + Test2 = _Select(FEXCore::IR::COND_EQ, Test2, ZeroConst, OneConst, ZeroConst); // Careful, these flags are different between {V,}PTEST and VTESTP{S,D} // Set ZF according to Test1. SF will be zeroed since we do a 32-bit test on @@ -4597,9 +4159,7 @@ void OpDispatchBuilder::PTestOp(OpcodeArgs) { SetNZ_ZeroCV(32, Test1); SetRFLAG(Test2); - uint32_t FlagsMaskToZero = - (1U << X86State::RFLAG_PF_RAW_LOC) | - (1U << X86State::RFLAG_AF_RAW_LOC); + uint32_t FlagsMaskToZero = (1U << X86State::RFLAG_PF_RAW_LOC) | (1U << X86State::RFLAG_AF_RAW_LOC); ZeroMultipleFlags(FlagsMaskToZero); } @@ -4609,52 +4169,48 @@ void OpDispatchBuilder::VTESTOpImpl(OpcodeArgs, size_t ElementSize) { const auto SrcSize = GetSrcSize(Op); const auto ElementSizeInBits = ElementSize * 8; - const auto MaskConstant = uint64_t{1} << (ElementSizeInBits - 1); + const auto MaskConstant = uint64_t {1} << (ElementSizeInBits - 1); - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Mask = _VDupFromGPR(SrcSize, ElementSize, _Constant(MaskConstant)); + OrderedNode* Mask = _VDupFromGPR(SrcSize, ElementSize, _Constant(MaskConstant)); - OrderedNode *AndTest = _VAnd(SrcSize, 1, Src2, Src1); - OrderedNode *AndNotTest = _VBic(SrcSize, 1, Src2, Src1); + OrderedNode* AndTest = _VAnd(SrcSize, 1, Src2, Src1); + OrderedNode* AndNotTest = _VBic(SrcSize, 1, Src2, Src1); - OrderedNode *MaskedAnd = _VAnd(SrcSize, 1, AndTest, Mask); - OrderedNode 
*MaskedAndNot = _VAnd(SrcSize, 1, AndNotTest, Mask); + OrderedNode* MaskedAnd = _VAnd(SrcSize, 1, AndTest, Mask); + OrderedNode* MaskedAndNot = _VAnd(SrcSize, 1, AndNotTest, Mask); - OrderedNode *MaxAnd = _VUMaxV(SrcSize, 2, MaskedAnd); - OrderedNode *MaxAndNot = _VUMaxV(SrcSize, 2, MaskedAndNot); + OrderedNode* MaxAnd = _VUMaxV(SrcSize, 2, MaskedAnd); + OrderedNode* MaxAndNot = _VUMaxV(SrcSize, 2, MaskedAndNot); - OrderedNode *AndGPR = _VExtractToGPR(SrcSize, 2, MaxAnd, 0); - OrderedNode *AndNotGPR = _VExtractToGPR(SrcSize, 2, MaxAndNot, 0); + OrderedNode* AndGPR = _VExtractToGPR(SrcSize, 2, MaxAnd, 0); + OrderedNode* AndNotGPR = _VExtractToGPR(SrcSize, 2, MaxAndNot, 0); - OrderedNode *ZeroConst = _Constant(0); - OrderedNode *OneConst = _Constant(1); + OrderedNode* ZeroConst = _Constant(0); + OrderedNode* OneConst = _Constant(1); - OrderedNode *CFResult = _Select(IR::COND_EQ, AndNotGPR, ZeroConst, - OneConst, ZeroConst); + OrderedNode* CFResult = _Select(IR::COND_EQ, AndNotGPR, ZeroConst, OneConst, ZeroConst); // As in PTest, this sets Z appropriately while zeroing the rest of NZCV. 
SetNZ_ZeroCV(32, AndGPR); SetRFLAG(CFResult); - ZeroMultipleFlags((1U << X86State::RFLAG_PF_RAW_LOC) | - (1U << X86State::RFLAG_AF_RAW_LOC)); + ZeroMultipleFlags((1U << X86State::RFLAG_PF_RAW_LOC) | (1U << X86State::RFLAG_AF_RAW_LOC)); } -template +template void OpDispatchBuilder::VTESTPOp(OpcodeArgs) { VTESTOpImpl(Op, ElementSize); } -template -void OpDispatchBuilder::VTESTPOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VTESTPOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VTESTPOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VTESTPOp<8>(OpcodeArgs); OrderedNode* OpDispatchBuilder::PHMINPOSUWOpImpl(OpcodeArgs) { const auto Size = GetSrcSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); // Setup a vector swizzle // Initially load a 64-bit mask of immediates @@ -4691,12 +4247,11 @@ OrderedNode* OpDispatchBuilder::PHMINPOSUWOpImpl(OpcodeArgs) { } void OpDispatchBuilder::PHMINPOSUWOp(OpcodeArgs) { - OrderedNode *Result = PHMINPOSUWOpImpl(Op); + OrderedNode* Result = PHMINPOSUWOpImpl(Op); StoreResult(FPRClass, Op, Result, -1); } -OrderedNode* OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, - const X86Tables::DecodedOperand& Src2, +OrderedNode* OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, const X86Tables::DecodedOperand& Imm, size_t ElementSize) { LOGMAN_THROW_A_FMT(Imm.IsLiteral(), "Imm needs to be literal here"); const uint8_t Mask = Imm.Data.Literal.Value; @@ -4719,17 +4274,17 @@ OrderedNode* OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOp }(); const auto DstSize = GetDstSize(Op); - OrderedNode *ZeroVec = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); + OrderedNode* ZeroVec = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); if (SrcMask 
== 0 || DstMask == 0) { // What are you even doing here? Go away. return ZeroVec; } - OrderedNode *Src1V = LoadSource(FPRClass, Op, Src1, Op->Flags); - OrderedNode *Src2V = LoadSource(FPRClass, Op, Src2, Op->Flags); + OrderedNode* Src1V = LoadSource(FPRClass, Op, Src1, Op->Flags); + OrderedNode* Src2V = LoadSource(FPRClass, Op, Src2, Op->Flags); // First step is to do an FMUL - OrderedNode *Temp = _VFMul(DstSize, ElementSize, Src1V, Src2V); + OrderedNode* Temp = _VFMul(DstSize, ElementSize, Src1V, Src2V); // Now mask results based on IndexMask. if (SrcMask != SizeMask) { @@ -4744,27 +4299,25 @@ OrderedNode* OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOp // It can duplicate and zero results if (ElementSize == 8) { switch (DstMask) { - case 0b01: - // Dest[63:0] = Result - // Dest[127:64] = Zero - return _VZip(DstSize, ElementSize, Temp, ZeroVec); - case 0b10: - // Dest[63:0] = Zero - // Dest[127:64] = Result - return _VZip(DstSize, ElementSize, ZeroVec, Temp); - case 0b11: - // Broadcast - // Dest[63:0] = Result - // Dest[127:64] = Result - return _VDupElement(DstSize, ElementSize, Temp, 0); - case 0: - default: - LOGMAN_MSG_A_FMT("Unsupported"); + case 0b01: + // Dest[63:0] = Result + // Dest[127:64] = Zero + return _VZip(DstSize, ElementSize, Temp, ZeroVec); + case 0b10: + // Dest[63:0] = Zero + // Dest[127:64] = Result + return _VZip(DstSize, ElementSize, ZeroVec, Temp); + case 0b11: + // Broadcast + // Dest[63:0] = Result + // Dest[127:64] = Result + return _VDupElement(DstSize, ElementSize, Temp, 0); + case 0: + default: LOGMAN_MSG_A_FMT("Unsupported"); } - } - else { + } else { auto BadPath = [&]() { - OrderedNode *Result = ZeroVec; + OrderedNode* Result = ZeroVec; for (size_t i = 0; i < (DstSize / ElementSize); ++i) { const auto Bit = 1U << (i % 4); @@ -4777,106 +4330,105 @@ OrderedNode* OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOp return Result; }; switch (DstMask) { - case 0b0001: - // Dest[31:0] = Result - // 
Dest[63:32] = Zero - // Dest[95:64] = Zero - // Dest[127:96] = Zero - return _VZip(DstSize, ElementSize, Temp, ZeroVec); - case 0b0010: - // Dest[31:0] = Zero - // Dest[63:32] = Result - // Dest[95:64] = Zero - // Dest[127:96] = Zero - return _VZip(DstSize / 2, ElementSize, ZeroVec, Temp); - case 0b0011: - // Dest[31:0] = Result - // Dest[63:32] = Result - // Dest[95:64] = Zero - // Dest[127:96] = Zero - return _VDupElement(DstSize / 2, ElementSize, Temp, 0); - case 0b0100: - // Dest[31:0] = Zero - // Dest[63:32] = Zero - // Dest[95:64] = Result - // Dest[127:96] = Zero - return _VZip(DstSize, 8, ZeroVec, Temp); - case 0b0101: - // Dest[31:0] = Result - // Dest[63:32] = Zero - // Dest[95:64] = Result - // Dest[127:96] = Zero - return _VZip(DstSize, 8, Temp, Temp); - case 0b0110: - // Dest[31:0] = Zero - // Dest[63:32] = Result - // Dest[95:64] = Result - // Dest[127:96] = Zero - return BadPath(); - case 0b0111: - // Dest[31:0] = Result - // Dest[63:32] = Result - // Dest[95:64] = Result - // Dest[127:96] = Zero - Temp = _VDupElement(DstSize, ElementSize, Temp, 0); - return _VInsElement(DstSize, ElementSize, 3, 0, Temp, ZeroVec); - case 0b1000: - // Dest[31:0] = Zero - // Dest[63:32] = Zero - // Dest[95:64] = Zero - // Dest[127:96] = Result - return _VExtr(DstSize, 1, Temp, ZeroVec, 4); - case 0b1001: - // Dest[31:0] = Result - // Dest[63:32] = Zero - // Dest[95:64] = Zero - // Dest[127:96] = Result - return BadPath(); - case 0b1010: - // Dest[31:0] = Zero - // Dest[63:32] = Result - // Dest[95:64] = Zero - // Dest[127:96] = Result - Temp = _VDupElement(DstSize, ElementSize, Temp, 0); - return _VZip(DstSize, 4, ZeroVec, Temp); - case 0b1011: - // Dest[31:0] = Result - // Dest[63:32] = Result - // Dest[95:64] = Zero - // Dest[127:96] = Result - Temp = _VDupElement(DstSize, ElementSize, Temp, 0); - return _VInsElement(DstSize, ElementSize, 2, 0, Temp, ZeroVec); - case 0b1100: - // Dest[31:0] = Zero - // Dest[63:32] = Zero - // Dest[95:64] = Result - // Dest[127:96] = 
Result - Temp = _VDupElement(DstSize, ElementSize, Temp, 0); - return _VZip(DstSize, 8, ZeroVec, Temp); - case 0b1101: - // Dest[31:0] = Result - // Dest[63:32] = Zero - // Dest[95:64] = Result - // Dest[127:96] = Result - Temp = _VDupElement(DstSize, ElementSize, Temp, 0); - return _VInsElement(DstSize, ElementSize, 1, 0, Temp, ZeroVec); - case 0b1110: - // Dest[31:0] = Zero - // Dest[63:32] = Result - // Dest[95:64] = Result - // Dest[127:96] = Result - Temp = _VDupElement(DstSize, ElementSize, Temp, 0); - return _VInsElement(DstSize, ElementSize, 0, 0, Temp, ZeroVec); - case 0b1111: - // Broadcast - // Dest[31:0] = Result - // Dest[63:32] = Zero - // Dest[95:64] = Zero - // Dest[127:96] = Zero - return _VDupElement(DstSize, ElementSize, Temp, 0); - case 0: - default: - LOGMAN_MSG_A_FMT("Unsupported"); + case 0b0001: + // Dest[31:0] = Result + // Dest[63:32] = Zero + // Dest[95:64] = Zero + // Dest[127:96] = Zero + return _VZip(DstSize, ElementSize, Temp, ZeroVec); + case 0b0010: + // Dest[31:0] = Zero + // Dest[63:32] = Result + // Dest[95:64] = Zero + // Dest[127:96] = Zero + return _VZip(DstSize / 2, ElementSize, ZeroVec, Temp); + case 0b0011: + // Dest[31:0] = Result + // Dest[63:32] = Result + // Dest[95:64] = Zero + // Dest[127:96] = Zero + return _VDupElement(DstSize / 2, ElementSize, Temp, 0); + case 0b0100: + // Dest[31:0] = Zero + // Dest[63:32] = Zero + // Dest[95:64] = Result + // Dest[127:96] = Zero + return _VZip(DstSize, 8, ZeroVec, Temp); + case 0b0101: + // Dest[31:0] = Result + // Dest[63:32] = Zero + // Dest[95:64] = Result + // Dest[127:96] = Zero + return _VZip(DstSize, 8, Temp, Temp); + case 0b0110: + // Dest[31:0] = Zero + // Dest[63:32] = Result + // Dest[95:64] = Result + // Dest[127:96] = Zero + return BadPath(); + case 0b0111: + // Dest[31:0] = Result + // Dest[63:32] = Result + // Dest[95:64] = Result + // Dest[127:96] = Zero + Temp = _VDupElement(DstSize, ElementSize, Temp, 0); + return _VInsElement(DstSize, ElementSize, 3, 0, Temp, 
ZeroVec); + case 0b1000: + // Dest[31:0] = Zero + // Dest[63:32] = Zero + // Dest[95:64] = Zero + // Dest[127:96] = Result + return _VExtr(DstSize, 1, Temp, ZeroVec, 4); + case 0b1001: + // Dest[31:0] = Result + // Dest[63:32] = Zero + // Dest[95:64] = Zero + // Dest[127:96] = Result + return BadPath(); + case 0b1010: + // Dest[31:0] = Zero + // Dest[63:32] = Result + // Dest[95:64] = Zero + // Dest[127:96] = Result + Temp = _VDupElement(DstSize, ElementSize, Temp, 0); + return _VZip(DstSize, 4, ZeroVec, Temp); + case 0b1011: + // Dest[31:0] = Result + // Dest[63:32] = Result + // Dest[95:64] = Zero + // Dest[127:96] = Result + Temp = _VDupElement(DstSize, ElementSize, Temp, 0); + return _VInsElement(DstSize, ElementSize, 2, 0, Temp, ZeroVec); + case 0b1100: + // Dest[31:0] = Zero + // Dest[63:32] = Zero + // Dest[95:64] = Result + // Dest[127:96] = Result + Temp = _VDupElement(DstSize, ElementSize, Temp, 0); + return _VZip(DstSize, 8, ZeroVec, Temp); + case 0b1101: + // Dest[31:0] = Result + // Dest[63:32] = Zero + // Dest[95:64] = Result + // Dest[127:96] = Result + Temp = _VDupElement(DstSize, ElementSize, Temp, 0); + return _VInsElement(DstSize, ElementSize, 1, 0, Temp, ZeroVec); + case 0b1110: + // Dest[31:0] = Zero + // Dest[63:32] = Result + // Dest[95:64] = Result + // Dest[127:96] = Result + Temp = _VDupElement(DstSize, ElementSize, Temp, 0); + return _VInsElement(DstSize, ElementSize, 0, 0, Temp, ZeroVec); + case 0b1111: + // Broadcast + // Dest[31:0] = Result + // Dest[63:32] = Zero + // Dest[95:64] = Zero + // Dest[127:96] = Zero + return _VDupElement(DstSize, ElementSize, Temp, 0); + case 0: + default: LOGMAN_MSG_A_FMT("Unsupported"); } } FEX_UNREACHABLE; @@ -4884,18 +4436,15 @@ OrderedNode* OpDispatchBuilder::DPPOpImpl(OpcodeArgs, const X86Tables::DecodedOp template void OpDispatchBuilder::DPPOp(OpcodeArgs) { - OrderedNode *Result = DPPOpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1], ElementSize); + OrderedNode* Result = DPPOpImpl(Op, Op->Dest, 
Op->Src[0], Op->Src[1], ElementSize); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::DPPOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::DPPOp<8>(OpcodeArgs); +template void OpDispatchBuilder::DPPOp<4>(OpcodeArgs); +template void OpDispatchBuilder::DPPOp<8>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::VDPPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, - const X86Tables::DecodedOperand& Src2, - const X86Tables::DecodedOperand& Imm) { +OrderedNode* OpDispatchBuilder::VDPPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, + const X86Tables::DecodedOperand& Imm) { LOGMAN_THROW_A_FMT(Imm.IsLiteral(), "Imm needs to be literal here"); constexpr size_t ElementSize = 4; const uint8_t Mask = Imm.Data.Literal.Value; @@ -4904,13 +4453,13 @@ OrderedNode* OpDispatchBuilder::VDPPSOpImpl(OpcodeArgs, const X86Tables::Decoded const auto DstSize = GetDstSize(Op); - OrderedNode *Src1V = LoadSource(FPRClass, Op, Src1, Op->Flags); - OrderedNode *Src2V = LoadSource(FPRClass, Op, Src2, Op->Flags); + OrderedNode* Src1V = LoadSource(FPRClass, Op, Src1, Op->Flags); + OrderedNode* Src2V = LoadSource(FPRClass, Op, Src2, Op->Flags); - OrderedNode *ZeroVec = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); + OrderedNode* ZeroVec = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); // First step is to do an FMUL - OrderedNode *Temp = _VFMul(DstSize, ElementSize, Src1V, Src2V); + OrderedNode* Temp = _VFMul(DstSize, ElementSize, Src1V, Src2V); // Now we zero out elements based on src mask for (size_t i = 0; i < (DstSize / ElementSize); ++i) { @@ -4932,7 +4481,7 @@ OrderedNode* OpDispatchBuilder::VDPPSOpImpl(OpcodeArgs, const X86Tables::Decoded // Now using the destination mask we choose where the result ends up // It can duplicate and zero results - OrderedNode *Result = ZeroVec; + OrderedNode* Result = ZeroVec; for 
(size_t i = 0; i < (DstSize / ElementSize); ++i) { const auto Bit = 1U << (i % 4); @@ -4945,16 +4494,15 @@ OrderedNode* OpDispatchBuilder::VDPPSOpImpl(OpcodeArgs, const X86Tables::Decoded return Result; } -template +template void OpDispatchBuilder::VDPPOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); - OrderedNode *Result{}; + OrderedNode* Result {}; if (ElementSize == 4 && DstSize == Core::CPUState::XMM_AVX_REG_SIZE) { // 256-bit DPPS isn't handled by the 128-bit solution. Result = VDPPSOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2]); - } - else { + } else { Result = DPPOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2], ElementSize); } @@ -4963,30 +4511,26 @@ void OpDispatchBuilder::VDPPOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VDPPOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VDPPOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VDPPOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VDPPOp<8>(OpcodeArgs); -OrderedNode* OpDispatchBuilder::MPSADBWOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op, +OrderedNode* OpDispatchBuilder::MPSADBWOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, const X86Tables::DecodedOperand& ImmOp) { - const auto LaneHelper = [&, this](uint32_t Selector_Src1, uint32_t Selector_Src2, - OrderedNode *Src1, OrderedNode *Src2) { + const auto LaneHelper = [&, this](uint32_t Selector_Src1, uint32_t Selector_Src2, OrderedNode* Src1, OrderedNode* Src2) { // Src2 will grab a 32bit element and duplicate it across the 128bits - OrderedNode *DupSrc = _VDupElement(16, 4, Src2, Selector_Src2); + OrderedNode* DupSrc = _VDupElement(16, 4, Src2, Selector_Src2); // Src1/Dest needs a bunch of magic // Shift right by selected bytes // This will give us Dest[15:0], and Dest[79:64] - OrderedNode *Dest1 = _VExtr(16, 1, Src1, Src1, Selector_Src1 + 0); + OrderedNode* Dest1 = _VExtr(16, 1, Src1, Src1, 
Selector_Src1 + 0); // This will give us Dest[31:16], and Dest[95:80] - OrderedNode *Dest2 = _VExtr(16, 1, Src1, Src1, Selector_Src1 + 1); + OrderedNode* Dest2 = _VExtr(16, 1, Src1, Src1, Selector_Src1 + 1); // This will give us Dest[47:32], and Dest[111:96] - OrderedNode *Dest3 = _VExtr(16, 1, Src1, Src1, Selector_Src1 + 2); + OrderedNode* Dest3 = _VExtr(16, 1, Src1, Src1, Selector_Src1 + 2); // This will give us Dest[63:48], and Dest[127:112] - OrderedNode *Dest4 = _VExtr(16, 1, Src1, Src1, Selector_Src1 + 3); + OrderedNode* Dest4 = _VExtr(16, 1, Src1, Src1, Selector_Src1 + 3); // For each shifted section, we now have two 32-bit values per vector that can be used // Dest1.S[0] and Dest1.S[1] = Bytes - 0,1,2,3:4,5,6,7 @@ -5047,10 +4591,10 @@ OrderedNode* OpDispatchBuilder::MPSADBWOpImpl(OpcodeArgs, const X86Tables::Decod const uint8_t Select_Src1_Low = ((Select & 0b100) >> 2) * 32 / 8; const uint8_t Select_Src2_Low = Select & 0b11; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Src1Op, Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Src1Op, Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); - OrderedNode *Lower = LaneHelper(Select_Src1_Low, Select_Src2_Low, Src1, Src2); + OrderedNode* Lower = LaneHelper(Select_Src1_Low, Select_Src2_Low, Src1, Src2); if (Is128Bit) { return Lower; } @@ -5058,54 +4602,52 @@ OrderedNode* OpDispatchBuilder::MPSADBWOpImpl(OpcodeArgs, const X86Tables::Decod const uint8_t Select_Src1_High = ((Select & 0b100000) >> 5) * 32 / 8; const uint8_t Select_Src2_High = (Select & 0b11000) >> 3; - OrderedNode *UpperSrc1 = _VDupElement(32, 16, Src1, 1); - OrderedNode *UpperSrc2 = _VDupElement(32, 16, Src2, 1); - OrderedNode *Upper = LaneHelper(Select_Src1_High, Select_Src2_High, UpperSrc1, UpperSrc2); + OrderedNode* UpperSrc1 = _VDupElement(32, 16, Src1, 1); + OrderedNode* UpperSrc2 = _VDupElement(32, 16, Src2, 1); + OrderedNode* Upper = 
LaneHelper(Select_Src1_High, Select_Src2_High, UpperSrc1, UpperSrc2); return _VInsElement(32, 16, 1, 0, Lower, Upper); } void OpDispatchBuilder::MPSADBWOp(OpcodeArgs) { - OrderedNode *Result = MPSADBWOpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1]); + OrderedNode* Result = MPSADBWOpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1]); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::VMPSADBWOp(OpcodeArgs) { - OrderedNode *Result = MPSADBWOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2]); + OrderedNode* Result = MPSADBWOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2]); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::VINSERTOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], 16, Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], 16, Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[2].IsLiteral(), "Src2 needs to be literal here"); const auto Selector = Op->Src[2].Data.Literal.Value & 1; - OrderedNode *Result = _VInsElement(DstSize, 16, Selector, 0, Src1, Src2); + OrderedNode* Result = _VInsElement(DstSize, 16, Selector, 0, Src1, Src2); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::VPERM2Op(OpcodeArgs) { const auto DstSize = GetDstSize(Op); - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[2].IsLiteral(), "Src2 needs to be literal here"); const auto Selector = Op->Src[2].Data.Literal.Value; - OrderedNode *Result = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); + OrderedNode* Result = 
LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); const auto SelectElement = [&](uint64_t Index, uint64_t SelectorIdx) { switch (SelectorIdx) { - case 0: - case 1: - return _VInsElement(DstSize, 16, Index, SelectorIdx, Result, Src1); - case 2: - case 3: - default: - return _VInsElement(DstSize, 16, Index, SelectorIdx - 2, Result, Src2); + case 0: + case 1: return _VInsElement(DstSize, 16, Index, SelectorIdx, Result, Src1); + case 2: + case 3: + default: return _VInsElement(DstSize, 16, Index, SelectorIdx - 2, Result, Src2); } }; @@ -5122,12 +4664,12 @@ void OpDispatchBuilder::VPERM2Op(OpcodeArgs) { void OpDispatchBuilder::VPERMDOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); - OrderedNode *Indices = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Indices = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); // Get rid of any junk unrelated to the relevant selector index bits (bits [2:0]) - OrderedNode *IndexMask = _VectorImm(DstSize, 4, 0b111); - OrderedNode *SanitizedIndices = _VAnd(DstSize, 1, Indices, IndexMask); + OrderedNode* IndexMask = _VectorImm(DstSize, 4, 0b111); + OrderedNode* SanitizedIndices = _VAnd(DstSize, 1, Indices, IndexMask); // Build up the broadcasted index mask. e.g. On x86-64, the selector index // is always in the lower 3 bits of a 32-bit element. However, in order to @@ -5159,8 +4701,8 @@ void OpDispatchBuilder::VPERMDOp(OpcodeArgs) { // // Cool! We now have everything we need to take this further. 
- OrderedNode *IndexTrn1 = _VTrn(DstSize, 1, SanitizedIndices, SanitizedIndices); - OrderedNode *IndexTrn2 = _VTrn(DstSize, 2, IndexTrn1, IndexTrn1); + OrderedNode* IndexTrn1 = _VTrn(DstSize, 1, SanitizedIndices, SanitizedIndices); + OrderedNode* IndexTrn2 = _VTrn(DstSize, 2, IndexTrn1, IndexTrn1); // Now that we have the indices set up, now we need to multiply each // element by 4 to convert the elements into byte indices rather than @@ -5168,10 +4710,10 @@ void OpDispatchBuilder::VPERMDOp(OpcodeArgs) { // // e.g. We turn our vector into: // ╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗ - // ║ 16 ║║ 16 ║║ 16 ║║ 16 ║║ 4 ║║ 4 ║║ 4 ║║ 4 ║║ 8 ║║ 8 ║║ 8 ║║ 8 ║║ 24 ║║ 24 ║║ 24 ║║ 24 ║║ 28 ║║ 28 ║║ 28 ║║ 28 ║║ 0 ║║ 0 ║║ 0 ║║ 0 ║║ 12 ║║ 12 ║║ 12 ║║ 12 ║║ 20 ║║ 20 ║║ 20 ║║ 20 ║ + // ║ 16 ║║ 16 ║║ 16 ║║ 16 ║║ 4 ║║ 4 ║║ 4 ║║ 4 ║║ 8 ║║ 8 ║║ 8 ║║ 8 ║║ 24 ║║ 24 ║║ 24 ║║ 24 ║║ 28 ║║ 28 ║║ 28 ║║ 28 ║║ 0 ║║ 0 ║║ 0 ║║ 0 ║║ 12 ║║ 12 ║║ 12 ║║ 12 ║║ 20 ║║ 20 ║║ 20 ║║ 20 ║ // ╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝ // - OrderedNode *ShiftedIndices = _VShlI(DstSize, 1, IndexTrn2, 2); + OrderedNode* ShiftedIndices = _VShlI(DstSize, 1, IndexTrn2, 2); // Now we need to add a byte vector containing [3, 2, 1, 0] repeating for the // entire length of it, to the index register, so that we specify the bytes @@ -5180,28 +4722,28 @@ void OpDispatchBuilder::VPERMDOp(OpcodeArgs) { // e.g. 
Our vector finally looks like so: // // ╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗╔════╗ - // ║ 19 ║║ 18 ║║ 17 ║║ 16 ║║ 7 ║║ 6 ║║ 5 ║║ 4 ║║ 11 ║║ 10 ║║ 9 ║║ 8 ║║ 27 ║║ 26 ║║ 25 ║║ 24 ║║ 31 ║║ 30 ║║ 29 ║║ 28 ║║ 3 ║║ 2 ║║ 1 ║║ 0 ║║ 15 ║║ 14 ║║ 13 ║║ 12 ║║ 23 ║║ 22 ║║ 21 ║║ 20 ║ + // ║ 19 ║║ 18 ║║ 17 ║║ 16 ║║ 7 ║║ 6 ║║ 5 ║║ 4 ║║ 11 ║║ 10 ║║ 9 ║║ 8 ║║ 27 ║║ 26 ║║ 25 ║║ 24 ║║ 31 ║║ 30 ║║ 29 ║║ 28 ║║ 3 ║║ 2 ║║ 1 ║║ 0 ║║ 15 ║║ 14 ║║ 13 ║║ 12 ║║ 23 ║║ 22 ║║ 21 ║║ 20 ║ // ╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝╚════╝ // // Which finally lets us permute the source vector and be done with everything. - OrderedNode *AddConst = _Constant(0x03020100); - OrderedNode *AddVector = _VDupFromGPR(DstSize, 4, AddConst); - OrderedNode *FinalIndices = _VAdd(DstSize, 1, ShiftedIndices, AddVector); + OrderedNode* AddConst = _Constant(0x03020100); + OrderedNode* AddVector = _VDupFromGPR(DstSize, 4, AddConst); + OrderedNode* FinalIndices = _VAdd(DstSize, 1, ShiftedIndices, AddVector); // Now lets finally shuffle this bad boy around. 
- OrderedNode *Result = _VTBL1(DstSize, Src, FinalIndices); + OrderedNode* Result = _VTBL1(DstSize, Src, FinalIndices); StoreResult(FPRClass, Op, Result, -1); } void OpDispatchBuilder::VPERMQOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); const auto Selector = Op->Src[1].Data.Literal.Value; - OrderedNode *Result{}; + OrderedNode* Result {}; // If we're just broadcasting one element in particular across the vector // then this can be done fairly simply without any individual inserts. @@ -5218,11 +4760,11 @@ void OpDispatchBuilder::VPERMQOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -static OrderedNode* VBLENDOpImpl(IREmitter& IR, uint32_t VecSize, uint32_t ElementSize, - OrderedNode *Src1, OrderedNode *Src2, OrderedNode *ZeroRegister, uint64_t Selector) { - const std::array Sources{Src1, Src2}; +static OrderedNode* VBLENDOpImpl(IREmitter& IR, uint32_t VecSize, uint32_t ElementSize, OrderedNode* Src1, OrderedNode* Src2, + OrderedNode* ZeroRegister, uint64_t Selector) { + const std::array Sources {Src1, Src2}; - OrderedNode *Result = ZeroRegister; + OrderedNode* Result = ZeroRegister; const int NumElements = VecSize / ElementSize; for (int i = 0; i < NumElements; i++) { const auto SelectorIndex = (Selector >> i) & 1; @@ -5237,26 +4779,26 @@ void OpDispatchBuilder::VBLENDPDOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[2].IsLiteral(), "Src[2] needs to be 
literal here"); const auto Selector = Op->Src[2].Data.Literal.Value; if (Selector == 0) { - OrderedNode *Result = Is256Bit ? Src1 : _VMov(16, Src1); + OrderedNode* Result = Is256Bit ? Src1 : _VMov(16, Src1); StoreResult(FPRClass, Op, Result, -1); return; } // Only the first four bits of the 8-bit immediate are used, so only check them. if (((Selector & 0b11) == 0b11 && !Is256Bit) || (Selector & 0b1111) == 0b1111) { - OrderedNode *Result = Is256Bit ? Src2 : _VMov(16, Src2); + OrderedNode* Result = Is256Bit ? Src2 : _VMov(16, Src2); StoreResult(FPRClass, Op, Result, -1); return; } const auto ZeroRegister = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); - OrderedNode *Result = VBLENDOpImpl(*this, DstSize, 8, Src1, Src2, ZeroRegister, Selector); + OrderedNode* Result = VBLENDOpImpl(*this, DstSize, 8, Src1, Src2, ZeroRegister, Selector); StoreResult(FPRClass, Op, Result, -1); } @@ -5264,8 +4806,8 @@ void OpDispatchBuilder::VPBLENDDOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[2].IsLiteral(), "Src[2] needs to be literal here"); const auto Selector = Op->Src[2].Data.Literal.Value; @@ -5299,7 +4841,7 @@ void OpDispatchBuilder::VPBLENDDOp(OpcodeArgs) { } const auto ZeroRegister = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); - OrderedNode *Result = VBLENDOpImpl(*this, DstSize, 4, Src1, Src2, ZeroRegister, Selector); + OrderedNode* Result = VBLENDOpImpl(*this, DstSize, 4, Src1, Src2, ZeroRegister, Selector); if (!Is256Bit) { Result = _VMov(16, Result); } @@ -5310,19 +4852,19 @@ void 
OpDispatchBuilder::VPBLENDWOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; - OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); LOGMAN_THROW_A_FMT(Op->Src[2].IsLiteral(), "Src[2] needs to be literal here"); const auto Selector = Op->Src[2].Data.Literal.Value; if (Selector == 0) { - OrderedNode *Result = Is128Bit ? _VMov(16, Src1) : Src1; + OrderedNode* Result = Is128Bit ? _VMov(16, Src1) : Src1; StoreResult(FPRClass, Op, Result, -1); return; } if (Selector == 0xFF) { - OrderedNode *Result = Is128Bit ? _VMov(16, Src2) : Src2; + OrderedNode* Result = Is128Bit ? _VMov(16, Src2) : Src2; StoreResult(FPRClass, Op, Result, -1); return; } @@ -5333,7 +4875,7 @@ void OpDispatchBuilder::VPBLENDWOp(OpcodeArgs) { const auto NewSelector = Selector << 8 | Selector; const auto ZeroRegister = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); - OrderedNode *Result = VBLENDOpImpl(*this, DstSize, 2, Src1, Src2, ZeroRegister, NewSelector); + OrderedNode* Result = VBLENDOpImpl(*this, DstSize, 2, Src1, Src2, ZeroRegister, NewSelector); if (Is128Bit) { Result = _VMov(16, Result); } @@ -5365,7 +4907,7 @@ void OpDispatchBuilder::VZEROOp(OpcodeArgs) { } } -template +template void OpDispatchBuilder::VPERMILImmOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; @@ -5373,8 +4915,8 @@ void OpDispatchBuilder::VPERMILImmOp(OpcodeArgs) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); const auto Selector = Op->Src[1].Data.Literal.Value & 0xFF; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Result = 
LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Result = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); if constexpr (ElementSize == 8) { Result = _VInsElement(DstSize, ElementSize, 0, Selector & 0b0001, Result, Src); @@ -5401,12 +4943,10 @@ void OpDispatchBuilder::VPERMILImmOp(OpcodeArgs) { StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPERMILImmOp<4>(OpcodeArgs); -template -void OpDispatchBuilder::VPERMILImmOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VPERMILImmOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPERMILImmOp<8>(OpcodeArgs); -template +template void OpDispatchBuilder::VPERMILRegOp(OpcodeArgs) { // NOTE: See implementation of VPERMD for the gist of what we do to make this work. // @@ -5417,54 +4957,52 @@ void OpDispatchBuilder::VPERMILRegOp(OpcodeArgs) { const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; constexpr auto IsPD = ElementSize == 8; - const auto SanitizeIndices = [&](OrderedNode *Indices) { + const auto SanitizeIndices = [&](OrderedNode* Indices) { const auto ShiftAmount = 0b11 >> static_cast(IsPD); - OrderedNode *IndexMask = _VectorImm(DstSize, ElementSize, ShiftAmount); + OrderedNode* IndexMask = _VectorImm(DstSize, ElementSize, ShiftAmount); return _VAnd(DstSize, 1, Indices, IndexMask); }; - OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - OrderedNode *Indices = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); + OrderedNode* Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* Indices = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); if constexpr (IsPD) { // VPERMILPD stores the selector in the second bit, rather than the // first bit of each element in the index vector. So move it over by one. 
Indices = _VUShrI(DstSize, ElementSize, Indices, 1); } - OrderedNode *SanitizedIndices = SanitizeIndices(Indices); - OrderedNode *IndexTrn1 = _VTrn(DstSize, 1, SanitizedIndices, SanitizedIndices); - OrderedNode *IndexTrn2 = _VTrn(DstSize, 2, IndexTrn1, IndexTrn1); - OrderedNode *IndexTrn3 = IndexTrn2; + OrderedNode* SanitizedIndices = SanitizeIndices(Indices); + OrderedNode* IndexTrn1 = _VTrn(DstSize, 1, SanitizedIndices, SanitizedIndices); + OrderedNode* IndexTrn2 = _VTrn(DstSize, 2, IndexTrn1, IndexTrn1); + OrderedNode* IndexTrn3 = IndexTrn2; if constexpr (IsPD) { IndexTrn3 = _VTrn(DstSize, 4, IndexTrn2, IndexTrn2); } constexpr auto IndexShift = IsPD ? 3 : 2; - OrderedNode *ShiftedIndices = _VShlI(DstSize, 1, IndexTrn3, IndexShift); + OrderedNode* ShiftedIndices = _VShlI(DstSize, 1, IndexTrn3, IndexShift); constexpr uint64_t VConstant = IsPD ? 0x0706050403020100 : 0x03020100; - OrderedNode *VectorConst = _VDupFromGPR(DstSize, ElementSize, _Constant(VConstant)); - OrderedNode *FinalIndices{}; + OrderedNode* VectorConst = _VDupFromGPR(DstSize, ElementSize, _Constant(VConstant)); + OrderedNode* FinalIndices {}; if (Is256Bit) { const auto ZeroRegister = LoadAndCacheNamedVectorConstant(DstSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); - OrderedNode *Vector16 = _VInsElement(DstSize, 16, 1, 0, ZeroRegister, _VectorImm(DstSize, 1, 16)); - OrderedNode *IndexOffsets = _VAdd(DstSize, 1, VectorConst, Vector16); + OrderedNode* Vector16 = _VInsElement(DstSize, 16, 1, 0, ZeroRegister, _VectorImm(DstSize, 1, 16)); + OrderedNode* IndexOffsets = _VAdd(DstSize, 1, VectorConst, Vector16); FinalIndices = _VAdd(DstSize, 1, IndexOffsets, ShiftedIndices); } else { FinalIndices = _VAdd(DstSize, 1, VectorConst, ShiftedIndices); } - OrderedNode *Result = _VTBL1(DstSize, Src, FinalIndices); + OrderedNode* Result = _VTBL1(DstSize, Src, FinalIndices); StoreResult(FPRClass, Op, Result, -1); } -template -void OpDispatchBuilder::VPERMILRegOp<4>(OpcodeArgs); -template -void 
OpDispatchBuilder::VPERMILRegOp<8>(OpcodeArgs); +template void OpDispatchBuilder::VPERMILRegOp<4>(OpcodeArgs); +template void OpDispatchBuilder::VPERMILRegOp<8>(OpcodeArgs); void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask) { LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src[1] needs to be a literal"); @@ -5480,11 +5018,10 @@ void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask // instructions in the Intel Software Development Manual). // // So, we specify Src2 as having an alignment of 1 to indicate this. - OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, 16, Op->Flags); - OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], 16, Op->Flags, - {.Align = 1}); + OrderedNode* Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, 16, Op->Flags); + OrderedNode* Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], 16, Op->Flags, {.Align = 1}); - OrderedNode *IntermediateResult{}; + OrderedNode* IntermediateResult {}; if (IsExplicit) { // Will be 4 in the absence of a REX.W bit and 8 in the presence of a REX.W bit. 
// @@ -5495,15 +5032,15 @@ void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask const auto Is64Bit = SrcSize == 8; const auto NewControl = uint16_t(Control | (uint16_t(Is64Bit) << 8)); - OrderedNode *SrcRAX = LoadGPRRegister(X86State::REG_RAX); - OrderedNode *SrcRDX = LoadGPRRegister(X86State::REG_RDX); + OrderedNode* SrcRAX = LoadGPRRegister(X86State::REG_RAX); + OrderedNode* SrcRDX = LoadGPRRegister(X86State::REG_RDX); IntermediateResult = _VPCMPESTRX(Src1, Src2, SrcRAX, SrcRDX, NewControl); } else { IntermediateResult = _VPCMPISTRX(Src1, Src2, Control); } - OrderedNode *ZeroConst = _Constant(0); + OrderedNode* ZeroConst = _Constant(0); if (IsMask) { // For the masked variant of the instructions, if control[6] is set, then we @@ -5519,12 +5056,12 @@ void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask if (IsExpandedMask) { // We need to iterate over the intermediate result and // expand the mask into XMM0 elements. - const auto ElementSize = 1U << (Control & 1); + const auto ElementSize = 1U << (Control & 1); const auto NumElements = 16U >> (Control & 1); - OrderedNode *Result = LoadAndCacheNamedVectorConstant(Core::CPUState::XMM_SSE_REG_SIZE, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); + OrderedNode* Result = LoadAndCacheNamedVectorConstant(Core::CPUState::XMM_SSE_REG_SIZE, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO); for (uint32_t i = 0; i < NumElements; i++) { - OrderedNode *SignBit = _Sbfe(OpSize::i64Bit, 1, i, IntermediateResult); + OrderedNode* SignBit = _Sbfe(OpSize::i64Bit, 1, i, IntermediateResult); Result = _VInsGPR(Core::CPUState::XMM_SSE_REG_SIZE, ElementSize, i, Result, SignBit); } StoreXMMRegister(0, Result); @@ -5538,14 +5075,12 @@ void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask // then we store the least significant bit. 
const auto UseMSBIndex = (Control & 0b0100'0000) != 0; - OrderedNode *ResultNoFlags = _Bfe(OpSize::i32Bit, 16, 0, IntermediateResult); + OrderedNode* ResultNoFlags = _Bfe(OpSize::i32Bit, 16, 0, IntermediateResult); - OrderedNode *IfZero = _Constant(16 >> (Control & 1)); - OrderedNode *IfNotZero = UseMSBIndex ? _FindMSB(IR::OpSize::i32Bit, ResultNoFlags) - : _FindLSB(IR::OpSize::i32Bit, ResultNoFlags); + OrderedNode* IfZero = _Constant(16 >> (Control & 1)); + OrderedNode* IfNotZero = UseMSBIndex ? _FindMSB(IR::OpSize::i32Bit, ResultNoFlags) : _FindLSB(IR::OpSize::i32Bit, ResultNoFlags); - OrderedNode *Result = _Select(IR::COND_EQ, ResultNoFlags, ZeroConst, - IfZero, IfNotZero); + OrderedNode* Result = _Select(IR::COND_EQ, ResultNoFlags, ZeroConst, IfZero, IfNotZero); const uint8_t GPRSize = CTX->GetGPRSize(); if (GPRSize == 8) { @@ -5571,9 +5106,7 @@ void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask SetRFLAG(GetFlagBit(18)); SetRFLAG(GetFlagBit(19)); - uint32_t FlagsMaskToZero = - (1U << X86State::RFLAG_PF_RAW_LOC) | - (1U << X86State::RFLAG_AF_RAW_LOC); + uint32_t FlagsMaskToZero = (1U << X86State::RFLAG_PF_RAW_LOC) | (1U << X86State::RFLAG_AF_RAW_LOC); ZeroMultipleFlags(FlagsMaskToZero); } diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp index 39a0871416..9b25f25b96 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp @@ -23,62 +23,59 @@ class OrderedNode; #define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op -OrderedNode *OpDispatchBuilder::GetX87Top() { +OrderedNode* OpDispatchBuilder::GetX87Top() { // Yes, we are storing 3 bits in a single flag register. 
// Deal with it return _LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, flags) + FEXCore::X86State::X87FLAG_TOP_LOC); } -void OpDispatchBuilder::SetX87ValidTag(OrderedNode *Value, bool Valid) { +void OpDispatchBuilder::SetX87ValidTag(OrderedNode* Value, bool Valid) { // if we are popping then we must first mark this location as empty - OrderedNode *AbridgedFTW = _LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); - OrderedNode *RegMask = _Lshl(OpSize::i32Bit, _Constant(1), Value); - OrderedNode *NewAbridgedFTW = Valid ? _Or(OpSize::i32Bit, AbridgedFTW, RegMask) : _Andn(OpSize::i32Bit, AbridgedFTW, RegMask); + OrderedNode* AbridgedFTW = _LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); + OrderedNode* RegMask = _Lshl(OpSize::i32Bit, _Constant(1), Value); + OrderedNode* NewAbridgedFTW = Valid ? _Or(OpSize::i32Bit, AbridgedFTW, RegMask) : _Andn(OpSize::i32Bit, AbridgedFTW, RegMask); _StoreContext(1, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); } -OrderedNode *OpDispatchBuilder::GetX87ValidTag(OrderedNode *Value) { - OrderedNode *AbridgedFTW = _LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); +OrderedNode* OpDispatchBuilder::GetX87ValidTag(OrderedNode* Value) { + OrderedNode* AbridgedFTW = _LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); return _And(OpSize::i32Bit, _Lshr(OpSize::i32Bit, AbridgedFTW, Value), _Constant(1)); } -OrderedNode *OpDispatchBuilder::GetX87Tag(OrderedNode *Value, OrderedNode *AbridgedFTW) { - OrderedNode *RegValid = _And(OpSize::i32Bit, _Lshr(OpSize::i32Bit, AbridgedFTW, Value), _Constant(1)); - OrderedNode *X87Empty = _Constant(static_cast(FPState::X87Tag::Empty)); - OrderedNode *X87Valid = _Constant(static_cast(FPState::X87Tag::Valid)); +OrderedNode* OpDispatchBuilder::GetX87Tag(OrderedNode* Value, OrderedNode* AbridgedFTW) { + OrderedNode* RegValid = _And(OpSize::i32Bit, _Lshr(OpSize::i32Bit, AbridgedFTW, 
Value), _Constant(1)); + OrderedNode* X87Empty = _Constant(static_cast(FPState::X87Tag::Empty)); + OrderedNode* X87Valid = _Constant(static_cast(FPState::X87Tag::Valid)); - return _Select(FEXCore::IR::COND_EQ, - RegValid, _Constant(0), - X87Empty, X87Valid); + return _Select(FEXCore::IR::COND_EQ, RegValid, _Constant(0), X87Empty, X87Valid); } -OrderedNode *OpDispatchBuilder::GetX87Tag(OrderedNode *Value) { - OrderedNode *AbridgedFTW = _LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); +OrderedNode* OpDispatchBuilder::GetX87Tag(OrderedNode* Value) { + OrderedNode* AbridgedFTW = _LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); return GetX87Tag(Value, AbridgedFTW); } -void OpDispatchBuilder::SetX87FTW(OrderedNode *FTW) { - OrderedNode *X87Empty = _Constant(static_cast(FPState::X87Tag::Empty)); - OrderedNode *NewAbridgedFTW; +void OpDispatchBuilder::SetX87FTW(OrderedNode* FTW) { + OrderedNode* X87Empty = _Constant(static_cast(FPState::X87Tag::Empty)); + OrderedNode* NewAbridgedFTW; for (int i = 0; i < 8; i++) { - OrderedNode *RegTag = _Bfe(OpSize::i32Bit, 2, i * 2, FTW); - OrderedNode *RegValid = _Select(FEXCore::IR::COND_NEQ, - RegTag, X87Empty, - _Constant(1), _Constant(0)); + OrderedNode* RegTag = _Bfe(OpSize::i32Bit, 2, i * 2, FTW); + OrderedNode* RegValid = _Select(FEXCore::IR::COND_NEQ, RegTag, X87Empty, _Constant(1), _Constant(0)); - if (i) + if (i) { NewAbridgedFTW = _Orlshl(OpSize::i32Bit, NewAbridgedFTW, RegValid, i); - else + } else { NewAbridgedFTW = RegValid; + } } _StoreContext(1, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); } -OrderedNode *OpDispatchBuilder::GetX87FTW() { - OrderedNode *AbridgedFTW = _LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); - OrderedNode *FTW = _Constant(0); +OrderedNode* OpDispatchBuilder::GetX87FTW() { + OrderedNode* AbridgedFTW = _LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); + OrderedNode* FTW 
= _Constant(0); for (int i = 0; i < 8; i++) { const auto RegTag = GetX87Tag(_Constant(i), AbridgedFTW); @@ -88,13 +85,13 @@ OrderedNode *OpDispatchBuilder::GetX87FTW() { return FTW; } -void OpDispatchBuilder::SetX87Top(OrderedNode *Value) { +void OpDispatchBuilder::SetX87Top(OrderedNode* Value) { _StoreContext(1, GPRClass, Value, offsetof(FEXCore::Core::CPUState, flags) + FEXCore::X86State::X87FLAG_TOP_LOC); } -OrderedNode *OpDispatchBuilder::ReconstructFSW() { +OrderedNode* OpDispatchBuilder::ReconstructFSW() { // We must construct the FSW from our various bits - OrderedNode *FSW = _Constant(0); + OrderedNode* FSW = _Constant(0); auto Top = GetX87Top(); FSW = _Bfi(OpSize::i64Bit, 3, 11, FSW, Top); @@ -110,12 +107,12 @@ OrderedNode *OpDispatchBuilder::ReconstructFSW() { return FSW; } -OrderedNode *OpDispatchBuilder::ReconstructX87StateFromFSW(OrderedNode *FSW) { +OrderedNode* OpDispatchBuilder::ReconstructX87StateFromFSW(OrderedNode* FSW) { auto Top = _Bfe(OpSize::i32Bit, 3, 11, FSW); SetX87Top(Top); - auto C0 = _Bfe(OpSize::i32Bit, 1, 8, FSW); - auto C1 = _Bfe(OpSize::i32Bit, 1, 9, FSW); + auto C0 = _Bfe(OpSize::i32Bit, 1, 8, FSW); + auto C1 = _Bfe(OpSize::i32Bit, 1, 9, FSW); auto C2 = _Bfe(OpSize::i32Bit, 1, 10, FSW); auto C3 = _Bfe(OpSize::i32Bit, 1, 14, FSW); @@ -134,19 +131,18 @@ void OpDispatchBuilder::FLD(OpcodeArgs) { size_t read_width = (width == 80) ? 
16 : width / 8; - OrderedNode *data{}; + OrderedNode* data {}; if (!Op->Src[0].IsNone()) { // Read from memory data = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], read_width, Op->Flags); - } - else { + } else { // Implicit arg auto offset = _Constant(Op->OP & 7); data = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, orig_top, offset), mask); data = _LoadContextIndexed(data, 16, MMBaseOffset(), 16, FPRClass); } - OrderedNode *converted = data; + OrderedNode* converted = data; // Convert to 80bit float if constexpr (width == 32 || width == 64) { @@ -161,12 +157,9 @@ void OpDispatchBuilder::FLD(OpcodeArgs) { //_StoreContext(converted, 16, offsetof(FEXCore::Core::CPUState, mm[7][0])); } -template -void OpDispatchBuilder::FLD<32>(OpcodeArgs); -template -void OpDispatchBuilder::FLD<64>(OpcodeArgs); -template -void OpDispatchBuilder::FLD<80>(OpcodeArgs); +template void OpDispatchBuilder::FLD<32>(OpcodeArgs); +template void OpDispatchBuilder::FLD<64>(OpcodeArgs); +template void OpDispatchBuilder::FLD<80>(OpcodeArgs); void OpDispatchBuilder::FBLD(OpcodeArgs) { // Update TOP @@ -177,8 +170,8 @@ void OpDispatchBuilder::FBLD(OpcodeArgs) { SetX87Top(top); // Read from memory - OrderedNode *data = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], 16, Op->Flags); - OrderedNode *converted = _F80BCDLoad(data); + OrderedNode* data = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], 16, Op->Flags); + OrderedNode* converted = _F80BCDLoad(data); _StoreContextIndexed(converted, top, 16, MMBaseOffset(), 16, FPRClass); } @@ -186,7 +179,7 @@ void OpDispatchBuilder::FBSTP(OpcodeArgs) { auto orig_top = GetX87Top(); auto data = _LoadContextIndexed(orig_top, 16, MMBaseOffset(), 16, FPRClass); - OrderedNode *converted = _F80BCDStore(data); + OrderedNode* converted = _F80BCDStore(data); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, converted, 10, 1); @@ -206,26 +199,19 @@ void OpDispatchBuilder::FLD_Const(OpcodeArgs) { auto low = _Constant(Lower); auto high = _Constant(Upper); - OrderedNode *data = 
_VCastFromGPR(16, 8, low); + OrderedNode* data = _VCastFromGPR(16, 8, low); data = _VInsGPR(16, 8, 1, data, high); // Write to ST[TOP] _StoreContextIndexed(data, top, 16, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::FLD_Const<0x8000'0000'0000'0000ULL, 0b0'011'1111'1111'1111ULL>(OpcodeArgs); // 1.0 -template -void OpDispatchBuilder::FLD_Const<0xD49A'784B'CD1B'8AFEULL, 0x4000ULL>(OpcodeArgs); // log2l(10) -template -void OpDispatchBuilder::FLD_Const<0xB8AA'3B29'5C17'F0BCULL, 0x3FFFULL>(OpcodeArgs); // log2l(e) -template -void OpDispatchBuilder::FLD_Const<0xC90F'DAA2'2168'C235ULL, 0x4000ULL>(OpcodeArgs); // pi -template -void OpDispatchBuilder::FLD_Const<0x9A20'9A84'FBCF'F799ULL, 0x3FFDULL>(OpcodeArgs); // log10l(2) -template -void OpDispatchBuilder::FLD_Const<0xB172'17F7'D1CF'79ACULL, 0x3FFEULL>(OpcodeArgs); // log(2) -template -void OpDispatchBuilder::FLD_Const<0, 0>(OpcodeArgs); // 0.0 +template void OpDispatchBuilder::FLD_Const<0x8000'0000'0000'0000ULL, 0b0'011'1111'1111'1111ULL>(OpcodeArgs); // 1.0 +template void OpDispatchBuilder::FLD_Const<0xD49A'784B'CD1B'8AFEULL, 0x4000ULL>(OpcodeArgs); // log2l(10) +template void OpDispatchBuilder::FLD_Const<0xB8AA'3B29'5C17'F0BCULL, 0x3FFFULL>(OpcodeArgs); // log2l(e) +template void OpDispatchBuilder::FLD_Const<0xC90F'DAA2'2168'C235ULL, 0x4000ULL>(OpcodeArgs); // pi +template void OpDispatchBuilder::FLD_Const<0x9A20'9A84'FBCF'F799ULL, 0x3FFDULL>(OpcodeArgs); // log10l(2) +template void OpDispatchBuilder::FLD_Const<0xB172'17F7'D1CF'79ACULL, 0x3FFEULL>(OpcodeArgs); // log(2) +template void OpDispatchBuilder::FLD_Const<0, 0>(OpcodeArgs); // 0.0 void OpDispatchBuilder::FILD(OpcodeArgs) { // Update TOP @@ -251,8 +237,8 @@ void OpDispatchBuilder::FILD(OpcodeArgs) { // Extract sign and make interger absolute _SubNZCV(OpSize::i64Bit, data, zero); - auto sign = _NZCVSelect(OpSize::i64Bit, CondClassType{COND_SLT}, _Constant(0x8000), zero); - auto absolute = _Neg(OpSize::i64Bit, data, CondClassType{COND_MI}); + 
auto sign = _NZCVSelect(OpSize::i64Bit, CondClassType {COND_SLT}, _Constant(0x8000), zero); + auto absolute = _Neg(OpSize::i64Bit, data, CondClassType {COND_MI}); // left justify the absolute interger auto shift = _Sub(OpSize::i64Bit, _Constant(63), _FindMSB(IR::OpSize::i64Bit, absolute)); @@ -263,7 +249,7 @@ void OpDispatchBuilder::FILD(OpcodeArgs) { auto upper = _Or(OpSize::i64Bit, sign, zeroed_exponent); - OrderedNode *converted = _VCastFromGPR(16, 8, shifted); + OrderedNode* converted = _VCastFromGPR(16, 8, shifted); converted = _VInsElement(16, 8, 1, 0, converted, _VCastFromGPR(16, 8, upper)); // Write to ST[TOP] @@ -276,8 +262,7 @@ void OpDispatchBuilder::FST(OpcodeArgs) { auto data = _LoadContextIndexed(orig_top, 16, MMBaseOffset(), 16, FPRClass); if constexpr (width == 80) { StoreResult_WithOpSize(FPRClass, Op, Op->Dest, data, 10, 1); - } - else if constexpr (width == 32 || width == 64) { + } else if constexpr (width == 32 || width == 64) { auto result = _F80CVT(width / 8, data); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, result, width / 8, 1); } @@ -291,19 +276,16 @@ void OpDispatchBuilder::FST(OpcodeArgs) { } } -template -void OpDispatchBuilder::FST<32>(OpcodeArgs); -template -void OpDispatchBuilder::FST<64>(OpcodeArgs); -template -void OpDispatchBuilder::FST<80>(OpcodeArgs); +template void OpDispatchBuilder::FST<32>(OpcodeArgs); +template void OpDispatchBuilder::FST<64>(OpcodeArgs); +template void OpDispatchBuilder::FST<80>(OpcodeArgs); template void OpDispatchBuilder::FIST(OpcodeArgs) { auto Size = GetSrcSize(Op); auto orig_top = GetX87Top(); - OrderedNode *data = _LoadContextIndexed(orig_top, 16, MMBaseOffset(), 16, FPRClass); + OrderedNode* data = _LoadContextIndexed(orig_top, 16, MMBaseOffset(), 16, FPRClass); data = _F80CVTInt(Size, data, Truncate); StoreResult_WithOpSize(GPRClass, Op, Op->Dest, data, Size, 1); @@ -317,18 +299,16 @@ void OpDispatchBuilder::FIST(OpcodeArgs) { } } -template -void OpDispatchBuilder::FIST(OpcodeArgs); -template 
-void OpDispatchBuilder::FIST(OpcodeArgs); +template void OpDispatchBuilder::FIST(OpcodeArgs); +template void OpDispatchBuilder::FIST(OpcodeArgs); -template +template void OpDispatchBuilder::FADD(OpcodeArgs) { auto top = GetX87Top(); - OrderedNode *StackLocation = top; + OrderedNode* StackLocation = top; - OrderedNode *arg{}; - OrderedNode *b{}; + OrderedNode* arg {}; + OrderedNode* b {}; auto mask = _Constant(7); @@ -338,8 +318,7 @@ void OpDispatchBuilder::FADD(OpcodeArgs) { if constexpr (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); b = _F80CVTToInt(arg, width / 8); - } - else { + } else { arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); b = _F80CVTTo(arg, width / 8); } @@ -369,26 +348,20 @@ void OpDispatchBuilder::FADD(OpcodeArgs) { _StoreContextIndexed(result, StackLocation, 16, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::FADD<32, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FADD<64, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FADD<80, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FADD<80, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); +template void OpDispatchBuilder::FADD<32, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FADD<64, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FADD<80, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FADD<80, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); -template -void OpDispatchBuilder::FADD<16, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FADD<32, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FADD<16, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void 
OpDispatchBuilder::FADD<32, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); template void OpDispatchBuilder::FMUL(OpcodeArgs) { auto top = GetX87Top(); - OrderedNode *StackLocation = top; - OrderedNode *arg{}; - OrderedNode *b{}; + OrderedNode* StackLocation = top; + OrderedNode* arg {}; + OrderedNode* b {}; auto mask = _Constant(7); @@ -399,8 +372,7 @@ void OpDispatchBuilder::FMUL(OpcodeArgs) { if constexpr (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); b = _F80CVTToInt(arg, width / 8); - } - else { + } else { arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); b = _F80CVTTo(arg, width / 8); } @@ -432,26 +404,20 @@ void OpDispatchBuilder::FMUL(OpcodeArgs) { _StoreContextIndexed(result, StackLocation, 16, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::FMUL<32, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FMUL<64, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FMUL<80, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FMUL<80, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); +template void OpDispatchBuilder::FMUL<32, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FMUL<64, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FMUL<80, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FMUL<80, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); -template -void OpDispatchBuilder::FMUL<16, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FMUL<32, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FMUL<16, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FMUL<32, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); 
template void OpDispatchBuilder::FDIV(OpcodeArgs) { auto top = GetX87Top(); - OrderedNode *StackLocation = top; - OrderedNode *arg{}; - OrderedNode *b{}; + OrderedNode* StackLocation = top; + OrderedNode* arg {}; + OrderedNode* b {}; auto mask = _Constant(7); @@ -462,8 +428,7 @@ void OpDispatchBuilder::FDIV(OpcodeArgs) { if constexpr (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); b = _F80CVTToInt(arg, width / 8); - } - else { + } else { arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); b = _F80CVTTo(arg, width / 8); } @@ -481,11 +446,10 @@ void OpDispatchBuilder::FDIV(OpcodeArgs) { auto a = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); - OrderedNode *result{}; + OrderedNode* result {}; if constexpr (reverse) { result = _F80Div(b, a); - } - else { + } else { result = _F80Div(a, b); } @@ -501,42 +465,30 @@ void OpDispatchBuilder::FDIV(OpcodeArgs) { _StoreContextIndexed(result, StackLocation, 16, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::FDIV<32, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIV<32, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIV<32, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIV<32, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIV<64, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIV<64, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIV<64, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIV<64, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIV<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIV<80, 
false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIV<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIV<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIV<80, false, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); -template -void OpDispatchBuilder::FDIV<80, false, true, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); +template void OpDispatchBuilder::FDIV<80, false, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); +template void OpDispatchBuilder::FDIV<80, false, true, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); -template -void OpDispatchBuilder::FDIV<16, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIV<16, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIV<16, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIV<16, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIV<32, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIV<32, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIV<32, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIV<32, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); template void OpDispatchBuilder::FSUB(OpcodeArgs) { auto top = GetX87Top(); - OrderedNode *StackLocation = top; - OrderedNode *arg{}; - OrderedNode *b{}; + OrderedNode* StackLocation = top; + OrderedNode* arg {}; + OrderedNode* b {}; auto mask = _Constant(7); @@ -547,8 +499,7 @@ void OpDispatchBuilder::FSUB(OpcodeArgs) { if constexpr (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); b = _F80CVTToInt(arg, width / 
8); - } - else { + } else { arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); b = _F80CVTTo(arg, width / 8); } @@ -565,11 +516,10 @@ void OpDispatchBuilder::FSUB(OpcodeArgs) { auto a = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); - OrderedNode *result{}; + OrderedNode* result {}; if constexpr (reverse) { result = _F80Sub(b, a); - } - else { + } else { result = _F80Sub(a, b); } @@ -586,35 +536,23 @@ void OpDispatchBuilder::FSUB(OpcodeArgs) { _StoreContextIndexed(result, StackLocation, 16, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::FSUB<32, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUB<32, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUB<32, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUB<32, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUB<64, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUB<64, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUB<64, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUB<64, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUB<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUB<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUB<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUB<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUB<80, false, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); -template -void OpDispatchBuilder::FSUB<80, 
false, true, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); +template void OpDispatchBuilder::FSUB<80, false, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); +template void OpDispatchBuilder::FSUB<80, false, true, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); -template -void OpDispatchBuilder::FSUB<16, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUB<16, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUB<16, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUB<16, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUB<32, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUB<32, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUB<32, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUB<32, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); void OpDispatchBuilder::FCHS(OpcodeArgs) { auto top = GetX87Top(); @@ -622,7 +560,7 @@ void OpDispatchBuilder::FCHS(OpcodeArgs) { auto low = _Constant(0); auto high = _Constant(0b1'000'0000'0000'0000ULL); - OrderedNode *data = _VCastFromGPR(16, 8, low); + OrderedNode* data = _VCastFromGPR(16, 8, low); data = _VInsGPR(16, 8, 1, data, high); auto result = _VXor(16, 1, a, data); @@ -637,7 +575,7 @@ void OpDispatchBuilder::FABS(OpcodeArgs) { auto low = _Constant(~0ULL); auto high = _Constant(0b0'111'1111'1111'1111ULL); - OrderedNode *data = _VCastFromGPR(16, 8, low); + OrderedNode* data = _VCastFromGPR(16, 8, low); data = _VInsGPR(16, 8, 1, data, high); auto result = _VAnd(16, 1, a, data); @@ -651,16 +589,13 @@ void OpDispatchBuilder::FTST(OpcodeArgs) { auto a = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); auto low = _Constant(0); - 
OrderedNode *data = _VCastFromGPR(16, 8, low); + OrderedNode* data = _VCastFromGPR(16, 8, low); - OrderedNode *Res = _F80Cmp(a, data, - (1 << FCMP_FLAG_EQ) | - (1 << FCMP_FLAG_LT) | - (1 << FCMP_FLAG_UNORDERED)); + OrderedNode* Res = _F80Cmp(a, data, (1 << FCMP_FLAG_EQ) | (1 << FCMP_FLAG_LT) | (1 << FCMP_FLAG_UNORDERED)); - OrderedNode *HostFlag_CF = _GetHostFlag(Res, FCMP_FLAG_LT); - OrderedNode *HostFlag_ZF = _GetHostFlag(Res, FCMP_FLAG_EQ); - OrderedNode *HostFlag_Unordered = _GetHostFlag(Res, FCMP_FLAG_UNORDERED); + OrderedNode* HostFlag_CF = _GetHostFlag(Res, FCMP_FLAG_LT); + OrderedNode* HostFlag_ZF = _GetHostFlag(Res, FCMP_FLAG_EQ); + OrderedNode* HostFlag_Unordered = _GetHostFlag(Res, FCMP_FLAG_UNORDERED); HostFlag_CF = _Or(OpSize::i32Bit, HostFlag_CF, HostFlag_Unordered); HostFlag_ZF = _Or(OpSize::i32Bit, HostFlag_ZF, HostFlag_Unordered); @@ -719,8 +654,8 @@ void OpDispatchBuilder::FCOMI(OpcodeArgs) { auto top = GetX87Top(); auto mask = _Constant(7); - OrderedNode *arg{}; - OrderedNode *b{}; + OrderedNode* arg {}; + OrderedNode* b {}; if (!Op->Src[0].IsNone()) { // Memory arg @@ -728,8 +663,7 @@ void OpDispatchBuilder::FCOMI(OpcodeArgs) { if constexpr (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); b = _F80CVTToInt(arg, width / 8); - } - else { + } else { arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); b = _F80CVTTo(arg, width / 8); } @@ -743,14 +677,11 @@ void OpDispatchBuilder::FCOMI(OpcodeArgs) { auto a = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); - OrderedNode *Res = _F80Cmp(a, b, - (1 << FCMP_FLAG_EQ) | - (1 << FCMP_FLAG_LT) | - (1 << FCMP_FLAG_UNORDERED)); + OrderedNode* Res = _F80Cmp(a, b, (1 << FCMP_FLAG_EQ) | (1 << FCMP_FLAG_LT) | (1 << FCMP_FLAG_UNORDERED)); - OrderedNode *HostFlag_CF = _GetHostFlag(Res, FCMP_FLAG_LT); - OrderedNode *HostFlag_ZF = _GetHostFlag(Res, FCMP_FLAG_EQ); - OrderedNode *HostFlag_Unordered = _GetHostFlag(Res, FCMP_FLAG_UNORDERED); + OrderedNode* HostFlag_CF = _GetHostFlag(Res, 
FCMP_FLAG_LT); + OrderedNode* HostFlag_ZF = _GetHostFlag(Res, FCMP_FLAG_EQ); + OrderedNode* HostFlag_Unordered = _GetHostFlag(Res, FCMP_FLAG_UNORDERED); HostFlag_CF = _Or(OpSize::i32Bit, HostFlag_CF, HostFlag_Unordered); HostFlag_ZF = _Or(OpSize::i32Bit, HostFlag_ZF, HostFlag_Unordered); @@ -759,8 +690,7 @@ void OpDispatchBuilder::FCOMI(OpcodeArgs) { SetRFLAG(_Constant(0)); SetRFLAG(HostFlag_Unordered); SetRFLAG(HostFlag_ZF); - } - else { + } else { // Invalidate deferred flags early // OF, SF, AF, PF all undefined InvalidateDeferredFlags(); @@ -782,8 +712,7 @@ void OpDispatchBuilder::FCOMI(OpcodeArgs) { // Set the new top now top = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, top, _Constant(1)), mask); SetX87Top(top); - } - else if ((Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) != 0) { + } else if ((Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) != 0) { // if we are popping then we must first mark this location as empty SetX87ValidTag(top, false); // Set the new top now @@ -792,24 +721,17 @@ void OpDispatchBuilder::FCOMI(OpcodeArgs) { } } -template -void OpDispatchBuilder::FCOMI<32, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); +template void OpDispatchBuilder::FCOMI<32, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); -template -void OpDispatchBuilder::FCOMI<64, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); +template void OpDispatchBuilder::FCOMI<64, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); -template -void OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); -template -void OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>(OpcodeArgs); -template -void OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>(OpcodeArgs); +template void OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); +template void 
OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>(OpcodeArgs); +template void OpDispatchBuilder::FCOMI<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>(OpcodeArgs); -template -void OpDispatchBuilder::FCOMI<16, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); +template void OpDispatchBuilder::FCOMI<16, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); -template -void OpDispatchBuilder::FCOMI<32, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); +template void OpDispatchBuilder::FCOMI<32, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); void OpDispatchBuilder::FXCH(OpcodeArgs) { @@ -860,8 +782,7 @@ void OpDispatchBuilder::X87UnaryOp(OpcodeArgs) { DeriveOp(result, IROp, _F80Round(a)); - if constexpr (IROp == IR::OP_F80SIN || - IROp == IR::OP_F80COS) { + if constexpr (IROp == IR::OP_F80SIN || IROp == IR::OP_F80COS) { // TODO: ACCURACY: should check source is in range –2^63 to +2^63 SetRFLAG(_Constant(0)); } @@ -870,30 +791,25 @@ void OpDispatchBuilder::X87UnaryOp(OpcodeArgs) { _StoreContextIndexed(result, top, 16, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::X87UnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::X87UnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::X87UnaryOp(OpcodeArgs); -template -void OpDispatchBuilder::X87UnaryOp(OpcodeArgs); +template void OpDispatchBuilder::X87UnaryOp(OpcodeArgs); +template void OpDispatchBuilder::X87UnaryOp(OpcodeArgs); +template void OpDispatchBuilder::X87UnaryOp(OpcodeArgs); +template void OpDispatchBuilder::X87UnaryOp(OpcodeArgs); template void OpDispatchBuilder::X87BinaryOp(OpcodeArgs) { auto top = GetX87Top(); auto mask = _Constant(7); - OrderedNode *st1 = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, top, _Constant(1)), mask); + OrderedNode* st1 = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, top, _Constant(1)), mask); auto a = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, 
FPRClass); st1 = _LoadContextIndexed(st1, 16, MMBaseOffset(), 16, FPRClass); DeriveOp(result, IROp, _F80Add(a, st1)); - if constexpr (IROp == IR::OP_F80FPREM || - IROp == IR::OP_F80FPREM1) { - //TODO: Set C0 to Q2, C3 to Q1, C1 to Q0 + if constexpr (IROp == IR::OP_F80FPREM || IROp == IR::OP_F80FPREM1) { + // TODO: Set C0 to Q2, C3 to Q1, C1 to Q0 SetRFLAG(_Constant(0)); } @@ -901,12 +817,9 @@ void OpDispatchBuilder::X87BinaryOp(OpcodeArgs) { _StoreContextIndexed(result, top, 16, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::X87BinaryOp(OpcodeArgs); -template -void OpDispatchBuilder::X87BinaryOp(OpcodeArgs); -template -void OpDispatchBuilder::X87BinaryOp(OpcodeArgs); +template void OpDispatchBuilder::X87BinaryOp(OpcodeArgs); +template void OpDispatchBuilder::X87BinaryOp(OpcodeArgs); +template void OpDispatchBuilder::X87BinaryOp(OpcodeArgs); template void OpDispatchBuilder::X87ModifySTP(OpcodeArgs) { @@ -914,17 +827,14 @@ void OpDispatchBuilder::X87ModifySTP(OpcodeArgs) { if (Inc) { auto top = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, orig_top, _Constant(1)), _Constant(7)); SetX87Top(top); - } - else { + } else { auto top = _And(OpSize::i32Bit, _Sub(OpSize::i32Bit, orig_top, _Constant(1)), _Constant(7)); SetX87Top(top); } } -template -void OpDispatchBuilder::X87ModifySTP(OpcodeArgs); -template -void OpDispatchBuilder::X87ModifySTP(OpcodeArgs); +template void OpDispatchBuilder::X87ModifySTP(OpcodeArgs); +template void OpDispatchBuilder::X87ModifySTP(OpcodeArgs); void OpDispatchBuilder::X87SinCos(OpcodeArgs) { auto orig_top = GetX87Top(); @@ -954,13 +864,13 @@ void OpDispatchBuilder::X87FYL2X(OpcodeArgs) { auto top = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, orig_top, _Constant(1)), _Constant(7)); SetX87Top(top); - OrderedNode *st0 = _LoadContextIndexed(orig_top, 16, MMBaseOffset(), 16, FPRClass); - OrderedNode *st1 = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); + OrderedNode* st0 = _LoadContextIndexed(orig_top, 16, MMBaseOffset(), 
16, FPRClass); + OrderedNode* st1 = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); if (Plus1) { auto low = _Constant(0x8000'0000'0000'0000ULL); auto high = _Constant(0b0'011'1111'1111'1111); - OrderedNode *data = _VCastFromGPR(16, 8, low); + OrderedNode* data = _VCastFromGPR(16, 8, low); data = _VInsGPR(16, 8, 1, data, high); st0 = _F80Add(st0, data); } @@ -983,7 +893,7 @@ void OpDispatchBuilder::X87TAN(OpcodeArgs) { auto low = _Constant(0x8000'0000'0000'0000ULL); auto high = _Constant(0b0'011'1111'1111'1111ULL); - OrderedNode *data = _VCastFromGPR(16, 8, low); + OrderedNode* data = _VCastFromGPR(16, 8, low); data = _VInsGPR(16, 8, 1, data, high); // TODO: ACCURACY: should check source is in range –2^63 to +2^63 @@ -1002,7 +912,7 @@ void OpDispatchBuilder::X87ATAN(OpcodeArgs) { SetX87Top(top); auto a = _LoadContextIndexed(orig_top, 16, MMBaseOffset(), 16, FPRClass); - OrderedNode *st1 = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); + OrderedNode* st1 = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); auto result = _F80ATAN(st1, a); @@ -1012,19 +922,19 @@ void OpDispatchBuilder::X87ATAN(OpcodeArgs) { void OpDispatchBuilder::X87LDENV(OpcodeArgs) { auto Size = GetSrcSize(Op); - OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); + OrderedNode* Mem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); Mem = AppendSegmentOffset(Mem, Op->Flags); auto NewFCW = _LoadMem(GPRClass, 2, Mem, 2); _StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW)); - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); auto NewFSW = _LoadMem(GPRClass, Size, MemLocation, Size); ReconstructX87StateFromFSW(NewFSW); { // FTW - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); 
SetX87FTW(_LoadMem(GPRClass, Size, MemLocation, Size)); } } @@ -1050,7 +960,7 @@ void OpDispatchBuilder::X87FNSTENV(OpcodeArgs) { // 4 bytes : data pointer selector auto Size = GetDstSize(Op); - OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); Mem = AppendSegmentOffset(Mem, Op->Flags); { @@ -1059,7 +969,7 @@ void OpDispatchBuilder::X87FNSTENV(OpcodeArgs) { } { - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); _StoreMem(GPRClass, Size, MemLocation, ReconstructFSW(), Size); } @@ -1067,37 +977,37 @@ void OpDispatchBuilder::X87FNSTENV(OpcodeArgs) { { // FTW - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); _StoreMem(GPRClass, Size, MemLocation, GetX87FTW(), Size); } { // Instruction Offset - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 3)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 3)); _StoreMem(GPRClass, Size, MemLocation, ZeroConst, Size); } { // Instruction CS selector (+ Opcode) - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 4)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 4)); _StoreMem(GPRClass, Size, MemLocation, ZeroConst, Size); } { // Data pointer offset - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 5)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 5)); _StoreMem(GPRClass, Size, MemLocation, ZeroConst, Size); } { // Data pointer selector - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 6)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 6)); _StoreMem(GPRClass, Size, MemLocation, ZeroConst, Size); } } void 
OpDispatchBuilder::X87FLDCW(OpcodeArgs) { - OrderedNode *NewFCW = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* NewFCW = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); _StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW)); } @@ -1108,7 +1018,7 @@ void OpDispatchBuilder::X87FSTCW(OpcodeArgs) { } void OpDispatchBuilder::X87LDSW(OpcodeArgs) { - OrderedNode *NewFSW = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + OrderedNode* NewFSW = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); ReconstructX87StateFromFSW(NewFSW); } @@ -1137,17 +1047,17 @@ void OpDispatchBuilder::X87FNSAVE(OpcodeArgs) { // 4 bytes : data pointer selector auto Size = GetDstSize(Op); - OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); Mem = AppendSegmentOffset(Mem, Op->Flags); - OrderedNode *Top = GetX87Top(); + OrderedNode* Top = GetX87Top(); { auto FCW = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, FCW)); _StoreMem(GPRClass, Size, Mem, FCW, Size); } { - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); _StoreMem(GPRClass, Size, MemLocation, ReconstructFSW(), Size); } @@ -1155,35 +1065,35 @@ void OpDispatchBuilder::X87FNSAVE(OpcodeArgs) { { // FTW - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); _StoreMem(GPRClass, Size, MemLocation, GetX87FTW(), Size); } { // Instruction Offset - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 3)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 3)); _StoreMem(GPRClass, Size, MemLocation, ZeroConst, Size); } { // Instruction CS selector (+ Opcode) - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 4)); + 
OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 4)); _StoreMem(GPRClass, Size, MemLocation, ZeroConst, Size); } { // Data pointer offset - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 5)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 5)); _StoreMem(GPRClass, Size, MemLocation, ZeroConst, Size); } { // Data pointer selector - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 6)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 6)); _StoreMem(GPRClass, Size, MemLocation, ZeroConst, Size); } - OrderedNode *ST0Location = _Add(OpSize::i64Bit, Mem, _Constant(Size * 7)); + OrderedNode* ST0Location = _Add(OpSize::i64Bit, Mem, _Constant(Size * 7)); auto OneConst = _Constant(1); auto SevenConst = _Constant(7); @@ -1211,22 +1121,22 @@ void OpDispatchBuilder::X87FNSAVE(OpcodeArgs) { void OpDispatchBuilder::X87FRSTOR(OpcodeArgs) { auto Size = GetSrcSize(Op); - OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); + OrderedNode* Mem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); Mem = AppendSegmentOffset(Mem, Op->Flags); auto NewFCW = _LoadMem(GPRClass, 2, Mem, 2); _StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW)); - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); auto NewFSW = _LoadMem(GPRClass, Size, MemLocation, Size); auto Top = ReconstructX87StateFromFSW(NewFSW); { // FTW - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); SetX87FTW(_LoadMem(GPRClass, Size, MemLocation, Size)); } - OrderedNode *ST0Location = _Add(OpSize::i64Bit, Mem, _Constant(Size * 7)); + OrderedNode* ST0Location = _Add(OpSize::i64Bit, Mem, _Constant(Size * 7)); auto OneConst = _Constant(1); auto 
SevenConst = _Constant(7); @@ -1234,11 +1144,11 @@ void OpDispatchBuilder::X87FRSTOR(OpcodeArgs) { auto low = _Constant(~0ULL); auto high = _Constant(0xFFFF); - OrderedNode *Mask = _VCastFromGPR(16, 8, low); + OrderedNode* Mask = _VCastFromGPR(16, 8, low); Mask = _VInsGPR(16, 8, 1, Mask, high); for (int i = 0; i < 7; ++i) { - OrderedNode *Reg = _LoadMem(FPRClass, 16, ST0Location, 1); + OrderedNode* Reg = _LoadMem(FPRClass, 16, ST0Location, 1); // Mask off the top bits Reg = _VAnd(16, 16, Reg, Mask); @@ -1253,9 +1163,9 @@ void OpDispatchBuilder::X87FRSTOR(OpcodeArgs) { // Lower 64bits [63:0] // upper 16 bits [79:64] - OrderedNode *Reg = _LoadMem(FPRClass, 8, ST0Location, 1); + OrderedNode* Reg = _LoadMem(FPRClass, 8, ST0Location, 1); ST0Location = _Add(OpSize::i64Bit, ST0Location, _Constant(8)); - OrderedNode *RegHigh = _LoadMem(FPRClass, 2, ST0Location, 1); + OrderedNode* RegHigh = _LoadMem(FPRClass, 2, ST0Location, 1); Reg = _VInsElement(16, 2, 4, 0, Reg, RegHigh); _StoreContextIndexed(Reg, Top, 16, MMBaseOffset(), 16, FPRClass); } @@ -1263,7 +1173,7 @@ void OpDispatchBuilder::X87FRSTOR(OpcodeArgs) { void OpDispatchBuilder::X87FXAM(OpcodeArgs) { auto top = GetX87Top(); auto a = _LoadContextIndexed(top, 16, MMBaseOffset(), 16, FPRClass); - OrderedNode *Result = _VExtractToGPR(16, 8, a, 1); + OrderedNode* Result = _VExtractToGPR(16, 8, a, 1); // Extract the sign bit Result = _Bfe(OpSize::i64Bit, 1, 15, Result); @@ -1276,9 +1186,7 @@ void OpDispatchBuilder::X87FXAM(OpcodeArgs) { auto OneConst = _Constant(1); // In the case of top being invalid then C3:C2:C0 is 0b101 - auto C3 = _Select(FEXCore::IR::COND_NEQ, - TopValid, OneConst, - OneConst, ZeroConst); + auto C3 = _Select(FEXCore::IR::COND_NEQ, TopValid, OneConst, OneConst, ZeroConst); auto C2 = TopValid; auto C0 = C3; // Mirror C3 until something other than zero is supported @@ -1296,38 +1204,36 @@ void OpDispatchBuilder::X87FCMOV(OpcodeArgs) { switch (Opcode) { case 0x3'C0: CC = 0x3; // JNC - break; + break; case 
0x2'C0: CC = 0x2; // JC - break; + break; case 0x2'C8: CC = 0x4; // JE - break; + break; case 0x3'C8: CC = 0x5; // JNE - break; + break; case 0x2'D0: CC = 0x6; // JNA - break; + break; case 0x3'D0: CC = 0x7; // JA - break; + break; case 0x2'D8: CC = 0xA; // JP - break; + break; case 0x3'D8: CC = 0xB; // JNP - break; - default: - LOGMAN_MSG_A_FMT("Unhandled FCMOV op: 0x{:x}", Opcode); - break; + break; + default: LOGMAN_MSG_A_FMT("Unhandled FCMOV op: 0x{:x}", Opcode); break; } auto ZeroConst = _Constant(0); auto AllOneConst = _Constant(0xffff'ffff'ffff'ffffull); - OrderedNode *SrcCond = SelectCC(CC, OpSize::i64Bit, AllOneConst, ZeroConst); - OrderedNode *VecCond = _VDupFromGPR(16, 8, SrcCond); + OrderedNode* SrcCond = SelectCC(CC, OpSize::i64Bit, AllOneConst, ZeroConst); + OrderedNode* VecCond = _VDupFromGPR(16, 8, SrcCond); auto top = GetX87Top(); OrderedNode* arg; @@ -1353,7 +1259,7 @@ void OpDispatchBuilder::X87EMMS(OpcodeArgs) { void OpDispatchBuilder::X87FFREE(OpcodeArgs) { // Only sets the selected stack register's tag bits to EMPTY - OrderedNode *top = GetX87Top(); + OrderedNode* top = GetX87Top(); // Implicit arg auto offset = _Constant(Op->OP & 7); @@ -1363,4 +1269,4 @@ void OpDispatchBuilder::X87FFREE(OpcodeArgs) { SetX87ValidTag(top, false); } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp index 5f758027ea..fd9c6fa285 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp @@ -22,24 +22,24 @@ class OrderedNode; #define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op -//Functions in X87.cpp (no change required) -//GetX87Top -//SetX87ValidTag -//GetX87ValidTag -//GetX87Tag (will need changing once special tag handling is implemented) -//SetX87FTW -//GetX87FTW (will need changing once special tag handling is implemented) -//SetX87Top -//X87ModifySTP 
-//EMMS -//FFREE -//FNSTENV -//FSTCW -//LDSW -//FNSTSW -//FXCH -//FCMOV -//FST(register to register) +// Functions in X87.cpp (no change required) +// GetX87Top +// SetX87ValidTag +// GetX87ValidTag +// GetX87Tag (will need changing once special tag handling is implemented) +// SetX87FTW +// GetX87FTW (will need changing once special tag handling is implemented) +// SetX87Top +// X87ModifySTP +// EMMS +// FFREE +// FNSTENV +// FSTCW +// LDSW +// FNSTSW +// FXCH +// FCMOV +// FST(register to register) // State loading duplicated from X87.cpp, setting host rounding mode // See issue @@ -65,33 +65,33 @@ void OpDispatchBuilder::FNINITF64(OpcodeArgs) { void OpDispatchBuilder::X87LDENVF64(OpcodeArgs) { auto Size = GetSrcSize(Op); - OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); + OrderedNode* Mem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); Mem = AppendSegmentOffset(Mem, Op->Flags); auto NewFCW = _LoadMem(GPRClass, 2, Mem, 2); - //ignore the rounding precision, we're always 64-bit in F64. - //extract rounding mode - OrderedNode *roundingMode = _Bfe(OpSize::i32Bit, 3, 10, NewFCW); + // ignore the rounding precision, we're always 64-bit in F64. 
+ // extract rounding mode + OrderedNode* roundingMode = _Bfe(OpSize::i32Bit, 3, 10, NewFCW); _SetRoundingMode(roundingMode); _StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW)); - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); auto NewFSW = _LoadMem(GPRClass, Size, MemLocation, Size); ReconstructX87StateFromFSW(NewFSW); { // FTW - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); SetX87FTW(_LoadMem(GPRClass, Size, MemLocation, Size)); } } void OpDispatchBuilder::X87FLDCWF64(OpcodeArgs) { - OrderedNode *NewFCW = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - //ignore the rounding precision, we're always 64-bit in F64. - //extract rounding mode - OrderedNode *roundingMode = _Bfe(OpSize::i32Bit, 3, 10, NewFCW); + OrderedNode* NewFCW = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); + // ignore the rounding precision, we're always 64-bit in F64. + // extract rounding mode + OrderedNode* roundingMode = _Bfe(OpSize::i32Bit, 3, 10, NewFCW); _SetRoundingMode(roundingMode); _StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW)); } @@ -106,13 +106,13 @@ void OpDispatchBuilder::FLDF64(OpcodeArgs) { size_t read_width = (width == 80) ? 16 : width / 8; - OrderedNode *data{}; - OrderedNode *converted{}; + OrderedNode* data {}; + OrderedNode* converted {}; if (!Op->Src[0].IsNone()) { // Read from memory data = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], read_width, Op->Flags); - // Convert to 64bit float + // Convert to 64bit float if constexpr (width == 32) { converted = _Float_FToF(8, 4, data); } else if constexpr (width == 80) { @@ -120,8 +120,7 @@ void OpDispatchBuilder::FLDF64(OpcodeArgs) { } else { converted = data; } - } - else { + } else { // Implicit arg (does this need to change with width?) 
auto offset = _Constant(Op->OP & 7); data = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, orig_top, offset), mask); @@ -136,12 +135,9 @@ void OpDispatchBuilder::FLDF64(OpcodeArgs) { _StoreContextIndexed(converted, top, 8, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::FLDF64<32>(OpcodeArgs); -template -void OpDispatchBuilder::FLDF64<64>(OpcodeArgs); -template -void OpDispatchBuilder::FLDF64<80>(OpcodeArgs); +template void OpDispatchBuilder::FLDF64<32>(OpcodeArgs); +template void OpDispatchBuilder::FLDF64<64>(OpcodeArgs); +template void OpDispatchBuilder::FLDF64<80>(OpcodeArgs); void OpDispatchBuilder::FBLDF64(OpcodeArgs) { // Update TOP @@ -152,8 +148,8 @@ void OpDispatchBuilder::FBLDF64(OpcodeArgs) { SetX87Top(top); // Read from memory - OrderedNode *data = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], 16, Op->Flags); - OrderedNode *converted = _F80BCDLoad(data); + OrderedNode* data = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], 16, Op->Flags); + OrderedNode* converted = _F80BCDLoad(data); converted = _F80CVT(8, converted); _StoreContextIndexed(converted, top, 8, MMBaseOffset(), 16, FPRClass); } @@ -162,7 +158,7 @@ void OpDispatchBuilder::FBSTPF64(OpcodeArgs) { auto orig_top = GetX87Top(); auto data = _LoadContextIndexed(orig_top, 8, MMBaseOffset(), 16, FPRClass); - OrderedNode *converted = _F80CVTTo(data, 8); + OrderedNode* converted = _F80CVTTo(data, 8); converted = _F80BCDStore(converted); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, converted, 10, 1); @@ -185,20 +181,13 @@ void OpDispatchBuilder::FLDF64_Const(OpcodeArgs) { _StoreContextIndexed(data, top, 8, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::FLDF64_Const<0x3FF0000000000000>(OpcodeArgs); // 1.0 -template -void OpDispatchBuilder::FLDF64_Const<0x400A934F0979A372>(OpcodeArgs); // log2l(10) -template -void OpDispatchBuilder::FLDF64_Const<0x3FF71547652B82FE>(OpcodeArgs); // log2l(e) -template -void 
OpDispatchBuilder::FLDF64_Const<0x400921FB54442D18>(OpcodeArgs); // pi -template -void OpDispatchBuilder::FLDF64_Const<0x3FD34413509F79FF>(OpcodeArgs); // log10l(2) -template -void OpDispatchBuilder::FLDF64_Const<0x3FE62E42FEFA39EF>(OpcodeArgs); // log(2) -template -void OpDispatchBuilder::FLDF64_Const<0>(OpcodeArgs); // 0.0 +template void OpDispatchBuilder::FLDF64_Const<0x3FF0000000000000>(OpcodeArgs); // 1.0 +template void OpDispatchBuilder::FLDF64_Const<0x400A934F0979A372>(OpcodeArgs); // log2l(10) +template void OpDispatchBuilder::FLDF64_Const<0x3FF71547652B82FE>(OpcodeArgs); // log2l(e) +template void OpDispatchBuilder::FLDF64_Const<0x400921FB54442D18>(OpcodeArgs); // pi +template void OpDispatchBuilder::FLDF64_Const<0x3FD34413509F79FF>(OpcodeArgs); // log10l(2) +template void OpDispatchBuilder::FLDF64_Const<0x3FE62E42FEFA39EF>(OpcodeArgs); // log(2) +template void OpDispatchBuilder::FLDF64_Const<0>(OpcodeArgs); // 0.0 void OpDispatchBuilder::FILDF64(OpcodeArgs) { // Update TOP @@ -210,7 +199,7 @@ void OpDispatchBuilder::FILDF64(OpcodeArgs) { size_t read_width = GetSrcSize(Op); // Read from memory auto data = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], read_width, Op->Flags); - if(read_width == 2) { + if (read_width == 2) { data = _Sbfe(OpSize::i64Bit, read_width * 8, 0, data); } auto converted = _Float_FromGPR_S(8, read_width == 4 ? 
4 : 8, data); @@ -223,14 +212,14 @@ void OpDispatchBuilder::FSTF64(OpcodeArgs) { auto orig_top = GetX87Top(); auto data = _LoadContextIndexed(orig_top, 8, MMBaseOffset(), 16, FPRClass); if constexpr (width == 64) { - //Store 64-bit float directly + // Store 64-bit float directly StoreResult_WithOpSize(FPRClass, Op, Op->Dest, data, 8, 1); } else if constexpr (width == 32) { - //Convert to 32-bit float and store + // Convert to 32-bit float and store auto result = _Float_FToF(4, 8, data); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, result, 4, 1); } else if constexpr (width == 80) { - //Convert to 80-bit float + // Convert to 80-bit float auto result = _F80CVTTo(data, 8); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, result, 10, 1); } @@ -244,19 +233,16 @@ void OpDispatchBuilder::FSTF64(OpcodeArgs) { } } -template -void OpDispatchBuilder::FSTF64<32>(OpcodeArgs); -template -void OpDispatchBuilder::FSTF64<64>(OpcodeArgs); -template -void OpDispatchBuilder::FSTF64<80>(OpcodeArgs); +template void OpDispatchBuilder::FSTF64<32>(OpcodeArgs); +template void OpDispatchBuilder::FSTF64<64>(OpcodeArgs); +template void OpDispatchBuilder::FSTF64<80>(OpcodeArgs); template void OpDispatchBuilder::FISTF64(OpcodeArgs) { auto Size = GetSrcSize(Op); auto orig_top = GetX87Top(); - OrderedNode *data = _LoadContextIndexed(orig_top, 8, MMBaseOffset(), 16, FPRClass); + OrderedNode* data = _LoadContextIndexed(orig_top, 8, MMBaseOffset(), 16, FPRClass); if constexpr (Truncate) { data = _Float_ToGPR_ZS(Size == 4 ? 
4 : 8, 8, data); } else { @@ -273,18 +259,16 @@ void OpDispatchBuilder::FISTF64(OpcodeArgs) { } } -template -void OpDispatchBuilder::FISTF64(OpcodeArgs); -template -void OpDispatchBuilder::FISTF64(OpcodeArgs); +template void OpDispatchBuilder::FISTF64(OpcodeArgs); +template void OpDispatchBuilder::FISTF64(OpcodeArgs); -template +template void OpDispatchBuilder::FADDF64(OpcodeArgs) { auto top = GetX87Top(); - OrderedNode *StackLocation = top; + OrderedNode* StackLocation = top; - OrderedNode *arg{}; - OrderedNode *b{}; + OrderedNode* arg {}; + OrderedNode* b {}; auto mask = _Constant(7); @@ -292,7 +276,7 @@ void OpDispatchBuilder::FADDF64(OpcodeArgs) { // Memory arg if constexpr (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - if(width == 16) { + if (width == 16) { arg = _Sbfe(OpSize::i64Bit, 16, 0, arg); } b = _Float_FromGPR_S(8, width == 64 ? 8 : 4, arg); @@ -326,26 +310,20 @@ void OpDispatchBuilder::FADDF64(OpcodeArgs) { _StoreContextIndexed(result, StackLocation, 8, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::FADDF64<32, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FADDF64<64, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FADDF64<80, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FADDF64<80, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); +template void OpDispatchBuilder::FADDF64<32, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FADDF64<64, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FADDF64<80, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FADDF64<80, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); -template -void OpDispatchBuilder::FADDF64<16, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void 
OpDispatchBuilder::FADDF64<32, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FADDF64<16, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FADDF64<32, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); template void OpDispatchBuilder::FMULF64(OpcodeArgs) { auto top = GetX87Top(); - OrderedNode *StackLocation = top; - OrderedNode *arg{}; - OrderedNode *b{}; + OrderedNode* StackLocation = top; + OrderedNode* arg {}; + OrderedNode* b {}; auto mask = _Constant(7); @@ -353,7 +331,7 @@ void OpDispatchBuilder::FMULF64(OpcodeArgs) { // Memory arg if constexpr (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - if(width == 16) { + if (width == 16) { arg = _Sbfe(OpSize::i64Bit, 16, 0, arg); } b = _Float_FromGPR_S(8, width == 64 ? 8 : 4, arg); @@ -390,34 +368,28 @@ void OpDispatchBuilder::FMULF64(OpcodeArgs) { _StoreContextIndexed(result, StackLocation, 8, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::FMULF64<32, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FMULF64<64, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FMULF64<80, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FMULF64<80, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); +template void OpDispatchBuilder::FMULF64<32, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FMULF64<64, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FMULF64<80, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FMULF64<80, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); -template -void OpDispatchBuilder::FMULF64<16, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FMULF64<32, true, 
OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FMULF64<16, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FMULF64<32, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); template void OpDispatchBuilder::FDIVF64(OpcodeArgs) { auto top = GetX87Top(); - OrderedNode *StackLocation = top; - OrderedNode *arg{}; - OrderedNode *b{}; + OrderedNode* StackLocation = top; + OrderedNode* arg {}; + OrderedNode* b {}; auto mask = _Constant(7); - if (!Op->Src[0].IsNone()) { + if (!Op->Src[0].IsNone()) { // Memory arg if constexpr (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - if(width == 16) { + if (width == 16) { arg = _Sbfe(OpSize::i64Bit, 16, 0, arg); } b = _Float_FromGPR_S(8, width == 64 ? 8 : 4, arg); @@ -440,11 +412,10 @@ void OpDispatchBuilder::FDIVF64(OpcodeArgs) { auto a = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass); - OrderedNode *result{}; + OrderedNode* result {}; if constexpr (reverse) { result = _VFDiv(8, 8, b, a); - } - else { + } else { result = _VFDiv(8, 8, a, b); } @@ -460,50 +431,38 @@ void OpDispatchBuilder::FDIVF64(OpcodeArgs) { _StoreContextIndexed(result, StackLocation, 8, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::FDIVF64<32, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIVF64<32, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIVF64<32, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIVF64<32, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIVF64<64, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIVF64<64, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIVF64<64, false, false, 
OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIVF64<64, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIVF64<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIVF64<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIVF64<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIVF64<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIVF64<80, false, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); -template -void OpDispatchBuilder::FDIVF64<80, false, true, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); +template void OpDispatchBuilder::FDIVF64<80, false, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); +template void OpDispatchBuilder::FDIVF64<80, false, true, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); -template -void OpDispatchBuilder::FDIVF64<16, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIVF64<16, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIVF64<16, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIVF64<16, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIVF64<32, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FDIVF64<32, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIVF64<32, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FDIVF64<32, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); template void OpDispatchBuilder::FSUBF64(OpcodeArgs) { auto top = 
GetX87Top(); - OrderedNode *StackLocation = top; - OrderedNode *arg{}; - OrderedNode *b{}; + OrderedNode* StackLocation = top; + OrderedNode* arg {}; + OrderedNode* b {}; auto mask = _Constant(7); - if (!Op->Src[0].IsNone()) { + if (!Op->Src[0].IsNone()) { // Memory arg if constexpr (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - if(width == 16) { + if (width == 16) { arg = _Sbfe(OpSize::i64Bit, 16, 0, arg); } b = _Float_FromGPR_S(8, width == 64 ? 8 : 4, arg); @@ -526,11 +485,10 @@ void OpDispatchBuilder::FSUBF64(OpcodeArgs) { auto a = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass); - OrderedNode *result{}; + OrderedNode* result {}; if constexpr (reverse) { result = _VFSub(8, 8, b, a); - } - else { + } else { result = _VFSub(8, 8, a, b); } @@ -547,35 +505,23 @@ void OpDispatchBuilder::FSUBF64(OpcodeArgs) { _StoreContextIndexed(result, StackLocation, 8, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::FSUBF64<32, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUBF64<32, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUBF64<32, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUBF64<32, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUBF64<64, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUBF64<64, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUBF64<64, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUBF64<64, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUBF64<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUBF64<80, false, true, 
OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUBF64<80, false, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUBF64<80, false, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUBF64<80, false, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); -template -void OpDispatchBuilder::FSUBF64<80, false, true, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); +template void OpDispatchBuilder::FSUBF64<80, false, false, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); +template void OpDispatchBuilder::FSUBF64<80, false, true, OpDispatchBuilder::OpResult::RES_STI>(OpcodeArgs); -template -void OpDispatchBuilder::FSUBF64<16, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUBF64<16, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUBF64<16, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUBF64<16, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUBF64<32, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); -template -void OpDispatchBuilder::FSUBF64<32, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUBF64<32, true, false, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); +template void OpDispatchBuilder::FSUBF64<32, true, true, OpDispatchBuilder::OpResult::RES_ST0>(OpcodeArgs); void OpDispatchBuilder::FCHSF64(OpcodeArgs) { auto top = GetX87Top(); @@ -598,7 +544,7 @@ void OpDispatchBuilder::FTSTF64(OpcodeArgs) { auto a = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass); auto low = _Constant(0); - OrderedNode *data = _VCastFromGPR(8, 8, low); + OrderedNode* data = _VCastFromGPR(8, 8, low); // We are going to clobber NZCV, make sure it's in a GPR first. 
GetNZCV(); @@ -609,7 +555,7 @@ void OpDispatchBuilder::FTSTF64(OpcodeArgs) { ConvertNZCVToX87(); } -//TODO: This should obey rounding mode +// TODO: This should obey rounding mode void OpDispatchBuilder::FRNDINTF64(OpcodeArgs) { auto top = GetX87Top(); auto a = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass); @@ -646,14 +592,14 @@ void OpDispatchBuilder::FCOMIF64(OpcodeArgs) { auto top = GetX87Top(); auto mask = _Constant(7); - OrderedNode *arg{}; - OrderedNode *b{}; + OrderedNode* arg {}; + OrderedNode* b {}; if (!Op->Src[0].IsNone()) { // Memory arg if constexpr (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - if(width == 16) { + if (width == 16) { arg = _Sbfe(OpSize::i64Bit, 16, 0, arg); } b = _Float_FromGPR_S(8, width == 64 ? 8 : 4, arg); @@ -679,8 +625,7 @@ void OpDispatchBuilder::FCOMIF64(OpcodeArgs) { _FCmp(8, a, b); PossiblySetNZCVBits = ~0; ConvertNZCVToX87(); - } - else { + } else { // Invalidate deferred flags early // OF, SF, AF, PF all undefined InvalidateDeferredFlags(); @@ -698,8 +643,7 @@ void OpDispatchBuilder::FCOMIF64(OpcodeArgs) { // Set the new top now top = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, top, _Constant(1)), mask); SetX87Top(top); - } - else if ((Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) != 0) { + } else if ((Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) != 0) { // if we are popping then we must first mark this location as empty SetX87ValidTag(top, false); // Set the new top now @@ -708,24 +652,17 @@ void OpDispatchBuilder::FCOMIF64(OpcodeArgs) { } } -template -void OpDispatchBuilder::FCOMIF64<32, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); +template void OpDispatchBuilder::FCOMIF64<32, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); -template -void OpDispatchBuilder::FCOMIF64<64, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); +template void OpDispatchBuilder::FCOMIF64<64, false, 
OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); -template -void OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); -template -void OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>(OpcodeArgs); -template -void OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>(OpcodeArgs); +template void OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); +template void OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>(OpcodeArgs); +template void OpDispatchBuilder::FCOMIF64<80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>(OpcodeArgs); -template -void OpDispatchBuilder::FCOMIF64<16, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); +template void OpDispatchBuilder::FCOMIF64<16, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); -template -void OpDispatchBuilder::FCOMIF64<32, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); +template void OpDispatchBuilder::FCOMIF64<32, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>(OpcodeArgs); void OpDispatchBuilder::FSQRTF64(OpcodeArgs) { @@ -746,8 +683,7 @@ void OpDispatchBuilder::X87UnaryOpF64(OpcodeArgs) { DeriveOp(result, IROp, _F64SIN(a)); - if constexpr (IROp == IR::OP_F64SIN || - IROp == IR::OP_F64COS) { + if constexpr (IROp == IR::OP_F64SIN || IROp == IR::OP_F64COS) { // TODO: ACCURACY: should check source is in range –2^63 to +2^63 SetRFLAG(_Constant(0)); } @@ -756,12 +692,9 @@ void OpDispatchBuilder::X87UnaryOpF64(OpcodeArgs) { _StoreContextIndexed(result, top, 8, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::X87UnaryOpF64(OpcodeArgs); -template -void OpDispatchBuilder::X87UnaryOpF64(OpcodeArgs); -template -void OpDispatchBuilder::X87UnaryOpF64(OpcodeArgs); +template void OpDispatchBuilder::X87UnaryOpF64(OpcodeArgs); 
+template void OpDispatchBuilder::X87UnaryOpF64(OpcodeArgs); +template void OpDispatchBuilder::X87UnaryOpF64(OpcodeArgs); template @@ -769,16 +702,15 @@ void OpDispatchBuilder::X87BinaryOpF64(OpcodeArgs) { auto top = GetX87Top(); auto mask = _Constant(7); - OrderedNode *st1 = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, top, _Constant(1)), mask); + OrderedNode* st1 = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, top, _Constant(1)), mask); auto a = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass); st1 = _LoadContextIndexed(st1, 8, MMBaseOffset(), 16, FPRClass); DeriveOp(result, IROp, _F64ATAN(a, st1)); - if constexpr (IROp == IR::OP_F64FPREM || - IROp == IR::OP_F64FPREM1) { - //TODO: Set C0 to Q2, C3 to Q1, C1 to Q0 + if constexpr (IROp == IR::OP_F64FPREM || IROp == IR::OP_F64FPREM1) { + // TODO: Set C0 to Q2, C3 to Q1, C1 to Q0 SetRFLAG(_Constant(0)); } @@ -786,12 +718,9 @@ void OpDispatchBuilder::X87BinaryOpF64(OpcodeArgs) { _StoreContextIndexed(result, top, 8, MMBaseOffset(), 16, FPRClass); } -template -void OpDispatchBuilder::X87BinaryOpF64(OpcodeArgs); -template -void OpDispatchBuilder::X87BinaryOpF64(OpcodeArgs); -template -void OpDispatchBuilder::X87BinaryOpF64(OpcodeArgs); +template void OpDispatchBuilder::X87BinaryOpF64(OpcodeArgs); +template void OpDispatchBuilder::X87BinaryOpF64(OpcodeArgs); +template void OpDispatchBuilder::X87BinaryOpF64(OpcodeArgs); void OpDispatchBuilder::X87SinCosF64(OpcodeArgs) { auto orig_top = GetX87Top(); @@ -821,8 +750,8 @@ void OpDispatchBuilder::X87FYL2XF64(OpcodeArgs) { auto top = _And(OpSize::i32Bit, _Add(OpSize::i32Bit, orig_top, _Constant(1)), _Constant(7)); SetX87Top(top); - OrderedNode *st0 = _LoadContextIndexed(orig_top, 8, MMBaseOffset(), 16, FPRClass); - OrderedNode *st1 = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass); + OrderedNode* st0 = _LoadContextIndexed(orig_top, 8, MMBaseOffset(), 16, FPRClass); + OrderedNode* st1 = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass); if (Plus1) { auto one = 
_VCastFromGPR(8, 8, _Constant(0x3FF0000000000000)); @@ -863,7 +792,7 @@ void OpDispatchBuilder::X87ATANF64(OpcodeArgs) { SetX87Top(top); auto a = _LoadContextIndexed(orig_top, 8, MMBaseOffset(), 16, FPRClass); - OrderedNode *st1 = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass); + OrderedNode* st1 = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass); auto result = _F64ATAN(st1, a); @@ -871,7 +800,7 @@ void OpDispatchBuilder::X87ATANF64(OpcodeArgs) { _StoreContextIndexed(result, top, 8, MMBaseOffset(), 16, FPRClass); } -//This function converts to F80 on save for compatibility +// This function converts to F80 on save for compatibility void OpDispatchBuilder::X87FNSAVEF64(OpcodeArgs) { // 14 bytes for 16bit @@ -894,17 +823,17 @@ void OpDispatchBuilder::X87FNSAVEF64(OpcodeArgs) { // 4 bytes : data pointer selector auto Size = GetDstSize(Op); - OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); + OrderedNode* Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); Mem = AppendSegmentOffset(Mem, Op->Flags); - OrderedNode *Top = GetX87Top(); + OrderedNode* Top = GetX87Top(); { auto FCW = _LoadContext(2, GPRClass, offsetof(FEXCore::Core::CPUState, FCW)); _StoreMem(GPRClass, Size, Mem, FCW, Size); } { - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); _StoreMem(GPRClass, Size, MemLocation, ReconstructFSW(), Size); } @@ -912,35 +841,35 @@ void OpDispatchBuilder::X87FNSAVEF64(OpcodeArgs) { { // FTW - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); _StoreMem(GPRClass, Size, MemLocation, GetX87FTW(), Size); } { // Instruction Offset - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 3)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 3)); _StoreMem(GPRClass, 
Size, MemLocation, ZeroConst, Size); } { // Instruction CS selector (+ Opcode) - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 4)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 4)); _StoreMem(GPRClass, Size, MemLocation, ZeroConst, Size); } { // Data pointer offset - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 5)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 5)); _StoreMem(GPRClass, Size, MemLocation, ZeroConst, Size); } { // Data pointer selector - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 6)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 6)); _StoreMem(GPRClass, Size, MemLocation, ZeroConst, Size); } - OrderedNode *ST0Location = _Add(OpSize::i64Bit, Mem, _Constant(Size * 7)); + OrderedNode* ST0Location = _Add(OpSize::i64Bit, Mem, _Constant(Size * 7)); auto OneConst = _Constant(1); auto SevenConst = _Constant(7); @@ -968,17 +897,17 @@ void OpDispatchBuilder::X87FNSAVEF64(OpcodeArgs) { FNINIT(Op); } -//This function converts from F80 on load for compatibility +// This function converts from F80 on load for compatibility void OpDispatchBuilder::X87FRSTORF64(OpcodeArgs) { auto Size = GetSrcSize(Op); - OrderedNode *Mem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); + OrderedNode* Mem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); Mem = AppendSegmentOffset(Mem, Op->Flags); auto NewFCW = _LoadMem(GPRClass, 2, Mem, 2); - //ignore the rounding precision, we're always 64-bit in F64. - //extract rounding mode - OrderedNode *roundingMode = NewFCW; + // ignore the rounding precision, we're always 64-bit in F64. 
+ // extract rounding mode + OrderedNode* roundingMode = NewFCW; auto roundShift = _Constant(10); auto roundMask = _Constant(3); roundingMode = _Lshr(OpSize::i32Bit, roundingMode, roundShift); @@ -987,17 +916,17 @@ void OpDispatchBuilder::X87FRSTORF64(OpcodeArgs) { _StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW)); _StoreContext(2, GPRClass, NewFCW, offsetof(FEXCore::Core::CPUState, FCW)); - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 1)); auto NewFSW = _LoadMem(GPRClass, Size, MemLocation, Size); auto Top = ReconstructX87StateFromFSW(NewFSW); { // FTW - OrderedNode *MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); + OrderedNode* MemLocation = _Add(OpSize::i64Bit, Mem, _Constant(Size * 2)); SetX87FTW(_LoadMem(GPRClass, Size, MemLocation, Size)); } - OrderedNode *ST0Location = _Add(OpSize::i64Bit, Mem, _Constant(Size * 7)); + OrderedNode* ST0Location = _Add(OpSize::i64Bit, Mem, _Constant(Size * 7)); auto OneConst = _Constant(1); auto SevenConst = _Constant(7); @@ -1005,14 +934,14 @@ void OpDispatchBuilder::X87FRSTORF64(OpcodeArgs) { auto low = _Constant(~0ULL); auto high = _Constant(0xFFFF); - OrderedNode *Mask = _VCastFromGPR(16, 8, low); + OrderedNode* Mask = _VCastFromGPR(16, 8, low); Mask = _VInsGPR(16, 8, 1, Mask, high); for (int i = 0; i < 7; ++i) { - OrderedNode *Reg = _LoadMem(FPRClass, 16, ST0Location, 1); + OrderedNode* Reg = _LoadMem(FPRClass, 16, ST0Location, 1); // Mask off the top bits Reg = _VAnd(16, 16, Reg, Mask); - //Convert to double precision + // Convert to double precision Reg = _F80CVT(8, Reg); _StoreContextIndexed(Reg, Top, 8, MMBaseOffset(), 16, FPRClass); @@ -1025,20 +954,20 @@ void OpDispatchBuilder::X87FRSTORF64(OpcodeArgs) { // Lower 64bits [63:0] // upper 16 bits [79:64] - OrderedNode *Reg = _LoadMem(FPRClass, 8, ST0Location, 1); + OrderedNode* Reg = _LoadMem(FPRClass, 8, ST0Location, 1); 
ST0Location = _Add(OpSize::i64Bit, ST0Location, _Constant(8)); - OrderedNode *RegHigh = _LoadMem(FPRClass, 2, ST0Location, 1); + OrderedNode* RegHigh = _LoadMem(FPRClass, 2, ST0Location, 1); Reg = _VInsElement(16, 2, 4, 0, Reg, RegHigh); - Reg = _F80CVT(8, Reg); //Convert to double precision + Reg = _F80CVT(8, Reg); // Convert to double precision _StoreContextIndexed(Reg, Top, 8, MMBaseOffset(), 16, FPRClass); } -//FXAM needs change +// FXAM needs change void OpDispatchBuilder::X87FXAMF64(OpcodeArgs) { auto top = GetX87Top(); auto a = _LoadContextIndexed(top, 8, MMBaseOffset(), 16, FPRClass); - OrderedNode *Result = _VExtractToGPR(8, 8, a, 0); + OrderedNode* Result = _VExtractToGPR(8, 8, a, 0); // Extract the sign bit Result = _Bfe(OpSize::i64Bit, 1, 63, Result); @@ -1051,9 +980,7 @@ void OpDispatchBuilder::X87FXAMF64(OpcodeArgs) { auto OneConst = _Constant(1); // In the case of top being invalid then C3:C2:C0 is 0b101 - auto C3 = _Select(FEXCore::IR::COND_EQ, - TopValid, OneConst, - ZeroConst, OneConst); + auto C3 = _Select(FEXCore::IR::COND_EQ, TopValid, OneConst, ZeroConst, OneConst); auto C2 = TopValid; auto C0 = C3; // Mirror C3 until something other than zero is supported @@ -1063,4 +990,4 @@ void OpDispatchBuilder::X87FXAMF64(OpcodeArgs) { } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/Core/X86HelperGen.cpp b/FEXCore/Source/Interface/Core/X86HelperGen.cpp index f59982e772..357835ef62 100644 --- a/FEXCore/Source/Interface/Core/X86HelperGen.cpp +++ b/FEXCore/Source/Interface/Core/X86HelperGen.cpp @@ -39,15 +39,15 @@ X86GeneratedCode::X86GeneratedCode() { // Falling back to this generated code segment still allows a backtrace to work, just might not show // the symbol as VDSO since there is no ELF to parse. 
constexpr std::array sigreturn_32_code = { - 0x58, // pop eax + 0x58, // pop eax 0xb8, 0x77, 0x00, 0x00, 0x00, // mov eax, 0x77 - 0xcd, 0x80, // int 0x80 - 0x90, // nop + 0xcd, 0x80, // int 0x80 + 0x90, // nop }; constexpr std::array rt_sigreturn_32_code = { 0xb8, 0xad, 0x00, 0x00, 0x00, // mov eax, 0xad - 0xcd, 0x80, // int 0x80 + 0xcd, 0x80, // int 0x80 }; CallbackReturn = reinterpret_cast(CodePtr); @@ -84,10 +84,9 @@ void* X86GeneratedCode::AllocateGuestCodeSpace(size_t Size) { // We need to have the sigret handler in the lower 32bits of memory space // Scan top down and try to allocate a location for (size_t Location = 0xFFFF'E000; Location != 0x0; Location -= 0x1000) { - void *Ptr = ::mmap(reinterpret_cast(Location), Size, PROT_READ | PROT_WRITE, MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + void* Ptr = ::mmap(reinterpret_cast(Location), Size, PROT_READ | PROT_WRITE, MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (Ptr != MAP_FAILED && - reinterpret_cast(Ptr) >= LOCATION_MAX) { + if (Ptr != MAP_FAILED && reinterpret_cast(Ptr) >= LOCATION_MAX) { // Failed to map in the lower 32bits // Try again // Can happen in the case that host kernel ignores MAP_FIXED_NOREPLACE @@ -108,5 +107,4 @@ void* X86GeneratedCode::AllocateGuestCodeSpace(size_t Size) { #endif } -} - +} // namespace FEXCore diff --git a/FEXCore/Source/Interface/Core/X86Tables.cpp b/FEXCore/Source/Interface/Core/X86Tables.cpp index 430de34730..be634acd9b 100644 --- a/FEXCore/Source/Interface/Core/X86Tables.cpp +++ b/FEXCore/Source/Interface/Core/X86Tables.cpp @@ -24,4 +24,4 @@ void InitializeInfoTables(Context::OperatingMode Mode) { InitializeH0F3ATables(Mode); } -} +} // namespace FEXCore::X86Tables diff --git a/FEXCore/Source/Interface/GDBJIT/GDBJIT.cpp b/FEXCore/Source/Interface/GDBJIT/GDBJIT.cpp index 0488806a84..bc572b6a71 100644 --- a/FEXCore/Source/Interface/GDBJIT/GDBJIT.cpp +++ b/FEXCore/Source/Interface/GDBJIT/GDBJIT.cpp @@ -14,9 +14,9 @@ extern "C" { enum 
jit_actions_t { JIT_NOACTION = 0, JIT_REGISTER_FN, JIT_UNREGISTER_FN }; struct jit_code_entry { - jit_code_entry *next_entry; - jit_code_entry *prev_entry; - const char *symfile_addr; + jit_code_entry* next_entry; + jit_code_entry* prev_entry; + const char* symfile_addr; uint64_t symfile_size; }; @@ -25,8 +25,8 @@ struct jit_descriptor { /* This type should be jit_actions_t, but we use uint32_t to be explicit about the bitwidth. */ uint32_t action_flag; - jit_code_entry *relevant_entry; - jit_code_entry *first_entry; + jit_code_entry* relevant_entry; + jit_code_entry* first_entry; }; /* Make sure to specify the version statically, because the @@ -42,9 +42,8 @@ void __attribute__((noinline)) __jit_debug_register_code() { namespace FEXCore { -void GDBJITRegister(FEXCore::IR::AOTIRCacheEntry *Entry, uintptr_t VAFileStart, - uint64_t GuestRIP, uintptr_t HostEntry, - FEXCore::Core::DebugData *DebugData) { +void GDBJITRegister(FEXCore::IR::AOTIRCacheEntry* Entry, uintptr_t VAFileStart, uint64_t GuestRIP, uintptr_t HostEntry, + FEXCore::Core::DebugData* DebugData) { auto map = Entry->SourcecodeMap.get(); if (map) { @@ -52,32 +51,28 @@ void GDBJITRegister(FEXCore::IR::AOTIRCacheEntry *Entry, uintptr_t VAFileStart, auto Sym = map->FindSymbolMapping(FileOffset); - auto SymName = HLE::SourcecodeSymbolMapping::SymName( - Sym, Entry->Filename, HostEntry, FileOffset); + auto SymName = HLE::SourcecodeSymbolMapping::SymName(Sym, Entry->Filename, HostEntry, FileOffset); fextl::vector Lines; - for (const auto &GuestOpcode : DebugData->GuestOpcodes) { - auto Line = map->FindLineMapping(GuestRIP + GuestOpcode.GuestEntryOffset - - VAFileStart); + for (const auto& GuestOpcode : DebugData->GuestOpcodes) { + auto Line = map->FindLineMapping(GuestRIP + GuestOpcode.GuestEntryOffset - VAFileStart); if (Line) { - Lines.push_back( - {Line->LineNumber, HostEntry + GuestOpcode.HostEntryOffset}); + Lines.push_back({Line->LineNumber, HostEntry + GuestOpcode.HostEntryOffset}); } } - size_t size = 
sizeof(info_t) + 1 * sizeof(blocks_t) + - Lines.size() * sizeof(gdb_line_mapping); + size_t size = sizeof(info_t) + 1 * sizeof(blocks_t) + Lines.size() * sizeof(gdb_line_mapping); - auto mem = (uint8_t *)malloc(size); + auto mem = (uint8_t*)malloc(size); auto base = mem; - info_t *info = (info_t *)mem; + info_t* info = (info_t*)mem; mem += sizeof(info_t); strncpy(info->filename, map->SourceFile.c_str(), 511); info->nblocks = 1; - auto blocks = (blocks_t *)mem; + auto blocks = (blocks_t*)mem; info->blocks_ofs = mem - base; mem += info->nblocks * sizeof(blocks_t); @@ -90,7 +85,7 @@ void GDBJITRegister(FEXCore::IR::AOTIRCacheEntry *Entry, uintptr_t VAFileStart, info->nlines = Lines.size(); - auto lines = (gdb_line_mapping *)mem; + auto lines = (gdb_line_mapping*)mem; info->lines_ofs = mem - base; mem += info->nlines * sizeof(gdb_line_mapping); @@ -98,9 +93,9 @@ void GDBJITRegister(FEXCore::IR::AOTIRCacheEntry *Entry, uintptr_t VAFileStart, memcpy(lines, &Lines.at(0), info->nlines * sizeof(gdb_line_mapping)); } - auto entry = new jit_code_entry{0, 0, 0, 0}; + auto entry = new jit_code_entry {0, 0, 0, 0}; - entry->symfile_addr = (const char *)info; + entry->symfile_addr = (const char*)info; entry->symfile_size = size; if (__jit_debug_descriptor.first_entry) { @@ -118,11 +113,8 @@ void GDBJITRegister(FEXCore::IR::AOTIRCacheEntry *Entry, uintptr_t VAFileStart, } // namespace FEXCore #else namespace FEXCore { -void GDBJITRegister([[maybe_unused]] FEXCore::IR::AOTIRCacheEntry *Entry, - [[maybe_unused]] uintptr_t VAFileStart, - [[maybe_unused]] uint64_t GuestRIP, - [[maybe_unused]] uintptr_t HostEntry, - [[maybe_unused]] FEXCore::Core::DebugData *DebugData) { +void GDBJITRegister([[maybe_unused]] FEXCore::IR::AOTIRCacheEntry* Entry, [[maybe_unused]] uintptr_t VAFileStart, [[maybe_unused]] uint64_t GuestRIP, + [[maybe_unused]] uintptr_t HostEntry, [[maybe_unused]] FEXCore::Core::DebugData* DebugData) { ERROR_AND_DIE_FMT("GDBSymbols support not compiled in"); } } // namespace 
FEXCore diff --git a/FEXCore/Source/Interface/HLE/Thunks/Thunks.cpp b/FEXCore/Source/Interface/HLE/Thunks/Thunks.cpp index 48faeee99a..9d62de199d 100644 --- a/FEXCore/Source/Interface/HLE/Thunks/Thunks.cpp +++ b/FEXCore/Source/Interface/HLE/Thunks/Thunks.cpp @@ -39,19 +39,18 @@ extern "C" { #define JEMALLOC_NOTHROW __attribute__((nothrow)) // Forward declare jemalloc functions because we can't include the headers from the glibc jemalloc project. // This is because we can't simultaneously set up include paths for both of our internal jemalloc modules. -FEX_DEFAULT_VISIBILITY JEMALLOC_NOTHROW extern int glibc_je_is_known_allocation(void *ptr); +FEX_DEFAULT_VISIBILITY JEMALLOC_NOTHROW extern int glibc_je_is_known_allocation(void* ptr); } #endif #ifndef _WIN32 static __attribute__((aligned(16), naked, section("HostToGuestTrampolineTemplate"))) void HostToGuestTrampolineTemplate() { #if defined(_M_X86_64) - asm( - "lea 0f(%rip), %r11 \n" - "jmpq *0f(%rip) \n" - ".align 8 \n" - "0: \n" - ".quad 0, 0, 0, 0 \n" // TrampolineInstanceInfo + asm("lea 0f(%rip), %r11 \n" + "jmpq *0f(%rip) \n" + ".align 8 \n" + "0: \n" + ".quad 0, 0, 0, 0 \n" // TrampolineInstanceInfo ); #elif defined(_M_ARM_64) asm( @@ -76,441 +75,431 @@ extern char __stop_HostToGuestTrampolineTemplate[]; namespace FEXCore { #ifndef _WIN32 - struct LoadlibArgs { - const char *Name; +struct LoadlibArgs { + const char* Name; +}; + +static thread_local FEXCore::Core::InternalThreadState* Thread = nullptr; + + +struct ExportEntry { + uint8_t* sha256; + ThunkedFunction* Fn; +}; + +struct TrampolineInstanceInfo { + void* HostPacker; + uintptr_t CallCallback; + uintptr_t GuestUnpacker; + uintptr_t GuestTarget; +}; + +// Opaque type pointing to an instance of HostToGuestTrampolineTemplate and its +// embedded TrampolineInstanceInfo +struct HostToGuestTrampolinePtr; +const auto HostToGuestTrampolineSize = __stop_HostToGuestTrampolineTemplate - __start_HostToGuestTrampolineTemplate; + +static TrampolineInstanceInfo& 
GetInstanceInfo(HostToGuestTrampolinePtr* Trampoline) { + const auto Length = __stop_HostToGuestTrampolineTemplate - __start_HostToGuestTrampolineTemplate; + const auto InstanceInfoOffset = Length - sizeof(TrampolineInstanceInfo); + return *reinterpret_cast(reinterpret_cast(Trampoline) + InstanceInfoOffset); +} + +struct GuestcallInfo { + uintptr_t GuestUnpacker; + uintptr_t GuestTarget; + + bool operator==(const GuestcallInfo&) const noexcept = default; +}; + +struct GuestcallInfoHash { + size_t operator()(const GuestcallInfo& x) const noexcept { + // Hash only the target address, which is generally unique. + // For the unlikely case of a hash collision, fextl::unordered_map still picks the correct bucket entry. + return std::hash {}(x.GuestTarget); + } +}; + +// Bits in a SHA256 sum are already randomly distributed, so truncation yields a suitable hash function +struct TruncatingSHA256Hash { + size_t operator()(const FEXCore::IR::SHA256Sum& SHA256Sum) const noexcept { + return (const size_t&)SHA256Sum; + } +}; + +HostToGuestTrampolinePtr* MakeHostTrampolineForGuestFunction(void* HostPacker, uintptr_t GuestTarget, uintptr_t GuestUnpacker); + +struct ThunkHandler_impl final : public ThunkHandler { + std::shared_mutex ThunksMutex; + + fextl::unordered_map Thunks = { + {// sha256(fex:loadlib) + {0x27, 0x7e, 0xb7, 0x69, 0x5b, 0xe9, 0xab, 0x12, 0x6e, 0xf7, 0x85, 0x9d, 0x4b, 0xc9, 0xa2, 0x44, + 0x46, 0xcf, 0xbd, 0xb5, 0x87, 0x43, 0xef, 0x28, 0xa2, 0x65, 0xba, 0xfc, 0x89, 0x0f, 0x77, 0x80}, + &LoadLib}, + {// sha256(fex:is_lib_loaded) + {0xee, 0x57, 0xba, 0x0c, 0x5f, 0x6e, 0xef, 0x2a, 0x8c, 0xb5, 0x19, 0x81, 0xc9, 0x23, 0xe6, 0x51, + 0xae, 0x65, 0x02, 0x8f, 0x2b, 0x5d, 0x59, 0x90, 0x6a, 0x7e, 0xe2, 0xe7, 0x1c, 0x33, 0x8a, 0xff}, + &IsLibLoaded}, + {// sha256(fex:is_host_heap_allocation) + {0xf5, 0x77, 0x68, 0x43, 0xbb, 0x6b, 0x28, 0x18, 0x40, 0xb0, 0xdb, 0x8a, 0x66, 0xfb, 0x0e, 0x2d, + 0x98, 0xc2, 0xad, 0xe2, 0x5a, 0x18, 0x5a, 0x37, 0x2e, 0x13, 0xc9, 0xe7, 0xb9, 0x8c, 
0xa9, 0x3e}, + &IsHostHeapAllocation}, + {// sha256(fex:link_address_to_function) + {0xe6, 0xa8, 0xec, 0x1c, 0x7b, 0x74, 0x35, 0x27, 0xe9, 0x4f, 0x5b, 0x6e, 0x2d, 0xc9, 0xa0, 0x27, + 0xd6, 0x1f, 0x2b, 0x87, 0x8f, 0x2d, 0x35, 0x50, 0xea, 0x16, 0xb8, 0xc4, 0x5e, 0x42, 0xfd, 0x77}, + &LinkAddressToGuestFunction}, + {// sha256(fex:allocate_host_trampoline_for_guest_function) + {0x9b, 0xb2, 0xf4, 0xb4, 0x83, 0x7d, 0x28, 0x93, 0x40, 0xcb, 0xf4, 0x7a, 0x0b, 0x47, 0x85, 0x87, + 0xf9, 0xbc, 0xb5, 0x27, 0xca, 0xa6, 0x93, 0xa5, 0xc0, 0x73, 0x27, 0x24, 0xae, 0xc8, 0xb8, 0x5a}, + &AllocateHostTrampolineForGuestFunction}, }; - static thread_local FEXCore::Core::InternalThreadState *Thread = nullptr; + // Can't be a string_view. We need to keep a copy of the library name in-case string_view pointer goes away. + // Ideally we track when a library has been unloaded and remove it from this set before the memory backing goes away. + fextl::set Libs; + fextl::unordered_map GuestcallToHostTrampoline; - struct ExportEntry { uint8_t *sha256; ThunkedFunction* Fn; }; + uint8_t* HostTrampolineInstanceDataPtr; + size_t HostTrampolineInstanceDataAvailable = 0; - struct TrampolineInstanceInfo { - void* HostPacker; - uintptr_t CallCallback; - uintptr_t GuestUnpacker; - uintptr_t GuestTarget; + + /* + Set arg0/1 to arg regs, use CTX::HandleCallback to handle the callback + */ + static void CallCallback(void* callback, void* arg0, void* arg1) { + if (!Thread) { + ERROR_AND_DIE_FMT("Thunked library attempted to invoke guest callback asynchronously"); + } + + auto CTX = static_cast(Thread->CTX); + if (CTX->Config.Is64BitMode) { + Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RDI] = (uintptr_t)arg0; + Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSI] = (uintptr_t)arg1; + } else { + if ((reinterpret_cast(arg1) >> 32) != 0) { + ERROR_AND_DIE_FMT("Tried to call guest function with arguments packed to a 64-bit address"); + } + 
Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RCX] = (uintptr_t)arg0; + Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RDX] = (uintptr_t)arg1; + } + + Thread->CTX->HandleCallback(Thread, (uintptr_t)callback); + } + + /** + * Instructs the Core to redirect calls to functions at the given + * address to another function. The original callee address is passed + * to the target function through an implicit argument stored in r11. + * + * For 32-bit the implicit argument is stored in the lower 32-bits of mm0. + * + * The primary use case of this is ensuring that host function pointers + * returned from thunked APIs can safely be called by the guest. + */ + static void LinkAddressToGuestFunction(void* argsv) { + struct args_t { + uintptr_t original_callee; + uintptr_t target_addr; // Guest function to call when branching to original_callee }; - // Opaque type pointing to an instance of HostToGuestTrampolineTemplate and its - // embedded TrampolineInstanceInfo - struct HostToGuestTrampolinePtr; - const auto HostToGuestTrampolineSize = __stop_HostToGuestTrampolineTemplate - __start_HostToGuestTrampolineTemplate; + auto args = reinterpret_cast(argsv); + auto CTX = static_cast(Thread->CTX); - static TrampolineInstanceInfo& GetInstanceInfo(HostToGuestTrampolinePtr* Trampoline) { - const auto Length = __stop_HostToGuestTrampolineTemplate - __start_HostToGuestTrampolineTemplate; - const auto InstanceInfoOffset = Length - sizeof(TrampolineInstanceInfo); - return *reinterpret_cast(reinterpret_cast(Trampoline) + InstanceInfoOffset); + LOGMAN_THROW_AA_FMT(args->original_callee, "Tried to link null pointer address to guest function"); + LOGMAN_THROW_AA_FMT(args->target_addr, "Tried to link address to null pointer guest function"); + if (!CTX->Config.Is64BitMode) { + LOGMAN_THROW_AA_FMT((args->original_callee >> 32) == 0, "Tried to link 64-bit address in 32-bit mode"); + LOGMAN_THROW_AA_FMT((args->target_addr >> 32) == 0, "Tried to link 64-bit address in 32-bit 
mode"); } - struct GuestcallInfo { - uintptr_t GuestUnpacker; - uintptr_t GuestTarget; + LogMan::Msg::DFmt("Thunks: Adding guest trampoline from address {:#x} to guest function {:#x}", args->original_callee, args->target_addr); - bool operator==(const GuestcallInfo&) const noexcept = default; - }; + auto Result = CTX->AddCustomIREntrypoint( + args->original_callee, + [CTX, GuestThunkEntrypoint = args->target_addr](uintptr_t Entrypoint, FEXCore::IR::IREmitter* emit) { + auto IRHeader = emit->_IRHeader(emit->Invalid(), Entrypoint, 0, 0); + auto Block = emit->CreateCodeNode(); + IRHeader.first->Blocks = emit->WrapNode(Block); + emit->SetCurrentCodeBlock(Block); - struct GuestcallInfoHash { - size_t operator()(const GuestcallInfo& x) const noexcept { - // Hash only the target address, which is generally unique. - // For the unlikely case of a hash collision, fextl::unordered_map still picks the correct bucket entry. - return std::hash{}(x.GuestTarget); - } - }; + const uint8_t GPRSize = CTX->GetGPRSize(); - // Bits in a SHA256 sum are already randomly distributed, so truncation yields a suitable hash function - struct TruncatingSHA256Hash { - size_t operator()(const FEXCore::IR::SHA256Sum& SHA256Sum) const noexcept { - return (const size_t&)SHA256Sum; + if (GPRSize == 8) { + emit->_StoreRegister(emit->_Constant(Entrypoint), false, offsetof(Core::CPUState, gregs[X86State::REG_R11]), IR::GPRClass, + IR::GPRFixedClass, GPRSize); + } else { + emit->_StoreContext(GPRSize, IR::FPRClass, emit->_VCastFromGPR(8, 8, emit->_Constant(Entrypoint)), offsetof(Core::CPUState, mm[0][0])); } - }; + emit->_ExitFunction(emit->_Constant(GuestThunkEntrypoint)); + }, + CTX->ThunkHandler.get(), (void*)args->target_addr); - HostToGuestTrampolinePtr* MakeHostTrampolineForGuestFunction(void* HostPacker, uintptr_t GuestTarget, uintptr_t GuestUnpacker); - - struct ThunkHandler_impl final: public ThunkHandler { - std::shared_mutex ThunksMutex; - - fextl::unordered_map Thunks = { - { - // 
sha256(fex:loadlib) - { 0x27, 0x7e, 0xb7, 0x69, 0x5b, 0xe9, 0xab, 0x12, 0x6e, 0xf7, 0x85, 0x9d, 0x4b, 0xc9, 0xa2, 0x44, 0x46, 0xcf, 0xbd, 0xb5, 0x87, 0x43, 0xef, 0x28, 0xa2, 0x65, 0xba, 0xfc, 0x89, 0x0f, 0x77, 0x80 }, - &LoadLib - }, - { - // sha256(fex:is_lib_loaded) - { 0xee, 0x57, 0xba, 0x0c, 0x5f, 0x6e, 0xef, 0x2a, 0x8c, 0xb5, 0x19, 0x81, 0xc9, 0x23, 0xe6, 0x51, 0xae, 0x65, 0x02, 0x8f, 0x2b, 0x5d, 0x59, 0x90, 0x6a, 0x7e, 0xe2, 0xe7, 0x1c, 0x33, 0x8a, 0xff }, - &IsLibLoaded - }, - { - // sha256(fex:is_host_heap_allocation) - { 0xf5, 0x77, 0x68, 0x43, 0xbb, 0x6b, 0x28, 0x18, 0x40, 0xb0, 0xdb, 0x8a, 0x66, 0xfb, 0x0e, 0x2d, 0x98, 0xc2, 0xad, 0xe2, 0x5a, 0x18, 0x5a, 0x37, 0x2e, 0x13, 0xc9, 0xe7, 0xb9, 0x8c, 0xa9, 0x3e }, - &IsHostHeapAllocation - }, - { - // sha256(fex:link_address_to_function) - { 0xe6, 0xa8, 0xec, 0x1c, 0x7b, 0x74, 0x35, 0x27, 0xe9, 0x4f, 0x5b, 0x6e, 0x2d, 0xc9, 0xa0, 0x27, 0xd6, 0x1f, 0x2b, 0x87, 0x8f, 0x2d, 0x35, 0x50, 0xea, 0x16, 0xb8, 0xc4, 0x5e, 0x42, 0xfd, 0x77 }, - &LinkAddressToGuestFunction - }, - { - // sha256(fex:allocate_host_trampoline_for_guest_function) - { 0x9b, 0xb2, 0xf4, 0xb4, 0x83, 0x7d, 0x28, 0x93, 0x40, 0xcb, 0xf4, 0x7a, 0x0b, 0x47, 0x85, 0x87, 0xf9, 0xbc, 0xb5, 0x27, 0xca, 0xa6, 0x93, 0xa5, 0xc0, 0x73, 0x27, 0x24, 0xae, 0xc8, 0xb8, 0x5a }, - &AllocateHostTrampolineForGuestFunction - }, - }; - - // Can't be a string_view. We need to keep a copy of the library name in-case string_view pointer goes away. - // Ideally we track when a library has been unloaded and remove it from this set before the memory backing goes away. 
- fextl::set Libs; - - fextl::unordered_map GuestcallToHostTrampoline; - - uint8_t *HostTrampolineInstanceDataPtr; - size_t HostTrampolineInstanceDataAvailable = 0; - - - /* - Set arg0/1 to arg regs, use CTX::HandleCallback to handle the callback - */ - static void CallCallback(void *callback, void *arg0, void* arg1) { - if (!Thread) { - ERROR_AND_DIE_FMT("Thunked library attempted to invoke guest callback asynchronously"); - } - - auto CTX = static_cast(Thread->CTX); - if (CTX->Config.Is64BitMode) { - Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RDI] = (uintptr_t)arg0; - Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSI] = (uintptr_t)arg1; - } else { - if ((reinterpret_cast(arg1) >> 32) != 0) { - ERROR_AND_DIE_FMT("Tried to call guest function with arguments packed to a 64-bit address"); - } - Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RCX] = (uintptr_t)arg0; - Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RDX] = (uintptr_t)arg1; - } - - Thread->CTX->HandleCallback(Thread, (uintptr_t)callback); - } - - /** - * Instructs the Core to redirect calls to functions at the given - * address to another function. The original callee address is passed - * to the target function through an implicit argument stored in r11. - * - * For 32-bit the implicit argument is stored in the lower 32-bits of mm0. - * - * The primary use case of this is ensuring that host function pointers - * returned from thunked APIs can safely be called by the guest. 
- */ - static void LinkAddressToGuestFunction(void* argsv) { - struct args_t { - uintptr_t original_callee; - uintptr_t target_addr; // Guest function to call when branching to original_callee - }; - - auto args = reinterpret_cast(argsv); - auto CTX = static_cast(Thread->CTX); - - LOGMAN_THROW_AA_FMT(args->original_callee, "Tried to link null pointer address to guest function"); - LOGMAN_THROW_AA_FMT(args->target_addr, "Tried to link address to null pointer guest function"); - if (!CTX->Config.Is64BitMode) { - LOGMAN_THROW_AA_FMT((args->original_callee >> 32) == 0, "Tried to link 64-bit address in 32-bit mode"); - LOGMAN_THROW_AA_FMT((args->target_addr >> 32) == 0, "Tried to link 64-bit address in 32-bit mode"); - } - - LogMan::Msg::DFmt("Thunks: Adding guest trampoline from address {:#x} to guest function {:#x}", - args->original_callee, args->target_addr); - - auto Result = CTX->AddCustomIREntrypoint( - args->original_callee, - [CTX, GuestThunkEntrypoint = args->target_addr](uintptr_t Entrypoint, FEXCore::IR::IREmitter *emit) { - auto IRHeader = emit->_IRHeader(emit->Invalid(), Entrypoint, 0, 0); - auto Block = emit->CreateCodeNode(); - IRHeader.first->Blocks = emit->WrapNode(Block); - emit->SetCurrentCodeBlock(Block); - - const uint8_t GPRSize = CTX->GetGPRSize(); - - if (GPRSize == 8) { - emit->_StoreRegister(emit->_Constant(Entrypoint), false, offsetof(Core::CPUState, gregs[X86State::REG_R11]), IR::GPRClass, IR::GPRFixedClass, GPRSize); - } - else { - emit->_StoreContext(GPRSize, IR::FPRClass, emit->_VCastFromGPR(8, 8, emit->_Constant(Entrypoint)), offsetof(Core::CPUState, mm[0][0])); - } - emit->_ExitFunction(emit->_Constant(GuestThunkEntrypoint)); - }, CTX->ThunkHandler.get(), (void*)args->target_addr); - - if (!Result) { - if (Result.Creator != CTX->ThunkHandler.get()) { - ERROR_AND_DIE_FMT("Input address for LinkAddressToGuestFunction is already linked by another module"); - } - if (Result.Data != (void*)args->target_addr) { - // NOTE: This may happen in 
Vulkan thunks if the Vulkan driver resolves two different symbols - // to the same function (e.g. vkGetPhysicalDeviceFeatures2/vkGetPhysicalDeviceFeatures2KHR) - LogMan::Msg::EFmt("Input address for LinkAddressToGuestFunction is already linked elsewhere"); - } - } - } - - /** - * Guest-side helper to initiate creation of a host trampoline for - * calling guest functions. This must be followed by a host-side call - * to FinalizeHostTrampolineForGuestFunction to make the trampoline - * usable. - * - * This two-step initialization is equivalent to a host-side call to - * MakeHostTrampolineForGuestFunction. The split is needed if the - * host doesn't have all information needed to create the trampoline - * on its own. - */ - static void AllocateHostTrampolineForGuestFunction(void* ArgsRV) { - struct ArgsRV_t { - uintptr_t GuestUnpacker; - uintptr_t GuestTarget; - uintptr_t rv; // Pointer to host trampoline + TrampolineInstanceInfo - } *args = reinterpret_cast(ArgsRV); - - args->rv = (uintptr_t)MakeHostTrampolineForGuestFunction(nullptr, args->GuestTarget, args->GuestUnpacker); - } - - /** - * Checks if the given pointer is allocated on the host heap. - * - * This is useful for thunking APIs that need to work with both guest - * and host heap pointers. - */ - static void IsHostHeapAllocation(void* ArgsRV) { + if (!Result) { + if (Result.Creator != CTX->ThunkHandler.get()) { + ERROR_AND_DIE_FMT("Input address for LinkAddressToGuestFunction is already linked by another module"); + } + if (Result.Data != (void*)args->target_addr) { + // NOTE: This may happen in Vulkan thunks if the Vulkan driver resolves two different symbols + // to the same function (e.g. vkGetPhysicalDeviceFeatures2/vkGetPhysicalDeviceFeatures2KHR) + LogMan::Msg::EFmt("Input address for LinkAddressToGuestFunction is already linked elsewhere"); + } + } + } + + /** + * Guest-side helper to initiate creation of a host trampoline for + * calling guest functions. 
This must be followed by a host-side call + * to FinalizeHostTrampolineForGuestFunction to make the trampoline + * usable. + * + * This two-step initialization is equivalent to a host-side call to + * MakeHostTrampolineForGuestFunction. The split is needed if the + * host doesn't have all information needed to create the trampoline + * on its own. + */ + static void AllocateHostTrampolineForGuestFunction(void* ArgsRV) { + struct ArgsRV_t { + uintptr_t GuestUnpacker; + uintptr_t GuestTarget; + uintptr_t rv; // Pointer to host trampoline + TrampolineInstanceInfo + }* args = reinterpret_cast(ArgsRV); + + args->rv = (uintptr_t)MakeHostTrampolineForGuestFunction(nullptr, args->GuestTarget, args->GuestUnpacker); + } + + /** + * Checks if the given pointer is allocated on the host heap. + * + * This is useful for thunking APIs that need to work with both guest + * and host heap pointers. + */ + static void IsHostHeapAllocation(void* ArgsRV) { #ifdef ENABLE_JEMALLOC_GLIBC - struct ArgsRV_t { - void* ptr; - bool rv; - } *args = reinterpret_cast(ArgsRV); + struct ArgsRV_t { + void* ptr; + bool rv; + }* args = reinterpret_cast(ArgsRV); - args->rv = glibc_je_is_known_allocation(args->ptr); + args->rv = glibc_je_is_known_allocation(args->ptr); #else - // Thunks usage without jemalloc isn't supported - ERROR_AND_DIE_FMT("Unsupported: Thunks querying for host heap allocation information"); + // Thunks usage without jemalloc isn't supported + ERROR_AND_DIE_FMT("Unsupported: Thunks querying for host heap allocation information"); #endif - } + } - static void LoadLib(void *ArgsV) { - auto CTX = static_cast(Thread->CTX); + static void LoadLib(void* ArgsV) { + auto CTX = static_cast(Thread->CTX); - auto Args = reinterpret_cast(ArgsV); + auto Args = reinterpret_cast(ArgsV); - std::string_view Name = Args->Name; + std::string_view Name = Args->Name; - auto SOName = (CTX->Config.Is64BitMode() ? 
- CTX->Config.ThunkHostLibsPath() : - CTX->Config.ThunkHostLibsPath32()) - + "/" + Name.data() + "-host.so"; + auto SOName = (CTX->Config.Is64BitMode() ? CTX->Config.ThunkHostLibsPath() : CTX->Config.ThunkHostLibsPath32()) + "/" + Name.data() + "-host.so"; - LogMan::Msg::DFmt("LoadLib: {} -> {}", Name, SOName); + LogMan::Msg::DFmt("LoadLib: {} -> {}", Name, SOName); - auto Handle = dlopen(SOName.c_str(), RTLD_LOCAL | RTLD_NOW); - if (!Handle) { - ERROR_AND_DIE_FMT("LoadLib: Failed to dlopen thunk library {}: {}", SOName, dlerror()); - } + auto Handle = dlopen(SOName.c_str(), RTLD_LOCAL | RTLD_NOW); + if (!Handle) { + ERROR_AND_DIE_FMT("LoadLib: Failed to dlopen thunk library {}: {}", SOName, dlerror()); + } - // Library names often include dashes, which may not be used in C++ identifiers. - // They are replaced with underscores hence. - auto InitSym = "fexthunks_exports_" + fextl::string { Name }; - std::replace(InitSym.begin(), InitSym.end(), '-', '_'); + // Library names often include dashes, which may not be used in C++ identifiers. + // They are replaced with underscores hence. + auto InitSym = "fexthunks_exports_" + fextl::string {Name}; + std::replace(InitSym.begin(), InitSym.end(), '-', '_'); - ExportEntry* (*InitFN)(); - (void*&)InitFN = dlsym(Handle, InitSym.c_str()); - if (!InitFN) { - ERROR_AND_DIE_FMT("LoadLib: Failed to find export {}", InitSym); - } + ExportEntry* (*InitFN)(); + (void*&)InitFN = dlsym(Handle, InitSym.c_str()); + if (!InitFN) { + ERROR_AND_DIE_FMT("LoadLib: Failed to find export {}", InitSym); + } - auto Exports = InitFN(); - if (!Exports) { - ERROR_AND_DIE_FMT("LoadLib: Failed to initialize thunk library {}. " - "Check if the corresponding host library is installed " - "or disable thunking of this library.", Name); - } + auto Exports = InitFN(); + if (!Exports) { + ERROR_AND_DIE_FMT("LoadLib: Failed to initialize thunk library {}. 
" + "Check if the corresponding host library is installed " + "or disable thunking of this library.", + Name); + } - auto That = reinterpret_cast(CTX->ThunkHandler.get()); + auto That = reinterpret_cast(CTX->ThunkHandler.get()); - { - std::lock_guard lk(That->ThunksMutex); + { + std::lock_guard lk(That->ThunksMutex); - That->Libs.insert(fextl::string { Name }); + That->Libs.insert(fextl::string {Name}); - int i; - for (i = 0; Exports[i].sha256; i++) { - That->Thunks[*reinterpret_cast(Exports[i].sha256)] = Exports[i].Fn; - } + int i; + for (i = 0; Exports[i].sha256; i++) { + That->Thunks[*reinterpret_cast(Exports[i].sha256)] = Exports[i].Fn; + } - LogMan::Msg::DFmt("Loaded {} syms", i); - } - } + LogMan::Msg::DFmt("Loaded {} syms", i); + } + } - static void IsLibLoaded(void* ArgsRV) { - struct ArgsRV_t { - const char *Name; - bool rv; - }; + static void IsLibLoaded(void* ArgsRV) { + struct ArgsRV_t { + const char* Name; + bool rv; + }; - auto &[Name, rv] = *reinterpret_cast(ArgsRV); + auto& [Name, rv] = *reinterpret_cast(ArgsRV); - auto CTX = static_cast(Thread->CTX); - auto That = reinterpret_cast(CTX->ThunkHandler.get()); + auto CTX = static_cast(Thread->CTX); + auto That = reinterpret_cast(CTX->ThunkHandler.get()); - { - std::shared_lock lk(That->ThunksMutex); - rv = That->Libs.contains(Name); - } - } + { + std::shared_lock lk(That->ThunksMutex); + rv = That->Libs.contains(Name); + } + } - ThunkedFunction* LookupThunk(const IR::SHA256Sum &sha256) override { + ThunkedFunction* LookupThunk(const IR::SHA256Sum& sha256) override { - std::shared_lock lk(ThunksMutex); + std::shared_lock lk(ThunksMutex); - auto it = Thunks.find(sha256); + auto it = Thunks.find(sha256); - if (it != Thunks.end()) { - return it->second; - } else { - return nullptr; - } - } + if (it != Thunks.end()) { + return it->second; + } else { + return nullptr; + } + } - void RegisterTLSState(FEXCore::Core::InternalThreadState *_Thread) override { - Thread = _Thread; - } + void 
RegisterTLSState(FEXCore::Core::InternalThreadState* _Thread) override { + Thread = _Thread; + } - void AppendThunkDefinitions(fextl::vector const& Definitions) override { - for (auto & Definition : Definitions) { - Thunks.emplace(Definition.Sum, Definition.ThunkFunction); - } - } - }; + void AppendThunkDefinitions(const fextl::vector& Definitions) override { + for (auto& Definition : Definitions) { + Thunks.emplace(Definition.Sum, Definition.ThunkFunction); + } + } +}; + +fextl::unique_ptr ThunkHandler::Create() { + return fextl::make_unique(); +} - fextl::unique_ptr ThunkHandler::Create() { - return fextl::make_unique(); +/** + * Generates a host-callable trampoline to call guest functions via the host ABI. + * + * This trampoline uses the same calling convention as the given HostPacker. Trampolines + * are cached, so it's safe to call this function repeatedly on the same arguments without + * leaking memory. + * + * Invoking the returned trampoline has the effect of: + * - packing the arguments (using the HostPacker identified by its SHA256) + * - performing a host->guest transition + * - unpacking the arguments via GuestUnpacker + * - calling the function at GuestTarget + * + * The primary use case of this is ensuring that guest function pointers ("callbacks") + * passed to thunked APIs can safely be called by the native host library. + * + * Returns a pointer to the generated host trampoline and its TrampolineInstanceInfo. + * + * If HostPacker is zero, the trampoline will be partially initialized and needs to be + * finalized with a call to FinalizeHostTrampolineForGuestFunction. A typical use case + * is to allocate the trampoline for a given GuestTarget/GuestUnpacker on the guest-side, + * and provide the HostPacker host-side. 
+ */ +FEX_DEFAULT_VISIBILITY HostToGuestTrampolinePtr* +MakeHostTrampolineForGuestFunction(void* HostPacker, uintptr_t GuestTarget, uintptr_t GuestUnpacker) { + LOGMAN_THROW_AA_FMT(GuestTarget, "Tried to create host-trampoline to null pointer guest function"); + + const auto CTX = static_cast(Thread->CTX); + const auto ThunkHandler = reinterpret_cast(CTX->ThunkHandler.get()); + + const GuestcallInfo gci = {GuestUnpacker, GuestTarget}; + + // Try first with shared_lock + { + std::shared_lock lk(ThunkHandler->ThunksMutex); + + auto found = ThunkHandler->GuestcallToHostTrampoline.find(gci); + if (found != ThunkHandler->GuestcallToHostTrampoline.end()) { + return found->second; } + } - /** - * Generates a host-callable trampoline to call guest functions via the host ABI. - * - * This trampoline uses the same calling convention as the given HostPacker. Trampolines - * are cached, so it's safe to call this function repeatedly on the same arguments without - * leaking memory. - * - * Invoking the returned trampoline has the effect of: - * - packing the arguments (using the HostPacker identified by its SHA256) - * - performing a host->guest transition - * - unpacking the arguments via GuestUnpacker - * - calling the function at GuestTarget - * - * The primary use case of this is ensuring that guest function pointers ("callbacks") - * passed to thunked APIs can safely be called by the native host library. - * - * Returns a pointer to the generated host trampoline and its TrampolineInstanceInfo. - * - * If HostPacker is zero, the trampoline will be partially initialized and needs to be - * finalized with a call to FinalizeHostTrampolineForGuestFunction. A typical use case - * is to allocate the trampoline for a given GuestTarget/GuestUnpacker on the guest-side, - * and provide the HostPacker host-side. 
- */ - FEX_DEFAULT_VISIBILITY - HostToGuestTrampolinePtr* MakeHostTrampolineForGuestFunction(void* HostPacker, uintptr_t GuestTarget, uintptr_t GuestUnpacker) { - LOGMAN_THROW_AA_FMT(GuestTarget, "Tried to create host-trampoline to null pointer guest function"); - - const auto CTX = static_cast(Thread->CTX); - const auto ThunkHandler = reinterpret_cast(CTX->ThunkHandler.get()); - - const GuestcallInfo gci = { GuestUnpacker, GuestTarget }; - - // Try first with shared_lock - { - std::shared_lock lk(ThunkHandler->ThunksMutex); - - auto found = ThunkHandler->GuestcallToHostTrampoline.find(gci); - if (found != ThunkHandler->GuestcallToHostTrampoline.end()) { - return found->second; - } - } + std::lock_guard lk(ThunkHandler->ThunksMutex); - std::lock_guard lk(ThunkHandler->ThunksMutex); + // Retry lookup with full lock before making a new trampoline to avoid double trampolines + { + auto found = ThunkHandler->GuestcallToHostTrampoline.find(gci); + if (found != ThunkHandler->GuestcallToHostTrampoline.end()) { + return found->second; + } + } - // Retry lookup with full lock before making a new trampoline to avoid double trampolines - { - auto found = ThunkHandler->GuestcallToHostTrampoline.find(gci); - if (found != ThunkHandler->GuestcallToHostTrampoline.end()) { - return found->second; - } - } + LogMan::Msg::DFmt("Thunks: Adding host trampoline for guest function {:#x} via unpacker {:#x}", GuestTarget, GuestUnpacker); - LogMan::Msg::DFmt("Thunks: Adding host trampoline for guest function {:#x} via unpacker {:#x}", - GuestTarget, GuestUnpacker); + if (ThunkHandler->HostTrampolineInstanceDataAvailable < HostToGuestTrampolineSize) { + const auto allocation_step = 16 * 1024; + ThunkHandler->HostTrampolineInstanceDataAvailable = allocation_step; + ThunkHandler->HostTrampolineInstanceDataPtr = (uint8_t*)mmap(0, ThunkHandler->HostTrampolineInstanceDataAvailable, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if 
(ThunkHandler->HostTrampolineInstanceDataAvailable < HostToGuestTrampolineSize) { - const auto allocation_step = 16 * 1024; - ThunkHandler->HostTrampolineInstanceDataAvailable = allocation_step; - ThunkHandler->HostTrampolineInstanceDataPtr = (uint8_t *)mmap( - 0, ThunkHandler->HostTrampolineInstanceDataAvailable, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + LOGMAN_THROW_AA_FMT(ThunkHandler->HostTrampolineInstanceDataPtr != MAP_FAILED, "Failed to mmap HostTrampolineInstanceDataPtr"); + } - LOGMAN_THROW_AA_FMT(ThunkHandler->HostTrampolineInstanceDataPtr != MAP_FAILED, "Failed to mmap HostTrampolineInstanceDataPtr"); - } + auto HostTrampoline = reinterpret_cast(ThunkHandler->HostTrampolineInstanceDataPtr); + ThunkHandler->HostTrampolineInstanceDataAvailable -= HostToGuestTrampolineSize; + ThunkHandler->HostTrampolineInstanceDataPtr += HostToGuestTrampolineSize; + memcpy(HostTrampoline, (void*)&HostToGuestTrampolineTemplate, HostToGuestTrampolineSize); + GetInstanceInfo(HostTrampoline) = TrampolineInstanceInfo { + .HostPacker = HostPacker, .CallCallback = (uintptr_t)&ThunkHandler_impl::CallCallback, .GuestUnpacker = GuestUnpacker, .GuestTarget = GuestTarget}; - auto HostTrampoline = reinterpret_cast(ThunkHandler->HostTrampolineInstanceDataPtr); - ThunkHandler->HostTrampolineInstanceDataAvailable -= HostToGuestTrampolineSize; - ThunkHandler->HostTrampolineInstanceDataPtr += HostToGuestTrampolineSize; - memcpy(HostTrampoline, (void*)&HostToGuestTrampolineTemplate, HostToGuestTrampolineSize); - GetInstanceInfo(HostTrampoline) = TrampolineInstanceInfo { - .HostPacker = HostPacker, - .CallCallback = (uintptr_t)&ThunkHandler_impl::CallCallback, - .GuestUnpacker = GuestUnpacker, - .GuestTarget = GuestTarget - }; - - ThunkHandler->GuestcallToHostTrampoline[gci] = HostTrampoline; - return HostTrampoline; - } + ThunkHandler->GuestcallToHostTrampoline[gci] = HostTrampoline; + return HostTrampoline; +} - FEX_DEFAULT_VISIBILITY - void 
FinalizeHostTrampolineForGuestFunction(HostToGuestTrampolinePtr* TrampolineAddress, void* HostPacker) { +FEX_DEFAULT_VISIBILITY void FinalizeHostTrampolineForGuestFunction(HostToGuestTrampolinePtr* TrampolineAddress, void* HostPacker) { - if (TrampolineAddress == nullptr) return; + if (TrampolineAddress == nullptr) { + return; + } - auto& Trampoline = GetInstanceInfo(TrampolineAddress); + auto& Trampoline = GetInstanceInfo(TrampolineAddress); - LOGMAN_THROW_A_FMT(Trampoline.CallCallback == (uintptr_t)&ThunkHandler_impl::CallCallback, - "Invalid trampoline at {} passed to {}", fmt::ptr(TrampolineAddress), __FUNCTION__); + LOGMAN_THROW_A_FMT(Trampoline.CallCallback == (uintptr_t)&ThunkHandler_impl::CallCallback, "Invalid trampoline at {} passed to {}", + fmt::ptr(TrampolineAddress), __FUNCTION__); - if (!Trampoline.HostPacker) { - LogMan::Msg::DFmt("Thunks: Finalizing trampoline at {} with host packer {}", fmt::ptr(TrampolineAddress), fmt::ptr(HostPacker)); - Trampoline.HostPacker = HostPacker; - } - } + if (!Trampoline.HostPacker) { + LogMan::Msg::DFmt("Thunks: Finalizing trampoline at {} with host packer {}", fmt::ptr(TrampolineAddress), fmt::ptr(HostPacker)); + Trampoline.HostPacker = HostPacker; + } +} - FEX_DEFAULT_VISIBILITY void* GetGuestStack() { - if (!Thread) { - ERROR_AND_DIE_FMT("Thunked library attempted to query guest stack pointer asynchronously"); - } +FEX_DEFAULT_VISIBILITY void* GetGuestStack() { + if (!Thread) { + ERROR_AND_DIE_FMT("Thunked library attempted to query guest stack pointer asynchronously"); + } - return (void*)(uintptr_t)((Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP])); - } + return (void*)(uintptr_t)((Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP])); +} - FEX_DEFAULT_VISIBILITY void MoveGuestStack(uintptr_t NewAddress) { - if (!Thread) { - ERROR_AND_DIE_FMT("Thunked library attempted to query guest stack pointer asynchronously"); - } +FEX_DEFAULT_VISIBILITY void MoveGuestStack(uintptr_t NewAddress) { + 
if (!Thread) { + ERROR_AND_DIE_FMT("Thunked library attempted to query guest stack pointer asynchronously"); + } - if (NewAddress >> 32) { - ERROR_AND_DIE_FMT("Tried to set stack pointer for 32-bit guest to a 64-bit address"); - } + if (NewAddress >> 32) { + ERROR_AND_DIE_FMT("Tried to set stack pointer for 32-bit guest to a 64-bit address"); + } - Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] = NewAddress; - } + Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] = NewAddress; +} #else - fextl::unique_ptr ThunkHandler::Create() { - ERROR_AND_DIE_FMT("Unsupported"); - } +fextl::unique_ptr ThunkHandler::Create() { + ERROR_AND_DIE_FMT("Unsupported"); +} #endif -} +} // namespace FEXCore diff --git a/FEXCore/Source/Interface/IR/AOTIR.cpp b/FEXCore/Source/Interface/IR/AOTIR.cpp index c16ab231a4..944d9981b6 100644 --- a/FEXCore/Source/Interface/IR/AOTIR.cpp +++ b/FEXCore/Source/Interface/IR/AOTIR.cpp @@ -22,399 +22,399 @@ namespace FEXCore::IR { - AOTIRInlineEntry *AOTIRInlineIndex::GetInlineEntry(uint64_t DataOffset) { - uintptr_t This = (uintptr_t)this; +AOTIRInlineEntry* AOTIRInlineIndex::GetInlineEntry(uint64_t DataOffset) { + uintptr_t This = (uintptr_t)this; - return (AOTIRInlineEntry*)(This + DataBase + DataOffset); - } + return (AOTIRInlineEntry*)(This + DataBase + DataOffset); +} - AOTIRInlineEntry *AOTIRInlineIndex::Find(uint64_t GuestStart) { - ssize_t l = 0; - ssize_t r = Count - 1; +AOTIRInlineEntry* AOTIRInlineIndex::Find(uint64_t GuestStart) { + ssize_t l = 0; + ssize_t r = Count - 1; - while (l <= r) { - size_t m = l + (r - l) / 2; + while (l <= r) { + size_t m = l + (r - l) / 2; - if (Entries[m].GuestStart == GuestStart) - return GetInlineEntry(Entries[m].DataOffset); - else if (Entries[m].GuestStart < GuestStart) - l = m + 1; - else - r = m - 1; + if (Entries[m].GuestStart == GuestStart) { + return GetInlineEntry(Entries[m].DataOffset); + } else if (Entries[m].GuestStart < GuestStart) { + l = m + 1; + } else { + r = m - 1; } - 
- return nullptr; } - IR::RegisterAllocationData *AOTIRInlineEntry::GetRAData() { - return (IR::RegisterAllocationData *)InlineData; - } + return nullptr; +} + +IR::RegisterAllocationData* AOTIRInlineEntry::GetRAData() { + return (IR::RegisterAllocationData*)InlineData; +} - IR::IRListView *AOTIRInlineEntry::GetIRData() { - auto RAData = GetRAData(); - auto Offset = RAData->Size(RAData->MapCount); +IR::IRListView* AOTIRInlineEntry::GetIRData() { + auto RAData = GetRAData(); + auto Offset = RAData->Size(RAData->MapCount); - return (IR::IRListView *)&InlineData[Offset]; - } + return (IR::IRListView*)&InlineData[Offset]; +} - void AOTIRCaptureCacheEntry::AppendAOTIRCaptureCache(uint64_t GuestRIP, uint64_t Start, uint64_t Length, uint64_t Hash, FEXCore::IR::IRListView *IRList, FEXCore::IR::RegisterAllocationData *RAData) { - auto Inserted = Index.emplace(GuestRIP, Stream->Offset()); +void AOTIRCaptureCacheEntry::AppendAOTIRCaptureCache(uint64_t GuestRIP, uint64_t Start, uint64_t Length, uint64_t Hash, + FEXCore::IR::IRListView* IRList, FEXCore::IR::RegisterAllocationData* RAData) { + auto Inserted = Index.emplace(GuestRIP, Stream->Offset()); - if (Inserted.second) { - //GuestHash - Stream->Write((const char*)&Hash, sizeof(Hash)); + if (Inserted.second) { + // GuestHash + Stream->Write((const char*)&Hash, sizeof(Hash)); - //GuestLength - Stream->Write((const char*)&Length, sizeof(Length)); + // GuestLength + Stream->Write((const char*)&Length, sizeof(Length)); - RAData->Serialize(*Stream); + RAData->Serialize(*Stream); - // IRData (inline) - IRList->Serialize(*Stream); - } + // IRData (inline) + IRList->Serialize(*Stream); } +} - static bool readAll(int fd, void *data, size_t size) { - int rv = read(fd, data, size); +static bool readAll(int fd, void* data, size_t size) { + int rv = read(fd, data, size); - if (rv != size) - return false; - else - return true; + if (rv != size) { + return false; + } else { + return true; } +} - static bool LoadAOTIRCache(AOTIRCacheEntry 
*Entry, int streamfd) { +static bool LoadAOTIRCache(AOTIRCacheEntry* Entry, int streamfd) { #ifndef _WIN32 - uint64_t tag; + uint64_t tag; - if (!readAll(streamfd, (char*)&tag, sizeof(tag)) || tag != FEXCore::IR::AOTIR_COOKIE) - return false; + if (!readAll(streamfd, (char*)&tag, sizeof(tag)) || tag != FEXCore::IR::AOTIR_COOKIE) { + return false; + } - fextl::string Module; - uint64_t ModSize; - uint64_t IndexSize; + fextl::string Module; + uint64_t ModSize; + uint64_t IndexSize; - lseek(streamfd, -sizeof(ModSize), SEEK_END); + lseek(streamfd, -sizeof(ModSize), SEEK_END); - if (!readAll(streamfd, (char*)&ModSize, sizeof(ModSize))) - return false; + if (!readAll(streamfd, (char*)&ModSize, sizeof(ModSize))) { + return false; + } - Module.resize(ModSize); + Module.resize(ModSize); - lseek(streamfd, -sizeof(ModSize) - ModSize, SEEK_END); + lseek(streamfd, -sizeof(ModSize) - ModSize, SEEK_END); - if (!readAll(streamfd, (char*)&Module[0], Module.size())) - return false; + if (!readAll(streamfd, (char*)&Module[0], Module.size())) { + return false; + } - if (Entry->FileId != Module) { - return false; - } + if (Entry->FileId != Module) { + return false; + } - lseek(streamfd, -sizeof(ModSize) - ModSize - sizeof(IndexSize), SEEK_END); + lseek(streamfd, -sizeof(ModSize) - ModSize - sizeof(IndexSize), SEEK_END); - if (!readAll(streamfd, (char*)&IndexSize, sizeof(IndexSize))) - return false; + if (!readAll(streamfd, (char*)&IndexSize, sizeof(IndexSize))) { + return false; + } - struct stat fileinfo; - if (fstat(streamfd, &fileinfo) < 0) - return false; - size_t Size = (fileinfo.st_size + 4095) & ~4095; + struct stat fileinfo; + if (fstat(streamfd, &fileinfo) < 0) { + return false; + } + size_t Size = (fileinfo.st_size + 4095) & ~4095; - size_t IndexOffset = fileinfo.st_size - IndexSize -sizeof(ModSize) - ModSize - sizeof(IndexSize); + size_t IndexOffset = fileinfo.st_size - IndexSize - sizeof(ModSize) - ModSize - sizeof(IndexSize); - void *FilePtr = 
FEXCore::Allocator::mmap(nullptr, Size, PROT_READ, MAP_SHARED, streamfd, 0); + void* FilePtr = FEXCore::Allocator::mmap(nullptr, Size, PROT_READ, MAP_SHARED, streamfd, 0); - if (FilePtr == MAP_FAILED) { - return false; - } + if (FilePtr == MAP_FAILED) { + return false; + } - auto Array = (AOTIRInlineIndex *)((char*)FilePtr + IndexOffset); + auto Array = (AOTIRInlineIndex*)((char*)FilePtr + IndexOffset); - LOGMAN_THROW_AA_FMT(Entry->Array == nullptr && Entry->FilePtr == nullptr, "Entry must not be initialized here"); - Entry->Array = Array; - Entry->FilePtr = FilePtr; - Entry->Size = Size; + LOGMAN_THROW_AA_FMT(Entry->Array == nullptr && Entry->FilePtr == nullptr, "Entry must not be initialized here"); + Entry->Array = Array; + Entry->FilePtr = FilePtr; + Entry->Size = Size; - LogMan::Msg::DFmt("AOTIR: Module {} has {} functions", Module, Array->Count); + LogMan::Msg::DFmt("AOTIR: Module {} has {} functions", Module, Array->Count); - return true; + return true; #else - return false; + return false; #endif - } +} - void AOTIRCaptureCache::FinalizeAOTIRCache() { - AOTIRCaptureCacheWriteoutQueue_Flush(); +void AOTIRCaptureCache::FinalizeAOTIRCache() { + AOTIRCaptureCacheWriteoutQueue_Flush(); - std::unique_lock lk(AOTIRCacheLock); + std::unique_lock lk(AOTIRCacheLock); - for (auto& [String, Entry] : AOTIRCaptureCacheMap) { - if (!Entry.Stream) { - continue; - } + for (auto& [String, Entry] : AOTIRCaptureCacheMap) { + if (!Entry.Stream) { + continue; + } - const auto ModSize = String.size(); - auto &stream = Entry.Stream; + const auto ModSize = String.size(); + auto& stream = Entry.Stream; - // pad to 32 bytes - constexpr char Zero = 0; - while(stream->Offset() & 31) - stream->Write(&Zero, 1); + // pad to 32 bytes + constexpr char Zero = 0; + while (stream->Offset() & 31) { + stream->Write(&Zero, 1); + } - // AOTIRInlineIndex - const auto FnCount = Entry.Index.size(); - const size_t DataBase = -stream->Offset(); + // AOTIRInlineIndex + const auto FnCount = 
Entry.Index.size(); + const size_t DataBase = -stream->Offset(); - stream->Write((const char*)&FnCount, sizeof(FnCount)); - stream->Write((const char*)&DataBase, sizeof(DataBase)); + stream->Write((const char*)&FnCount, sizeof(FnCount)); + stream->Write((const char*)&DataBase, sizeof(DataBase)); - for (const auto& [GuestStart, DataOffset] : Entry.Index) { - //AOTIRInlineIndexEntry + for (const auto& [GuestStart, DataOffset] : Entry.Index) { + // AOTIRInlineIndexEntry - // GuestStart - stream->Write((const char*)&GuestStart, sizeof(GuestStart)); + // GuestStart + stream->Write((const char*)&GuestStart, sizeof(GuestStart)); - // DataOffset - stream->Write((const char*)&DataOffset, sizeof(DataOffset)); - } + // DataOffset + stream->Write((const char*)&DataOffset, sizeof(DataOffset)); + } - // End of file header - const auto IndexSize = FnCount * sizeof(FEXCore::IR::AOTIRInlineIndexEntry) + sizeof(DataBase) + sizeof(FnCount); - stream->Write((const char*)&IndexSize, sizeof(IndexSize)); - stream->Write(String.c_str(), ModSize); - stream->Write((const char*)&ModSize, sizeof(ModSize)); + // End of file header + const auto IndexSize = FnCount * sizeof(FEXCore::IR::AOTIRInlineIndexEntry) + sizeof(DataBase) + sizeof(FnCount); + stream->Write((const char*)&IndexSize, sizeof(IndexSize)); + stream->Write(String.c_str(), ModSize); + stream->Write((const char*)&ModSize, sizeof(ModSize)); - // Close the stream - stream->Close(); + // Close the stream + stream->Close(); - // Rename the file to atomically update the cache with the temporary file - AOTIRRenamer(String); + // Rename the file to atomically update the cache with the temporary file + AOTIRRenamer(String); + } +} + +void AOTIRCaptureCache::AOTIRCaptureCacheWriteoutQueue_Flush() { + { + std::shared_lock lk {AOTIRCaptureCacheWriteoutLock}; + if (AOTIRCaptureCacheWriteoutQueue.size() == 0) { + AOTIRCaptureCacheWriteoutFlusing.store(false); + return; } } - void AOTIRCaptureCache::AOTIRCaptureCacheWriteoutQueue_Flush() { - { - 
std::shared_lock lk{AOTIRCaptureCacheWriteoutLock}; + for (;;) { + // This code is tricky to refactor so it doesn't allocate memory through glibc. + // The moved std::function object deallocates memory at the end of scope. + FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator glibc; + + AOTIRCaptureCacheWriteoutLock.lock(); + WriteOutFn fn = std::move(AOTIRCaptureCacheWriteoutQueue.front()); + bool MaybeEmpty = false; + AOTIRCaptureCacheWriteoutQueue.pop(); + MaybeEmpty = AOTIRCaptureCacheWriteoutQueue.size() == 0; + AOTIRCaptureCacheWriteoutLock.unlock(); + + fn(); + if (MaybeEmpty) { + std::shared_lock lk {AOTIRCaptureCacheWriteoutLock}; if (AOTIRCaptureCacheWriteoutQueue.size() == 0) { AOTIRCaptureCacheWriteoutFlusing.store(false); return; } } - - for (;;) { - // This code is tricky to refactor so it doesn't allocate memory through glibc. - // The moved std::function object deallocates memory at the end of scope. - FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator glibc; - - AOTIRCaptureCacheWriteoutLock.lock(); - WriteOutFn fn = std::move(AOTIRCaptureCacheWriteoutQueue.front()); - bool MaybeEmpty = false; - AOTIRCaptureCacheWriteoutQueue.pop(); - MaybeEmpty = AOTIRCaptureCacheWriteoutQueue.size() == 0; - AOTIRCaptureCacheWriteoutLock.unlock(); - - fn(); - if (MaybeEmpty) { - std::shared_lock lk{AOTIRCaptureCacheWriteoutLock}; - if (AOTIRCaptureCacheWriteoutQueue.size() == 0) { - AOTIRCaptureCacheWriteoutFlusing.store(false); - return; - } - } - } - - LOGMAN_MSG_A_FMT("Must never get here"); } - void AOTIRCaptureCache::AOTIRCaptureCacheWriteoutQueue_Append(const WriteOutFn &fn) { - bool Flush = false; + LOGMAN_MSG_A_FMT("Must never get here"); +} - { - std::unique_lock lk{AOTIRCaptureCacheWriteoutLock}; - AOTIRCaptureCacheWriteoutQueue.push(fn); - if (AOTIRCaptureCacheWriteoutQueue.size() > 10000) { - Flush = true; - } - } +void AOTIRCaptureCache::AOTIRCaptureCacheWriteoutQueue_Append(const WriteOutFn& fn) { + bool Flush = false; - bool 
test_val = false; - if (Flush && AOTIRCaptureCacheWriteoutFlusing.compare_exchange_strong(test_val, true)) { - AOTIRCaptureCacheWriteoutQueue_Flush(); + { + std::unique_lock lk {AOTIRCaptureCacheWriteoutLock}; + AOTIRCaptureCacheWriteoutQueue.push(fn); + if (AOTIRCaptureCacheWriteoutQueue.size() > 10000) { + Flush = true; } } - void AOTIRCaptureCache::WriteFilesWithCode(const Context::AOTIRCodeFileWriterFn &Writer) { - std::shared_lock lk(AOTIRCacheLock); - for( const auto &Entry: AOTIRCache) { - if (Entry.second.ContainsCode) { - Writer(Entry.second.FileId, Entry.second.Filename); - } + bool test_val = false; + if (Flush && AOTIRCaptureCacheWriteoutFlusing.compare_exchange_strong(test_val, true)) { + AOTIRCaptureCacheWriteoutQueue_Flush(); + } +} + +void AOTIRCaptureCache::WriteFilesWithCode(const Context::AOTIRCodeFileWriterFn& Writer) { + std::shared_lock lk(AOTIRCacheLock); + for (const auto& Entry : AOTIRCache) { + if (Entry.second.ContainsCode) { + Writer(Entry.second.FileId, Entry.second.Filename); } } +} - AOTIRCaptureCache::PreGenerateIRFetchResult AOTIRCaptureCache::PreGenerateIRFetch(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP, FEXCore::IR::IRListView *IRList) { - auto AOTIRCacheEntry = CTX->SyscallHandler->LookupAOTIRCacheEntry(Thread, GuestRIP); +AOTIRCaptureCache::PreGenerateIRFetchResult +AOTIRCaptureCache::PreGenerateIRFetch(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP, FEXCore::IR::IRListView* IRList) { + auto AOTIRCacheEntry = CTX->SyscallHandler->LookupAOTIRCacheEntry(Thread, GuestRIP); - PreGenerateIRFetchResult Result{}; + PreGenerateIRFetchResult Result {}; - if (AOTIRCacheEntry.Entry) { - AOTIRCacheEntry.Entry->ContainsCode = true; - - if (IRList == nullptr && CTX->Config.AOTIRLoad()) { - auto Mod = AOTIRCacheEntry.Entry->Array; - - if (Mod != nullptr) - { - auto AOTEntry = Mod->Find(GuestRIP - AOTIRCacheEntry.VAFileStart); - - if (AOTEntry) { - // verify hash - auto MappedStart = GuestRIP; - auto hash = 
XXH3_64bits((void*)MappedStart, AOTEntry->GuestLength); - if (hash == AOTEntry->GuestHash) { - Result.IRList = AOTEntry->GetIRData(); - //LogMan::Msg::DFmt("using {} + {:x} -> {:x}\n", file->second.fileid, AOTEntry->first, GuestRIP); - - Result.RAData = AOTEntry->GetRAData()->CreateCopy(); - Result.DebugData = new FEXCore::Core::DebugData(); - Result.StartAddr = MappedStart; - Result.Length = AOTEntry->GuestLength; - Result.GeneratedIR = true; - } else { - LogMan::Msg::IFmt("AOTIR: hash check failed {:x}\n", MappedStart); - } + if (AOTIRCacheEntry.Entry) { + AOTIRCacheEntry.Entry->ContainsCode = true; + + if (IRList == nullptr && CTX->Config.AOTIRLoad()) { + auto Mod = AOTIRCacheEntry.Entry->Array; + + if (Mod != nullptr) { + auto AOTEntry = Mod->Find(GuestRIP - AOTIRCacheEntry.VAFileStart); + + if (AOTEntry) { + // verify hash + auto MappedStart = GuestRIP; + auto hash = XXH3_64bits((void*)MappedStart, AOTEntry->GuestLength); + if (hash == AOTEntry->GuestHash) { + Result.IRList = AOTEntry->GetIRData(); + // LogMan::Msg::DFmt("using {} + {:x} -> {:x}\n", file->second.fileid, AOTEntry->first, GuestRIP); + + Result.RAData = AOTEntry->GetRAData()->CreateCopy(); + Result.DebugData = new FEXCore::Core::DebugData(); + Result.StartAddr = MappedStart; + Result.Length = AOTEntry->GuestLength; + Result.GeneratedIR = true; } else { - //LogMan::Msg::IFmt("AOTIR: Failed to find {:x}, {:x}, {}\n", GuestRIP, GuestRIP - file->second.Start + file->second.Offset, file->second.fileid); + LogMan::Msg::IFmt("AOTIR: hash check failed {:x}\n", MappedStart); } + } else { + // LogMan::Msg::IFmt("AOTIR: Failed to find {:x}, {:x}, {}\n", GuestRIP, GuestRIP - file->second.Start + file->second.Offset, file->second.fileid); } } } - - return Result; } - bool AOTIRCaptureCache::PostCompileCode( - FEXCore::Core::InternalThreadState *Thread, - void* CodePtr, - uint64_t GuestRIP, - uint64_t StartAddr, - uint64_t Length, - FEXCore::IR::RegisterAllocationData::UniquePtr RAData, - 
FEXCore::IR::IRListView *IRList, - FEXCore::Core::DebugData *DebugData, - bool GeneratedIR) { - - // Both generated ir and LibraryJITName need a named region lookup - if (GeneratedIR || CTX->Config.LibraryJITNaming() || CTX->Config.GDBSymbols()) { - - auto AOTIRCacheEntry = CTX->SyscallHandler->LookupAOTIRCacheEntry(Thread, GuestRIP); - - if (AOTIRCacheEntry.Entry) { - if (DebugData && CTX->Config.LibraryJITNaming()) { - CTX->Symbols.RegisterNamedRegion(Thread->SymbolBuffer.get(), CodePtr, DebugData->HostCodeSize, AOTIRCacheEntry.Entry->Filename); - } + return Result; +} - if (CTX->Config.GDBSymbols()) { - GDBJITRegister(AOTIRCacheEntry.Entry, AOTIRCacheEntry.VAFileStart, GuestRIP, (uintptr_t)CodePtr, DebugData); - } +bool AOTIRCaptureCache::PostCompileCode(FEXCore::Core::InternalThreadState* Thread, void* CodePtr, uint64_t GuestRIP, uint64_t StartAddr, + uint64_t Length, FEXCore::IR::RegisterAllocationData::UniquePtr RAData, + FEXCore::IR::IRListView* IRList, FEXCore::Core::DebugData* DebugData, bool GeneratedIR) { + + // Both generated ir and LibraryJITName need a named region lookup + if (GeneratedIR || CTX->Config.LibraryJITNaming() || CTX->Config.GDBSymbols()) { + + auto AOTIRCacheEntry = CTX->SyscallHandler->LookupAOTIRCacheEntry(Thread, GuestRIP); - // Add to AOT cache if aot generation is enabled - if (GeneratedIR && RAData && - (CTX->Config.AOTIRCapture() || CTX->Config.AOTIRGenerate())) { - - auto hash = XXH3_64bits((void*)StartAddr, Length); - - auto LocalRIP = GuestRIP - AOTIRCacheEntry.VAFileStart; - auto LocalStartAddr = StartAddr - AOTIRCacheEntry.VAFileStart; - auto FileId = AOTIRCacheEntry.Entry->FileId; - // The underlying pointer and the unique_ptr deleter for RAData must - // be marshalled separately to the lambda below. 
Otherwise, the - // lambda can't be used as an std::function due to being non-copyable - auto RADataCopy = RAData->CreateCopy(); - auto RADataCopyDeleter = RADataCopy.get_deleter(); - auto IRListCopy = IRList->CreateCopy(); - - // The lambda is converted to std::function. This is tricky to refactor so it doesn't allocate memory through glibc. - FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator glibc; - AOTIRCaptureCacheWriteoutQueue_Append([this, LocalRIP, LocalStartAddr, Length, hash, IRListCopy, RADataCopy=RADataCopy.release(), RADataCopyDeleter, FileId]() { - - // It is guaranteed via AOTIRCaptureCacheWriteoutLock and AOTIRCaptureCacheWriteoutFlusing that this will not run concurrently - // Memory coherency is guaranteed via AOTIRCaptureCacheWriteoutLock - - auto *AotFile = &AOTIRCaptureCacheMap[FileId]; - - if (!AotFile->Stream) { - AotFile->Stream = AOTIRWriter(FileId); - uint64_t tag = FEXCore::IR::AOTIR_COOKIE; - AotFile->Stream->Write(&tag, sizeof(tag)); - } - AotFile->AppendAOTIRCaptureCache(LocalRIP, LocalStartAddr, Length, hash, IRListCopy, RADataCopy); - RADataCopyDeleter(RADataCopy); - delete IRListCopy; - }); - - if (CTX->Config.AOTIRGenerate()) { - // cleanup memory and early exit here -- we're not running the application - Thread->CPUBackend->ClearCache(); - return true; + if (AOTIRCacheEntry.Entry) { + if (DebugData && CTX->Config.LibraryJITNaming()) { + CTX->Symbols.RegisterNamedRegion(Thread->SymbolBuffer.get(), CodePtr, DebugData->HostCodeSize, AOTIRCacheEntry.Entry->Filename); + } + + if (CTX->Config.GDBSymbols()) { + GDBJITRegister(AOTIRCacheEntry.Entry, AOTIRCacheEntry.VAFileStart, GuestRIP, (uintptr_t)CodePtr, DebugData); + } + + // Add to AOT cache if aot generation is enabled + if (GeneratedIR && RAData && (CTX->Config.AOTIRCapture() || CTX->Config.AOTIRGenerate())) { + + auto hash = XXH3_64bits((void*)StartAddr, Length); + + auto LocalRIP = GuestRIP - AOTIRCacheEntry.VAFileStart; + auto LocalStartAddr = StartAddr - 
AOTIRCacheEntry.VAFileStart; + auto FileId = AOTIRCacheEntry.Entry->FileId; + // The underlying pointer and the unique_ptr deleter for RAData must + // be marshalled separately to the lambda below. Otherwise, the + // lambda can't be used as an std::function due to being non-copyable + auto RADataCopy = RAData->CreateCopy(); + auto RADataCopyDeleter = RADataCopy.get_deleter(); + auto IRListCopy = IRList->CreateCopy(); + + // The lambda is converted to std::function. This is tricky to refactor so it doesn't allocate memory through glibc. + FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator glibc; + AOTIRCaptureCacheWriteoutQueue_Append( + [this, LocalRIP, LocalStartAddr, Length, hash, IRListCopy, RADataCopy = RADataCopy.release(), RADataCopyDeleter, FileId]() { + // It is guaranteed via AOTIRCaptureCacheWriteoutLock and AOTIRCaptureCacheWriteoutFlusing that this will not run concurrently + // Memory coherency is guaranteed via AOTIRCaptureCacheWriteoutLock + + auto* AotFile = &AOTIRCaptureCacheMap[FileId]; + + if (!AotFile->Stream) { + AotFile->Stream = AOTIRWriter(FileId); + uint64_t tag = FEXCore::IR::AOTIR_COOKIE; + AotFile->Stream->Write(&tag, sizeof(tag)); } + AotFile->AppendAOTIRCaptureCache(LocalRIP, LocalStartAddr, Length, hash, IRListCopy, RADataCopy); + RADataCopyDeleter(RADataCopy); + delete IRListCopy; + }); + + if (CTX->Config.AOTIRGenerate()) { + // cleanup memory and early exit here -- we're not running the application + Thread->CPUBackend->ClearCache(); + return true; } } + } - // Insert to caches if we generated IR - if (GeneratedIR) { - // If the IR doesn't need to be retained then we can just delete it now - delete DebugData; - if (IRList->IsCopy()) delete IRList; + // Insert to caches if we generated IR + if (GeneratedIR) { + // If the IR doesn't need to be retained then we can just delete it now + delete DebugData; + if (IRList->IsCopy()) { + delete IRList; } } - - return false; } - AOTIRCacheEntry 
*AOTIRCaptureCache::LoadAOTIRCacheEntry(const fextl::string &filename) { - fextl::string base_filename = FHU::Filesystem::GetFilename(filename); + return false; +} + +AOTIRCacheEntry* AOTIRCaptureCache::LoadAOTIRCacheEntry(const fextl::string& filename) { + fextl::string base_filename = FHU::Filesystem::GetFilename(filename); - if (!base_filename.empty()) { - auto filename_hash = XXH3_64bits(filename.c_str(), filename.size()); + if (!base_filename.empty()) { + auto filename_hash = XXH3_64bits(filename.c_str(), filename.size()); - auto fileid = fextl::fmt::format("{}-{}-{}{}{}", - base_filename, - filename_hash, - (CTX->Config.SMCChecks == FEXCore::Config::CONFIG_SMC_FULL) ? 'S' : 's', - CTX->Config.TSOEnabled ? 'T' : 't', - CTX->Config.ABILocalFlags ? 'L' : 'l'); + auto fileid = fextl::fmt::format("{}-{}-{}{}{}", base_filename, filename_hash, + (CTX->Config.SMCChecks == FEXCore::Config::CONFIG_SMC_FULL) ? 'S' : 's', + CTX->Config.TSOEnabled ? 'T' : 't', CTX->Config.ABILocalFlags ? 'L' : 'l'); - std::unique_lock lk(AOTIRCacheLock); + std::unique_lock lk(AOTIRCacheLock); - auto Inserted = AOTIRCache.insert({fileid, AOTIRCacheEntry { .FileId = fileid, .Filename = filename }}); - auto Entry = &(Inserted.first->second); + auto Inserted = AOTIRCache.insert({fileid, AOTIRCacheEntry {.FileId = fileid, .Filename = filename}}); + auto Entry = &(Inserted.first->second); - LOGMAN_THROW_AA_FMT(Entry->Array == nullptr, "Duplicate LoadAOTIRCacheEntry"); + LOGMAN_THROW_AA_FMT(Entry->Array == nullptr, "Duplicate LoadAOTIRCacheEntry"); - if (CTX->Config.AOTIRLoad && AOTIRLoader) { - auto streamfd = AOTIRLoader(fileid); - if (streamfd != -1) { - FEXCore::IR::LoadAOTIRCache(Entry, streamfd); - close(streamfd); - } + if (CTX->Config.AOTIRLoad && AOTIRLoader) { + auto streamfd = AOTIRLoader(fileid); + if (streamfd != -1) { + FEXCore::IR::LoadAOTIRCache(Entry, streamfd); + close(streamfd); } - return Entry; } - - return nullptr; + return Entry; } - void 
AOTIRCaptureCache::UnloadAOTIRCacheEntry(AOTIRCacheEntry *Entry) { + return nullptr; +} + +void AOTIRCaptureCache::UnloadAOTIRCacheEntry(AOTIRCacheEntry* Entry) { #ifndef _WIN32 - LOGMAN_THROW_AA_FMT(Entry != nullptr, "Removing not existing entry"); + LOGMAN_THROW_AA_FMT(Entry != nullptr, "Removing not existing entry"); - if (Entry->Array) { - FEXCore::Allocator::munmap(Entry->FilePtr, Entry->Size); - Entry->Array = nullptr; - Entry->FilePtr = nullptr; - Entry->Size = 0; - } -#endif + if (Entry->Array) { + FEXCore::Allocator::munmap(Entry->FilePtr, Entry->Size); + Entry->Array = nullptr; + Entry->FilePtr = nullptr; + Entry->Size = 0; } +#endif } +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/IRDumper.cpp b/FEXCore/Source/Interface/IR/IRDumper.cpp index 5cd4dcce07..f775a1cffa 100644 --- a/FEXCore/Source/Interface/IR/IRDumper.cpp +++ b/FEXCore/Source/Interface/IR/IRDumper.cpp @@ -16,7 +16,7 @@ tags: ir|dumper #include #include #include -#include +#include namespace FEXCore::IR { #define IROP_GETNAME_IMPL @@ -28,56 +28,36 @@ namespace FEXCore::IR { #include -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, const SHA256Sum &Arg) { +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, const SHA256Sum& Arg) { *out << "sha256:"; - for(auto byte: Arg.data) + for (auto byte : Arg.data) { *out << std::hex << std::setfill('0') << std::setw(2) << (unsigned int)byte; + } } -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, uint64_t Arg) { +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, uint64_t Arg) { *out << "#0x" << std::hex << Arg; } [[maybe_unused]] -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, const char* Arg) { - *out << Arg; +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, const char* Arg) { + *out << Arg; } -static void 
PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, CondClassType Arg) { +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, CondClassType Arg) { if (Arg == COND_AL) { *out << "ALWAYS"; return; } - static constexpr std::array CondNames = { - "EQ", - "NEQ", - "UGE", - "ULT", - "MI", - "PL", - "VS", - "VC", - "UGT", - "ULE", - "SGE", - "SLT", - "SGT", - "SLE", - "ANDZ", - "ANDNZ", - "FLU", - "FGE", - "FLEU", - "FGT", - "FU", - "FNU" - }; + static constexpr std::array CondNames = {"EQ", "NEQ", "UGE", "ULT", "MI", "PL", "VS", "VC", + "UGT", "ULE", "SGE", "SLT", "SGT", "SLE", "ANDZ", "ANDNZ", + "FLU", "FGE", "FLEU", "FGT", "FU", "FNU"}; *out << CondNames[Arg]; } -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, MemOffsetType Arg) { +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, MemOffsetType Arg) { static constexpr std::array Names = { "SXTX", "UXTW", @@ -87,22 +67,23 @@ static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const *out << Names[Arg]; } -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, RegisterClassType Arg) { - if (Arg == GPRClass.Val) +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, RegisterClassType Arg) { + if (Arg == GPRClass.Val) { *out << "GPR"; - else if (Arg == GPRFixedClass.Val) + } else if (Arg == GPRFixedClass.Val) { *out << "GPRFixed"; - else if (Arg == FPRClass.Val) + } else if (Arg == FPRClass.Val) { *out << "FPR"; - else if (Arg == FPRFixedClass.Val) + } else if (Arg == FPRFixedClass.Val) { *out << "FPRFixed"; - else if (Arg == GPRPairClass.Val) + } else if (Arg == GPRPairClass.Val) { *out << "GPRPair"; - else + } else { *out << "Unknown Registerclass " << Arg; + } } -static void PrintArg(fextl::stringstream *out, IRListView const* IR, OrderedNodeWrapper Arg, IR::RegisterAllocationData *RAData) { +static void 
PrintArg(fextl::stringstream* out, const IRListView* IR, OrderedNodeWrapper Arg, IR::RegisterAllocationData* RAData) { auto [CodeNode, IROp] = IR->at(Arg)(); const auto ArgID = Arg.ID(); @@ -114,14 +95,14 @@ static void PrintArg(fextl::stringstream *out, IRListView const* IR, OrderedNode auto PhyReg = RAData->GetNodeRegister(ArgID); switch (PhyReg.Class) { - case FEXCore::IR::GPRClass.Val: *out << "(GPR"; break; - case FEXCore::IR::GPRFixedClass.Val: *out << "(GPRFixed"; break; - case FEXCore::IR::FPRClass.Val: *out << "(FPR"; break; - case FEXCore::IR::FPRFixedClass.Val: *out << "(FPRFixed"; break; - case FEXCore::IR::GPRPairClass.Val: *out << "(GPRPair"; break; - case FEXCore::IR::ComplexClass.Val: *out << "(Complex"; break; - case FEXCore::IR::InvalidClass.Val: *out << "(Invalid"; break; - default: *out << "(Unknown"; break; + case FEXCore::IR::GPRClass.Val: *out << "(GPR"; break; + case FEXCore::IR::GPRFixedClass.Val: *out << "(GPRFixed"; break; + case FEXCore::IR::FPRClass.Val: *out << "(FPR"; break; + case FEXCore::IR::FPRFixedClass.Val: *out << "(FPRFixed"; break; + case FEXCore::IR::GPRPairClass.Val: *out << "(GPRPair"; break; + case FEXCore::IR::ComplexClass.Val: *out << "(Complex"; break; + case FEXCore::IR::InvalidClass.Val: *out << "(Invalid"; break; + default: *out << "(Unknown"; break; } if (PhyReg.Class != FEXCore::IR::InvalidClass.Val) { @@ -148,131 +129,127 @@ static void PrintArg(fextl::stringstream *out, IRListView const* IR, OrderedNode if (NumElements > 1) { *out << "v" << std::dec << NumElements; } - } } -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::FenceType Arg) { +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, FEXCore::IR::FenceType Arg) { if (Arg == IR::Fence_Load) { *out << "Loads"; - } - else if (Arg == IR::Fence_Store) { + } else if (Arg == IR::Fence_Store) { *out << "Stores"; - } - else if (Arg == IR::Fence_LoadStore) { + } else if (Arg == 
IR::Fence_LoadStore) { *out << "LoadStores"; - } - else { + } else { *out << ""; } } -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::RoundType Arg) { +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, FEXCore::IR::RoundType Arg) { switch (Arg) { - case FEXCore::IR::Round_Nearest: *out << "Nearest"; break; - case FEXCore::IR::Round_Negative_Infinity: *out << "-Inf"; break; - case FEXCore::IR::Round_Positive_Infinity: *out << "+Inf"; break; - case FEXCore::IR::Round_Towards_Zero: *out << "Towards Zero"; break; - case FEXCore::IR::Round_Host: *out << "Host"; break; - default: *out << ""; break; + case FEXCore::IR::Round_Nearest: *out << "Nearest"; break; + case FEXCore::IR::Round_Negative_Infinity: *out << "-Inf"; break; + case FEXCore::IR::Round_Positive_Infinity: *out << "+Inf"; break; + case FEXCore::IR::Round_Towards_Zero: *out << "Towards Zero"; break; + case FEXCore::IR::Round_Host: *out << "Host"; break; + default: *out << ""; break; } } -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::SyscallFlags Arg) { +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, FEXCore::IR::SyscallFlags Arg) { switch (Arg) { - case FEXCore::IR::SyscallFlags::DEFAULT: *out << "Default"; break; - case FEXCore::IR::SyscallFlags::OPTIMIZETHROUGH: *out << "Optimize Through"; break; - case FEXCore::IR::SyscallFlags::NOSYNCSTATEONENTRY: *out << "No Sync State on Entry"; break; - case FEXCore::IR::SyscallFlags::NORETURN: *out << "No Return"; break; - case FEXCore::IR::SyscallFlags::NOSIDEEFFECTS: *out << "No Side Effects"; break; - default: *out << ""; break; + case FEXCore::IR::SyscallFlags::DEFAULT: *out << "Default"; break; + case FEXCore::IR::SyscallFlags::OPTIMIZETHROUGH: *out << "Optimize Through"; break; + case FEXCore::IR::SyscallFlags::NOSYNCSTATEONENTRY: *out << "No Sync State on Entry"; break; + case 
FEXCore::IR::SyscallFlags::NORETURN: *out << "No Return"; break; + case FEXCore::IR::SyscallFlags::NOSIDEEFFECTS: *out << "No Side Effects"; break; + default: *out << ""; break; } } -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::NamedVectorConstant Arg) { +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, FEXCore::IR::NamedVectorConstant Arg) { switch (Arg) { - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX: { - *out << "u16_incremental_index"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX_UPPER: { - *out << "u16_incremental_index_upper"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT: { - *out << "addsubps_invert"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT_UPPER: { - *out << "addsubps_invert_upper"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT: { - *out << "addsubpd_invert"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT_UPPER: { - *out << "addsubpd_invert_upper"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MOVMSKPS_SHIFT: { - *out << "movmskps_shift"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE: { - *out << "aeskeygenassist_swizzle"; - break; - } - case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO: { - *out << "vectorzero"; - break; - } - default: *out << ""; break; + case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX: { + *out << "u16_incremental_index"; + break; + } + case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX_UPPER: { + *out << "u16_incremental_index_upper"; + break; + } + case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT: { + *out << "addsubps_invert"; + break; + } + case 
FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT_UPPER: { + *out << "addsubps_invert_upper"; + break; + } + case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT: { + *out << "addsubpd_invert"; + break; + } + case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT_UPPER: { + *out << "addsubpd_invert_upper"; + break; + } + case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MOVMSKPS_SHIFT: { + *out << "movmskps_shift"; + break; + } + case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE: { + *out << "aeskeygenassist_swizzle"; + break; + } + case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO: { + *out << "vectorzero"; + break; + } + default: *out << ""; break; } } -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::OpSize Arg) { +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, FEXCore::IR::OpSize Arg) { switch (Arg) { - case OpSize::i8Bit: *out << "i8"; break; - case OpSize::i16Bit: *out << "i16"; break; - case OpSize::i32Bit: *out << "i32"; break; - case OpSize::i64Bit: *out << "i64"; break; - case OpSize::i128Bit: *out << "i128"; break; - case OpSize::i256Bit: *out << "i256"; break; - default: *out << ""; break; + case OpSize::i8Bit: *out << "i8"; break; + case OpSize::i16Bit: *out << "i16"; break; + case OpSize::i32Bit: *out << "i32"; break; + case OpSize::i64Bit: *out << "i64"; break; + case OpSize::i128Bit: *out << "i128"; break; + case OpSize::i256Bit: *out << "i256"; break; + default: *out << ""; break; } } -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::FloatCompareOp Arg) { +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, FEXCore::IR::FloatCompareOp Arg) { switch (Arg) { - case FloatCompareOp::EQ: *out << "FEQ"; break; - case FloatCompareOp::LT: *out << "FLT"; break; - case FloatCompareOp::LE: *out << "FLE"; break; 
- case FloatCompareOp::UNO: *out << "UNO"; break; - case FloatCompareOp::NEQ: *out << "NEQ"; break; - case FloatCompareOp::ORD: *out << "ORD"; break; - default: *out << ""; break; + case FloatCompareOp::EQ: *out << "FEQ"; break; + case FloatCompareOp::LT: *out << "FLT"; break; + case FloatCompareOp::LE: *out << "FLE"; break; + case FloatCompareOp::UNO: *out << "UNO"; break; + case FloatCompareOp::NEQ: *out << "NEQ"; break; + case FloatCompareOp::ORD: *out << "ORD"; break; + default: *out << ""; break; } } -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::BreakDefinition Arg) { +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, FEXCore::IR::BreakDefinition Arg) { *out << "{" << Arg.ErrorRegister << "."; *out << static_cast(Arg.Signal) << "."; *out << static_cast(Arg.TrapNumber) << "."; *out << static_cast(Arg.si_code) << "}"; } -static void PrintArg(fextl::stringstream *out, [[maybe_unused]] IRListView const* IR, FEXCore::IR::ShiftType Arg) { +static void PrintArg(fextl::stringstream* out, [[maybe_unused]] const IRListView* IR, FEXCore::IR::ShiftType Arg) { switch (Arg) { - case ShiftType::LSL: *out << "LSL"; break; - case ShiftType::LSR: *out << "LSR"; break; - case ShiftType::ASR: *out << "ASR"; break; - case ShiftType::ROR: *out << "ROR"; break; - default: *out << ""; break; + case ShiftType::LSL: *out << "LSL"; break; + case ShiftType::LSR: *out << "LSR"; break; + case ShiftType::ASR: *out << "ASR"; break; + case ShiftType::ROR: *out << "ROR"; break; + default: *out << ""; break; } } -void Dump(fextl::stringstream *out, IRListView const* IR, IR::RegisterAllocationData *RAData) { +void Dump(fextl::stringstream* out, const IRListView* IR, IR::RegisterAllocationData* RAData) { auto HeaderOp = IR->GetHeader(); int8_t CurrentIndent = 0; @@ -284,7 +261,8 @@ void Dump(fextl::stringstream *out, IRListView const* IR, IR::RegisterAllocation ++CurrentIndent; AddIndent(); - *out << "(%0) " 
<< "IRHeader "; + *out << "(%0) " + << "IRHeader "; *out << "%" << HeaderOp->Blocks.ID() << ", "; *out << "#" << std::dec << HeaderOp->OriginalRIP << ", "; *out << "#" << std::dec << HeaderOp->BlockCount << ", "; @@ -295,7 +273,8 @@ void Dump(fextl::stringstream *out, IRListView const* IR, IR::RegisterAllocation auto BlockIROp = BlockHeader->C(); AddIndent(); - *out << "(%" << IR->GetID(BlockNode) << ") " << "CodeBlock "; + *out << "(%" << IR->GetID(BlockNode) << ") " + << "CodeBlock "; *out << "%" << BlockIROp->Begin.ID() << ", "; *out << "%" << BlockIROp->Last.ID() << std::endl; @@ -325,14 +304,14 @@ void Dump(fextl::stringstream *out, IRListView const* IR, IR::RegisterAllocation if (RAData) { auto PhyReg = RAData->GetNodeRegister(ID); switch (PhyReg.Class) { - case FEXCore::IR::GPRClass.Val: *out << "(GPR"; break; - case FEXCore::IR::GPRFixedClass.Val: *out << "(GPRFixed"; break; - case FEXCore::IR::FPRClass.Val: *out << "(FPR"; break; - case FEXCore::IR::FPRFixedClass.Val: *out << "(FPRFixed"; break; - case FEXCore::IR::GPRPairClass.Val: *out << "(GPRPair"; break; - case FEXCore::IR::ComplexClass.Val: *out << "(Complex"; break; - case FEXCore::IR::InvalidClass.Val: *out << "(Invalid"; break; - default: *out << "(Unknown"; break; + case FEXCore::IR::GPRClass.Val: *out << "(GPR"; break; + case FEXCore::IR::GPRFixedClass.Val: *out << "(GPRFixed"; break; + case FEXCore::IR::FPRClass.Val: *out << "(FPR"; break; + case FEXCore::IR::FPRFixedClass.Val: *out << "(FPRFixed"; break; + case FEXCore::IR::GPRPairClass.Val: *out << "(GPRPair"; break; + case FEXCore::IR::ComplexClass.Val: *out << "(Complex"; break; + case FEXCore::IR::InvalidClass.Val: *out << "(Invalid"; break; + default: *out << "(Unknown"; break; } if (PhyReg.Class != FEXCore::IR::InvalidClass.Val) { *out << std::dec << (uint32_t)PhyReg.Reg << ")"; @@ -348,8 +327,7 @@ void Dump(fextl::stringstream *out, IRListView const* IR, IR::RegisterAllocation } *out << " = "; - } - else { + } else { uint32_t 
ElementSize = IROp->ElementSize; if (!IROp->ElementSize) { @@ -369,19 +347,18 @@ void Dump(fextl::stringstream *out, IRListView const* IR, IR::RegisterAllocation } *out << Name; - #define IROP_ARGPRINTER_HELPER - #include - default: *out << ""; break; - } +#define IROP_ARGPRINTER_HELPER +#include + default: *out << ""; break; + } - //*out << " (" << std::dec << CodeNode->GetUses() << ")"; + //*out << " (" << std::dec << CodeNode->GetUses() << ")"; - *out << "\n"; - } + *out << "\n"; } - - CurrentIndent = std::max(0, CurrentIndent - 1); } -} + CurrentIndent = std::max(0, CurrentIndent - 1); +} +} } diff --git a/FEXCore/Source/Interface/IR/IREmitter.cpp b/FEXCore/Source/Interface/IR/IREmitter.cpp index d811ce4acf..3439fd068b 100644 --- a/FEXCore/Source/Interface/IR/IREmitter.cpp +++ b/FEXCore/Source/Interface/IR/IREmitter.cpp @@ -21,77 +21,69 @@ namespace FEXCore::IR { bool IsFragmentExit(FEXCore::IR::IROps Op) { switch (Op) { - case OP_EXITFUNCTION: - case OP_BREAK: - return true; - default: - return false; + case OP_EXITFUNCTION: + case OP_BREAK: return true; + default: return false; } } bool IsBlockExit(FEXCore::IR::IROps Op) { - switch(Op) { - case OP_JUMP: - case OP_CONDJUMP: - return true; - default: - return IsFragmentExit(Op); + switch (Op) { + case OP_JUMP: + case OP_CONDJUMP: return true; + default: return IsFragmentExit(Op); } } -FEXCore::IR::RegisterClassType IREmitter::WalkFindRegClass(OrderedNode *Node) { +FEXCore::IR::RegisterClassType IREmitter::WalkFindRegClass(OrderedNode* Node) { auto Class = GetOpRegClass(Node); switch (Class) { - case GPRClass: - case GPRPairClass: - case FPRClass: - case GPRFixedClass: - case FPRFixedClass: - case InvalidClass: - return Class; - default: break; + case GPRClass: + case GPRPairClass: + case FPRClass: + case GPRFixedClass: + case FPRFixedClass: + case InvalidClass: return Class; + default: break; } // Complex case, needs to be handled on an op by op basis uintptr_t DataBegin = DualListData.DataBegin(); - 
FEXCore::IR::IROp_Header *IROp = Node->Op(DataBegin); + FEXCore::IR::IROp_Header* IROp = Node->Op(DataBegin); switch (IROp->Op) { - case IROps::OP_LOADREGISTER: { - auto Op = IROp->C(); - return Op->Class; - break; - } - case IROps::OP_LOADCONTEXT: { - auto Op = IROp->C(); - return Op->Class; - break; - } - case IROps::OP_LOADCONTEXTINDEXED: { - auto Op = IROp->C(); - return Op->Class; - break; - } - case IROps::OP_FILLREGISTER: { - auto Op = IROp->C(); - return Op->Class; - break; - } - case IROps::OP_LOADMEM: { - auto Op = IROp->C(); - return Op->Class; - break; - } - case IROps::OP_LOADMEMTSO: { - auto Op = IROp->C(); - return Op->Class; - break; - } - default: - LOGMAN_MSG_A_FMT("Unhandled op type: {} {} in argument class validation", - ToUnderlying(IROp->Op), GetOpName(Node)); - break; + case IROps::OP_LOADREGISTER: { + auto Op = IROp->C(); + return Op->Class; + break; + } + case IROps::OP_LOADCONTEXT: { + auto Op = IROp->C(); + return Op->Class; + break; + } + case IROps::OP_LOADCONTEXTINDEXED: { + auto Op = IROp->C(); + return Op->Class; + break; + } + case IROps::OP_FILLREGISTER: { + auto Op = IROp->C(); + return Op->Class; + break; + } + case IROps::OP_LOADMEM: { + auto Op = IROp->C(); + return Op->Class; + break; + } + case IROps::OP_LOADMEMTSO: { + auto Op = IROp->C(); + return Op->Class; + break; + } + default: LOGMAN_MSG_A_FMT("Unhandled op type: {} {} in argument class validation", ToUnderlying(IROp->Op), GetOpName(Node)); break; } return InvalidClass; } @@ -106,7 +98,7 @@ void IREmitter::ResetWorkingList() { CurrentCodeBlock = nullptr; } -void IREmitter::ReplaceAllUsesWithRange(OrderedNode *Node, OrderedNode *NewNode, AllNodesIterator Begin, AllNodesIterator End) { +void IREmitter::ReplaceAllUsesWithRange(OrderedNode* Node, OrderedNode* NewNode, AllNodesIterator Begin, AllNodesIterator End) { uintptr_t ListBegin = DualListData.ListBegin(); auto NodeId = Node->Wrapped(ListBegin).ID(); @@ -131,23 +123,23 @@ void 
IREmitter::ReplaceAllUsesWithRange(OrderedNode *Node, OrderedNode *NewNode, } } -void IREmitter::ReplaceNodeArgument(OrderedNode *Node, uint8_t Arg, OrderedNode *NewArg) { +void IREmitter::ReplaceNodeArgument(OrderedNode* Node, uint8_t Arg, OrderedNode* NewArg) { uintptr_t ListBegin = DualListData.ListBegin(); uintptr_t DataBegin = DualListData.DataBegin(); - FEXCore::IR::IROp_Header *IROp = Node->Op(DataBegin); + FEXCore::IR::IROp_Header* IROp = Node->Op(DataBegin); OrderedNodeWrapper OldArgWrapper = IROp->Args[Arg]; - OrderedNode *OldArg = OldArgWrapper.GetNode(ListBegin); + OrderedNode* OldArg = OldArgWrapper.GetNode(ListBegin); OldArg->RemoveUse(); NewArg->AddUse(); IROp->Args[Arg].NodeOffset = NewArg->Wrapped(ListBegin).NodeOffset; } -void IREmitter::RemoveArgUses(OrderedNode *Node) { +void IREmitter::RemoveArgUses(OrderedNode* Node) { uintptr_t ListBegin = DualListData.ListBegin(); uintptr_t DataBegin = DualListData.DataBegin(); - FEXCore::IR::IROp_Header *IROp = Node->Op(DataBegin); + FEXCore::IR::IROp_Header* IROp = Node->Op(DataBegin); const uint8_t NumArgs = IR::GetArgs(IROp->Op); for (uint8_t i = 0; i < NumArgs; ++i) { @@ -156,7 +148,7 @@ void IREmitter::RemoveArgUses(OrderedNode *Node) { } } -void IREmitter::Remove(OrderedNode *Node) { +void IREmitter::Remove(OrderedNode* Node) { RemoveArgUses(Node); Node->Unlink(DualListData.ListBegin()); @@ -175,8 +167,9 @@ IREmitter::IRPair IREmitter::CreateNewCodeBlockAfter(OrderedNode // Find last block auto LastBlock = CurrentCodeBlock; - while (LastBlock->Header.Next.GetNode(DualListData.ListBegin()) != InvalidNode) + while (LastBlock->Header.Next.GetNode(DualListData.ListBegin()) != InvalidNode) { LastBlock = LastBlock->Header.Next.GetNode(DualListData.ListBegin()); + } // Append it after the last block LinkCodeBlocks(LastBlock, CodeNode); @@ -187,34 +180,34 @@ IREmitter::IRPair IREmitter::CreateNewCodeBlockAfter(OrderedNode return CodeNode; } -void IREmitter::SetCurrentCodeBlock(OrderedNode *Node) { +void 
IREmitter::SetCurrentCodeBlock(OrderedNode* Node) { CurrentCodeBlock = Node; - LOGMAN_THROW_A_FMT(Node->Op(DualListData.DataBegin())->Op == OP_CODEBLOCK, "Node wasn't codeblock. It was '{}'", IR::GetName(Node->Op(DualListData.DataBegin())->Op)); + LOGMAN_THROW_A_FMT(Node->Op(DualListData.DataBegin())->Op == OP_CODEBLOCK, "Node wasn't codeblock. It was '{}'", + IR::GetName(Node->Op(DualListData.DataBegin())->Op)); SetWriteCursor(Node->Op(DualListData.DataBegin())->CW()->Begin.GetNode(DualListData.ListBegin())); } -void IREmitter::ReplaceWithConstant(OrderedNode *Node, uint64_t Value) { - auto Header = Node->Op(DualListData.DataBegin()); +void IREmitter::ReplaceWithConstant(OrderedNode* Node, uint64_t Value) { + auto Header = Node->Op(DualListData.DataBegin()); - if (IRSizes[Header->Op] >= sizeof(IROp_Constant)) { - // Unlink any arguments the node currently has - RemoveArgUses(Node); + if (IRSizes[Header->Op] >= sizeof(IROp_Constant)) { + // Unlink any arguments the node currently has + RemoveArgUses(Node); - // Overwrite data with the new constant op - Header->Op = OP_CONSTANT; - auto Const = Header->CW(); - Const->Constant = Value; - } else { - // Fallback path for when the node to overwrite is too small - auto cursor = GetWriteCursor(); - SetWriteCursor(Node); + // Overwrite data with the new constant op + Header->Op = OP_CONSTANT; + auto Const = Header->CW(); + Const->Constant = Value; + } else { + // Fallback path for when the node to overwrite is too small + auto cursor = GetWriteCursor(); + SetWriteCursor(Node); - auto NewNode = _Constant(Value); - ReplaceAllUsesWith(Node, NewNode); + auto NewNode = _Constant(Value); + ReplaceAllUsesWith(Node, NewNode); - SetWriteCursor(cursor); - } + SetWriteCursor(cursor); } - } +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/IRParser.cpp b/FEXCore/Source/Interface/IR/IRParser.cpp index 94c16b4c8b..ad1a1bdd2f 100644 --- a/FEXCore/Source/Interface/IR/IRParser.cpp +++ 
b/FEXCore/Source/Interface/IR/IRParser.cpp @@ -31,23 +31,23 @@ tags: ir|parser namespace FEXCore::IR { namespace { -enum class DecodeFailure { - DECODE_OKAY, - DECODE_UNKNOWN_TYPE, - DECODE_INVALID, - DECODE_INVALIDCHAR, - DECODE_INVALIDRANGE, - DECODE_INVALIDREGISTERCLASS, - DECODE_UNKNOWN_SSA, - DECODE_INVALID_CONDFLAG, - DECODE_INVALID_MEMOFFSETTYPE, - DECODE_INVALID_FENCETYPE, - DECODE_INVALID_BREAKTYPE, - DECODE_INVALID_OPSIZE, -}; - -fextl::string DecodeErrorToString(DecodeFailure Failure) { - switch (Failure) { + enum class DecodeFailure { + DECODE_OKAY, + DECODE_UNKNOWN_TYPE, + DECODE_INVALID, + DECODE_INVALIDCHAR, + DECODE_INVALIDRANGE, + DECODE_INVALIDREGISTERCLASS, + DECODE_UNKNOWN_SSA, + DECODE_INVALID_CONDFLAG, + DECODE_INVALID_MEMOFFSETTYPE, + DECODE_INVALID_FENCETYPE, + DECODE_INVALID_BREAKTYPE, + DECODE_INVALID_OPSIZE, + }; + + fextl::string DecodeErrorToString(DecodeFailure Failure) { + switch (Failure) { case DecodeFailure::DECODE_OKAY: return "Okay"; case DecodeFailure::DECODE_UNKNOWN_TYPE: return "Unknown Type"; case DecodeFailure::DECODE_INVALID: return "Invalid"; @@ -60,387 +60,414 @@ fextl::string DecodeErrorToString(DecodeFailure Failure) { case DecodeFailure::DECODE_INVALID_FENCETYPE: return "Invalid Fence Type"; case DecodeFailure::DECODE_INVALID_BREAKTYPE: return "Invalid Break Reason Type"; case DecodeFailure::DECODE_INVALID_OPSIZE: return "Invalid Operation size name"; + } + return "Unknown Error"; } - return "Unknown Error"; -} -class IRParser: public FEXCore::IR::IREmitter { + class IRParser : public FEXCore::IR::IREmitter { public: - template - std::pair DecodeValue(const fextl::string &Arg) { - return {DecodeFailure::DECODE_UNKNOWN_TYPE, {}}; - } - - template<> - std::pair DecodeValue(const fextl::string &Arg) { - if (Arg.at(0) != '#') return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + template + std::pair DecodeValue(const fextl::string& Arg) { + return {DecodeFailure::DECODE_UNKNOWN_TYPE, {}}; + } - uint8_t Result = 
strtoul(&Arg.at(1), nullptr, 0); - if (errno == ERANGE) return {DecodeFailure::DECODE_INVALIDRANGE, 0}; - return {DecodeFailure::DECODE_OKAY, Result}; - } + template<> + std::pair DecodeValue(const fextl::string& Arg) { + if (Arg.at(0) != '#') { + return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + } - template<> - std::pair DecodeValue(const fextl::string &Arg) { - if (Arg.at(0) != '#') return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + uint8_t Result = strtoul(&Arg.at(1), nullptr, 0); + if (errno == ERANGE) { + return {DecodeFailure::DECODE_INVALIDRANGE, 0}; + } + return {DecodeFailure::DECODE_OKAY, Result}; + } - uint8_t Result = strtoul(&Arg.at(1), nullptr, 0); - if (errno == ERANGE || Result > 1) return {DecodeFailure::DECODE_INVALIDRANGE, 0}; - return {DecodeFailure::DECODE_OKAY, Result != 0}; - } + template<> + std::pair DecodeValue(const fextl::string& Arg) { + if (Arg.at(0) != '#') { + return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + } - template<> - std::pair DecodeValue(const fextl::string &Arg) { - if (Arg.at(0) != '#') return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + uint8_t Result = strtoul(&Arg.at(1), nullptr, 0); + if (errno == ERANGE || Result > 1) { + return {DecodeFailure::DECODE_INVALIDRANGE, 0}; + } + return {DecodeFailure::DECODE_OKAY, Result != 0}; + } - uint16_t Result = strtoul(&Arg.at(1), nullptr, 0); - if (errno == ERANGE) return {DecodeFailure::DECODE_INVALIDRANGE, 0}; - return {DecodeFailure::DECODE_OKAY, Result}; - } + template<> + std::pair DecodeValue(const fextl::string& Arg) { + if (Arg.at(0) != '#') { + return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + } - template<> - std::pair DecodeValue(const fextl::string &Arg) { - if (Arg.at(0) != '#') return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + uint16_t Result = strtoul(&Arg.at(1), nullptr, 0); + if (errno == ERANGE) { + return {DecodeFailure::DECODE_INVALIDRANGE, 0}; + } + return {DecodeFailure::DECODE_OKAY, Result}; + } - uint32_t Result = strtoul(&Arg.at(1), nullptr, 0); - if (errno == 
ERANGE) return {DecodeFailure::DECODE_INVALIDRANGE, 0}; - return {DecodeFailure::DECODE_OKAY, Result}; - } + template<> + std::pair DecodeValue(const fextl::string& Arg) { + if (Arg.at(0) != '#') { + return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + } - template<> - std::pair DecodeValue(const fextl::string &Arg) { - if (Arg.at(0) != '#') return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + uint32_t Result = strtoul(&Arg.at(1), nullptr, 0); + if (errno == ERANGE) { + return {DecodeFailure::DECODE_INVALIDRANGE, 0}; + } + return {DecodeFailure::DECODE_OKAY, Result}; + } - uint64_t Result = strtoull(&Arg.at(1), nullptr, 0); - if (errno == ERANGE) return {DecodeFailure::DECODE_INVALIDRANGE, 0}; - return {DecodeFailure::DECODE_OKAY, Result}; - } + template<> + std::pair DecodeValue(const fextl::string& Arg) { + if (Arg.at(0) != '#') { + return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + } - template<> - std::pair DecodeValue(const fextl::string &Arg) { - if (Arg.at(0) != '#') return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + uint64_t Result = strtoull(&Arg.at(1), nullptr, 0); + if (errno == ERANGE) { + return {DecodeFailure::DECODE_INVALIDRANGE, 0}; + } + return {DecodeFailure::DECODE_OKAY, Result}; + } - int64_t Result = (int64_t)strtoull(&Arg.at(1), nullptr, 0); - if (errno == ERANGE) return {DecodeFailure::DECODE_INVALIDRANGE, 0}; - return {DecodeFailure::DECODE_OKAY, Result}; - } + template<> + std::pair DecodeValue(const fextl::string& Arg) { + if (Arg.at(0) != '#') { + return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + } - template<> - std::pair DecodeValue(const fextl::string &Arg) { - IR::SHA256Sum Result; + int64_t Result = (int64_t)strtoull(&Arg.at(1), nullptr, 0); + if (errno == ERANGE) { + return {DecodeFailure::DECODE_INVALIDRANGE, 0}; + } + return {DecodeFailure::DECODE_OKAY, Result}; + } - if (Arg.at(0) != 's' || Arg.at(1) != 'h' || Arg.at(2) != 'a' || Arg.at(3) != '2' || Arg.at(4) != '5' || Arg.at(5) != '6' || Arg.at(6) != ':') - return 
{DecodeFailure::DECODE_INVALIDCHAR, Result}; + template<> + std::pair DecodeValue(const fextl::string& Arg) { + IR::SHA256Sum Result; - auto GetDigit = [](const fextl::string &Arg, int pos, uint8_t *val) { - auto chr = Arg.at(pos); - if (chr >= '0' && chr <= '9') { - *val = chr - '0'; - return true; - } else if (chr >= 'a' && chr <= 'f') { - *val = 10 + chr - 'a'; - return true; - } else { - return false; + if (Arg.at(0) != 's' || Arg.at(1) != 'h' || Arg.at(2) != 'a' || Arg.at(3) != '2' || Arg.at(4) != '5' || Arg.at(5) != '6' || Arg.at(6) != ':') { + return {DecodeFailure::DECODE_INVALIDCHAR, Result}; } - }; - for (size_t i = 0; i < sizeof(Result.data); i++) { - uint8_t high, low; - if (!GetDigit(Arg, 7 + 2 * i + 0, &high) || !GetDigit(Arg, 7 + 2 * i + 1, &low)) { - return {DecodeFailure::DECODE_INVALIDRANGE, Result}; + auto GetDigit = [](const fextl::string& Arg, int pos, uint8_t* val) { + auto chr = Arg.at(pos); + if (chr >= '0' && chr <= '9') { + *val = chr - '0'; + return true; + } else if (chr >= 'a' && chr <= 'f') { + *val = 10 + chr - 'a'; + return true; + } else { + return false; + } + }; + + for (size_t i = 0; i < sizeof(Result.data); i++) { + uint8_t high, low; + if (!GetDigit(Arg, 7 + 2 * i + 0, &high) || !GetDigit(Arg, 7 + 2 * i + 1, &low)) { + return {DecodeFailure::DECODE_INVALIDRANGE, Result}; + } + Result.data[i] = high * 16 + low; } - Result.data[i] = high * 16 + low; + + return {DecodeFailure::DECODE_OKAY, Result}; } - return {DecodeFailure::DECODE_OKAY, Result}; - } + template<> + std::pair DecodeValue(const fextl::string& Arg) { + if (Arg == "GPR") { + return {DecodeFailure::DECODE_OKAY, FEXCore::IR::GPRClass}; + } else if (Arg == "FPR") { + return {DecodeFailure::DECODE_OKAY, FEXCore::IR::FPRClass}; + } else if (Arg == "GPRFixed") { + return {DecodeFailure::DECODE_OKAY, FEXCore::IR::GPRFixedClass}; + } else if (Arg == "FPRFixed") { + return {DecodeFailure::DECODE_OKAY, FEXCore::IR::FPRFixedClass}; + } else if (Arg == "GPRPair") { + return 
{DecodeFailure::DECODE_OKAY, FEXCore::IR::GPRPairClass}; + } else if (Arg == "Complex") { + return {DecodeFailure::DECODE_OKAY, FEXCore::IR::ComplexClass}; + } - template<> - std::pair DecodeValue(const fextl::string &Arg) { - if (Arg == "GPR") { - return {DecodeFailure::DECODE_OKAY, FEXCore::IR::GPRClass}; - } - else if (Arg == "FPR") { - return {DecodeFailure::DECODE_OKAY, FEXCore::IR::FPRClass}; - } - else if (Arg == "GPRFixed") { - return {DecodeFailure::DECODE_OKAY, FEXCore::IR::GPRFixedClass}; - } - else if (Arg == "FPRFixed") { - return {DecodeFailure::DECODE_OKAY, FEXCore::IR::FPRFixedClass}; - } - else if (Arg == "GPRPair") { - return {DecodeFailure::DECODE_OKAY, FEXCore::IR::GPRPairClass}; - } - else if (Arg == "Complex") { - return {DecodeFailure::DECODE_OKAY, FEXCore::IR::ComplexClass}; + return {DecodeFailure::DECODE_INVALIDREGISTERCLASS, FEXCore::IR::InvalidClass}; } - return {DecodeFailure::DECODE_INVALIDREGISTERCLASS, FEXCore::IR::InvalidClass}; - } + template<> + std::pair DecodeValue(const fextl::string& Arg) { + uint8_t Size {}, Elements {1}; + int NumArgs = sscanf(Arg.c_str(), "i%hhdv%hhd", &Size, &Elements); - template<> - std::pair DecodeValue(const fextl::string &Arg) { - uint8_t Size{}, Elements{1}; - int NumArgs = sscanf(Arg.c_str(), "i%hhdv%hhd", &Size, &Elements); + if (NumArgs != 1 && NumArgs != 2) { + return {DecodeFailure::DECODE_INVALID, {}}; + } - if (NumArgs != 1 && NumArgs != 2) { - return {DecodeFailure::DECODE_INVALID, {}}; + return {DecodeFailure::DECODE_OKAY, FEXCore::IR::TypeDefinition::Create(Size / 8, Elements)}; } - return {DecodeFailure::DECODE_OKAY, FEXCore::IR::TypeDefinition::Create(Size / 8, Elements)}; - } + template<> + std::pair DecodeValue(const fextl::string& Arg) { + static constexpr std::array CondNames = {"EQ", "NEQ", "UGE", "ULT", "MI", "PL", "VS", "VC", + "UGT", "ULE", "SGE", "SLT", "SGT", "SLE", "ANDZ", "ANDNZ", + "FLU", "FGE", "FLEU", "FGT", "FU", "FNU"}; - template<> - std::pair DecodeValue(const 
fextl::string &Arg) { - static constexpr std::array CondNames = { - "EQ", - "NEQ", - "UGE", - "ULT", - "MI", - "PL", - "VS", - "VC", - "UGT", - "ULE", - "SGE", - "SLT", - "SGT", - "SLE", - "ANDZ", - "ANDNZ", - "FLU", - "FGE", - "FLEU", - "FGT", - "FU", - "FNU" - }; - - for (size_t i = 0; i < CondNames.size(); ++i) { - if (CondNames[i] == Arg) { - return {DecodeFailure::DECODE_OKAY, CondClassType{static_cast(i)}}; + for (size_t i = 0; i < CondNames.size(); ++i) { + if (CondNames[i] == Arg) { + return {DecodeFailure::DECODE_OKAY, CondClassType {static_cast(i)}}; + } } + return {DecodeFailure::DECODE_INVALID_CONDFLAG, {}}; } - return {DecodeFailure::DECODE_INVALID_CONDFLAG, {}}; - } - - template<> - std::pair DecodeValue(const fextl::string &Arg) { - static constexpr std::array Names = { - "SXTX", - "UXTW", - "SXTW", - }; - for (size_t i = 0; i < Names.size(); ++i) { - if (Names[i] == Arg) { - return {DecodeFailure::DECODE_OKAY, MemOffsetType{static_cast(i)}}; + template<> + std::pair DecodeValue(const fextl::string& Arg) { + static constexpr std::array Names = { + "SXTX", + "UXTW", + "SXTW", + }; + + for (size_t i = 0; i < Names.size(); ++i) { + if (Names[i] == Arg) { + return {DecodeFailure::DECODE_OKAY, MemOffsetType {static_cast(i)}}; + } } + return {DecodeFailure::DECODE_INVALID_MEMOFFSETTYPE, {}}; } - return {DecodeFailure::DECODE_INVALID_MEMOFFSETTYPE, {}}; - } - template<> - std::pair DecodeValue(const fextl::string &Arg) { - static constexpr std::array Names = { - "Loads", - "Stores", - "LoadStores", - }; - - for (size_t i = 0; i < Names.size(); ++i) { - if (Names[i] == Arg) { - return {DecodeFailure::DECODE_OKAY, FenceType{static_cast(i)}}; + template<> + std::pair DecodeValue(const fextl::string& Arg) { + static constexpr std::array Names = { + "Loads", + "Stores", + "LoadStores", + }; + + for (size_t i = 0; i < Names.size(); ++i) { + if (Names[i] == Arg) { + return {DecodeFailure::DECODE_OKAY, FenceType {static_cast(i)}}; + } } + return 
{DecodeFailure::DECODE_INVALID_FENCETYPE, {}}; } - return {DecodeFailure::DECODE_INVALID_FENCETYPE, {}}; - } - template<> - std::pair DecodeValue(const fextl::string &Arg) { - uint32_t tmp{}; - fextl::stringstream ss{Arg}; - BreakDefinition Reason{}; - - // Seek past '{' - ss.seekg(1, std::ios::cur); - ss >> Reason.ErrorRegister; - - // Seek past '.' - ss.seekg(1, std::ios::cur); - ss >> tmp; - Reason.Signal = tmp; - - // Seek past '.' - ss.seekg(1, std::ios::cur); - ss >> tmp; - Reason.TrapNumber = tmp; - - // Seek past '.' - ss.seekg(1, std::ios::cur); - ss >> tmp; - Reason.si_code = tmp; - - if (ss.fail()) { - return {DecodeFailure::DECODE_INVALIDCHAR, {}}; - } - else { - return {DecodeFailure::DECODE_OKAY, Reason}; + template<> + std::pair DecodeValue(const fextl::string& Arg) { + uint32_t tmp {}; + fextl::stringstream ss {Arg}; + BreakDefinition Reason {}; + + // Seek past '{' + ss.seekg(1, std::ios::cur); + ss >> Reason.ErrorRegister; + + // Seek past '.' + ss.seekg(1, std::ios::cur); + ss >> tmp; + Reason.Signal = tmp; + + // Seek past '.' + ss.seekg(1, std::ios::cur); + ss >> tmp; + Reason.TrapNumber = tmp; + + // Seek past '.' 
+ ss.seekg(1, std::ios::cur); + ss >> tmp; + Reason.si_code = tmp; + + if (ss.fail()) { + return {DecodeFailure::DECODE_INVALIDCHAR, {}}; + } else { + return {DecodeFailure::DECODE_OKAY, Reason}; + } } - } - template<> - std::pair DecodeValue(const fextl::string &Arg) { - static constexpr std::array, 6> Names = {{ - { "i8", OpSize::i8Bit }, - { "i16", OpSize::i16Bit }, - { "i32", OpSize::i32Bit }, - { "i64", OpSize::i64Bit }, - { "i128", OpSize::i128Bit }, - { "i256", OpSize::i256Bit }, - }}; - - for (size_t i = 0; i < Names.size(); ++i) { - if (Names[i].first == Arg) { - return {DecodeFailure::DECODE_OKAY, Names[i].second}; + template<> + std::pair DecodeValue(const fextl::string& Arg) { + static constexpr std::array, 6> Names = {{ + {"i8", OpSize::i8Bit}, + {"i16", OpSize::i16Bit}, + {"i32", OpSize::i32Bit}, + {"i64", OpSize::i64Bit}, + {"i128", OpSize::i128Bit}, + {"i256", OpSize::i256Bit}, + }}; + + for (size_t i = 0; i < Names.size(); ++i) { + if (Names[i].first == Arg) { + return {DecodeFailure::DECODE_OKAY, Names[i].second}; + } } + return {DecodeFailure::DECODE_INVALID_OPSIZE, {}}; } - return {DecodeFailure::DECODE_INVALID_OPSIZE, {}}; - } - template<> - std::pair DecodeValue(const fextl::string &Arg) { - if (Arg.at(0) != '%') return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + template<> + std::pair DecodeValue(const fextl::string& Arg) { + if (Arg.at(0) != '%') { + return {DecodeFailure::DECODE_INVALIDCHAR, 0}; + } - // Strip off the type qualifier from the ssa value - fextl::string SSAName = FEXCore::StringUtils::Trim(Arg); - const size_t ArgEnd = SSAName.find_first_of(' '); + // Strip off the type qualifier from the ssa value + fextl::string SSAName = FEXCore::StringUtils::Trim(Arg); + const size_t ArgEnd = SSAName.find_first_of(' '); - if (ArgEnd != fextl::string::npos) { - SSAName = SSAName.substr(0, ArgEnd); - } + if (ArgEnd != fextl::string::npos) { + SSAName = SSAName.substr(0, ArgEnd); + } + + // Forward declarations may make this not succed + auto 
Op = SSANameMapper.find(SSAName); + if (Op == SSANameMapper.end()) { + return {DecodeFailure::DECODE_UNKNOWN_SSA, nullptr}; + } - // Forward declarations may make this not succed - auto Op = SSANameMapper.find(SSAName); - if (Op == SSANameMapper.end()) { - return {DecodeFailure::DECODE_UNKNOWN_SSA, nullptr}; + return {DecodeFailure::DECODE_OKAY, Op->second}; } - return {DecodeFailure::DECODE_OKAY, Op->second}; - } + struct LineDefinition { + size_t LineNumber; + bool HasDefinition {}; + fextl::string Definition {}; + FEXCore::IR::TypeDefinition Size {}; + fextl::string IROp {}; + FEXCore::IR::IROps OpEnum; + bool HasArgs {}; + fextl::vector Args; + OrderedNode* Node {}; + }; - struct LineDefinition { - size_t LineNumber; - bool HasDefinition{}; - fextl::string Definition{}; - FEXCore::IR::TypeDefinition Size{}; - fextl::string IROp{}; - FEXCore::IR::IROps OpEnum; - bool HasArgs{}; - fextl::vector Args; - OrderedNode *Node{}; - }; + fextl::vector Lines; + fextl::unordered_map SSANameMapper; + fextl::vector Defs; + LineDefinition* CurrentDef {}; + fextl::unordered_map NameToOpMap; - fextl::vector Lines; - fextl::unordered_map SSANameMapper; - fextl::vector Defs; - LineDefinition *CurrentDef{}; - fextl::unordered_map NameToOpMap; - - IRParser(FEXCore::Utils::IntrusivePooledAllocator &ThreadAllocator, fextl::stringstream &MapsStream) - : IREmitter {ThreadAllocator} { - InitializeNameMap(); - - fextl::string Line; - while (std::getline(MapsStream, Line)) { - if (MapsStream.eof()) break; - if (MapsStream.fail()) { - LogMan::Msg::EFmt("Failed to getline on line: {}", Lines.size()); - return; + IRParser(FEXCore::Utils::IntrusivePooledAllocator& ThreadAllocator, fextl::stringstream& MapsStream) + : IREmitter {ThreadAllocator} { + InitializeNameMap(); + + fextl::string Line; + while (std::getline(MapsStream, Line)) { + if (MapsStream.eof()) { + break; + } + if (MapsStream.fail()) { + LogMan::Msg::EFmt("Failed to getline on line: {}", Lines.size()); + return; + } + 
Lines.emplace_back(Line); } - Lines.emplace_back(Line); - } - ResetWorkingList(); - Loaded = Parse(); - } + ResetWorkingList(); + Loaded = Parse(); + } - bool Loaded = false; + bool Loaded = false; - bool Parse() { - const auto CheckPrintError = [&](const LineDefinition &Def, DecodeFailure Failure) -> bool { - if (Failure != DecodeFailure::DECODE_OKAY) { - LogMan::Msg::EFmt("Error on Line: {}", Def.LineNumber); - LogMan::Msg::EFmt("{}", Lines[Def.LineNumber]); - LogMan::Msg::EFmt("Value Couldn't be decoded due to {}", DecodeErrorToString(Failure)); - return false; - } + bool Parse() { + const auto CheckPrintError = [&](const LineDefinition& Def, DecodeFailure Failure) -> bool { + if (Failure != DecodeFailure::DECODE_OKAY) { + LogMan::Msg::EFmt("Error on Line: {}", Def.LineNumber); + LogMan::Msg::EFmt("{}", Lines[Def.LineNumber]); + LogMan::Msg::EFmt("Value Couldn't be decoded due to {}", DecodeErrorToString(Failure)); + return false; + } - return true; - }; + return true; + }; - const auto CheckPrintErrorArg = [&](const LineDefinition &Def, DecodeFailure Failure, size_t Arg) -> bool { - if (Failure != DecodeFailure::DECODE_OKAY) { - LogMan::Msg::EFmt("Error on Line: {}", Def.LineNumber); - LogMan::Msg::EFmt("{}", Lines[Def.LineNumber]); - LogMan::Msg::EFmt("Argument Number {}: {}", Arg + 1, Def.Args[Arg]); - LogMan::Msg::EFmt("Value Couldn't be decoded due to {}", DecodeErrorToString(Failure)); - return false; - } + const auto CheckPrintErrorArg = [&](const LineDefinition& Def, DecodeFailure Failure, size_t Arg) -> bool { + if (Failure != DecodeFailure::DECODE_OKAY) { + LogMan::Msg::EFmt("Error on Line: {}", Def.LineNumber); + LogMan::Msg::EFmt("{}", Lines[Def.LineNumber]); + LogMan::Msg::EFmt("Argument Number {}: {}", Arg + 1, Def.Args[Arg]); + LogMan::Msg::EFmt("Value Couldn't be decoded due to {}", DecodeErrorToString(Failure)); + return false; + } - return true; - }; + return true; + }; - // String parse every line for our definitions - for (size_t i = 0; i < 
Lines.size(); ++i) { - fextl::string Line = Lines[i]; - LineDefinition Def{}; - CurrentDef = &Def; - Def.LineNumber = i; + // String parse every line for our definitions + for (size_t i = 0; i < Lines.size(); ++i) { + fextl::string Line = Lines[i]; + LineDefinition Def {}; + CurrentDef = &Def; + Def.LineNumber = i; - Line = FEXCore::StringUtils::Trim(Line); + Line = FEXCore::StringUtils::Trim(Line); - // Skip empty lines - if (Line.empty()) { - continue; - } + // Skip empty lines + if (Line.empty()) { + continue; + } - if (Line[0] == ';') { - // This is a comment line - // Skip it - continue; - } + if (Line[0] == ';') { + // This is a comment line + // Skip it + continue; + } - size_t CurrentPos{}; - // Let's see if this node is assigning something first - if (Line[0] == '%') { - size_t DefinitionEnd = fextl::string::npos; - if ((DefinitionEnd = Line.find_first_of('=', CurrentPos)) != fextl::string::npos) { - Def.Definition = Line.substr(0, DefinitionEnd); - Def.Definition = FEXCore::StringUtils::Trim(Def.Definition); - Def.HasDefinition = true; - CurrentPos = DefinitionEnd + 1; // +1 to ensure we go past then assignment + size_t CurrentPos {}; + // Let's see if this node is assigning something first + if (Line[0] == '%') { + size_t DefinitionEnd = fextl::string::npos; + if ((DefinitionEnd = Line.find_first_of('=', CurrentPos)) != fextl::string::npos) { + Def.Definition = Line.substr(0, DefinitionEnd); + Def.Definition = FEXCore::StringUtils::Trim(Def.Definition); + Def.HasDefinition = true; + CurrentPos = DefinitionEnd + 1; // +1 to ensure we go past then assignment + } else { + LogMan::Msg::EFmt("Error on Line: {}", i); + LogMan::Msg::EFmt("{}", Lines[i]); + LogMan::Msg::EFmt("SSA declaration without assignment"); + return false; + } } - else { - LogMan::Msg::EFmt("Error on Line: {}", i); - LogMan::Msg::EFmt("{}", Lines[i]); - LogMan::Msg::EFmt("SSA declaration without assignment"); - return false; + + // Check if we are pulling in some IR from the IR Printer + 
// Prints (%%d) at the start of lines without a definition + if (Line[0] == '(') { + size_t DefinitionEnd = fextl::string::npos; + if ((DefinitionEnd = Line.find_first_of(')', CurrentPos)) != fextl::string::npos) { + size_t SSAEnd = fextl::string::npos; + if ((SSAEnd = Line.find_last_of(' ', DefinitionEnd)) != fextl::string::npos) { + fextl::string Type = Line.substr(SSAEnd + 1, DefinitionEnd - SSAEnd - 1); + Type = FEXCore::StringUtils::Trim(Type); + + auto DefinitionSize = DecodeValue(Type); + if (!CheckPrintError(Def, DefinitionSize.first)) { + return false; + } + Def.Size = DefinitionSize.second; + } + + Def.Definition = FEXCore::StringUtils::Trim(Line.substr(1, std::min(DefinitionEnd, SSAEnd) - 1)); + + CurrentPos = DefinitionEnd + 1; + } else { + LogMan::Msg::EFmt("Error on Line: {}", i); + LogMan::Msg::EFmt("{}", Lines[i]); + LogMan::Msg::EFmt("SSA value with numbered SSA provided but no closing parentheses"); + return false; + } } - } - // Check if we are pulling in some IR from the IR Printer - // Prints (%%d) at the start of lines without a definition - if (Line[0] == '(') { - size_t DefinitionEnd = fextl::string::npos; - if ((DefinitionEnd = Line.find_first_of(')', CurrentPos)) != fextl::string::npos) { - size_t SSAEnd = fextl::string::npos; - if ((SSAEnd = Line.find_last_of(' ', DefinitionEnd)) != fextl::string::npos) { - fextl::string Type = Line.substr(SSAEnd + 1, DefinitionEnd - SSAEnd - 1); + if (Def.HasDefinition) { + // Let's check if we have a size declared with this variable + size_t NameEnd = fextl::string::npos; + if ((NameEnd = Def.Definition.find_first_of(' ')) != fextl::string::npos) { + fextl::string Type = Def.Definition.substr(NameEnd + 1); Type = FEXCore::StringUtils::Trim(Type); + Def.Definition = FEXCore::StringUtils::Trim(Def.Definition.substr(0, NameEnd)); auto DefinitionSize = DecodeValue(Type); if (!CheckPrintError(Def, DefinitionSize.first)) { @@ -449,156 +476,135 @@ class IRParser: public FEXCore::IR::IREmitter { Def.Size = 
DefinitionSize.second; } - Def.Definition = FEXCore::StringUtils::Trim(Line.substr(1, std::min(DefinitionEnd, SSAEnd) - 1)); - - CurrentPos = DefinitionEnd + 1; + if (Def.Definition == "%Invalid") { + LogMan::Msg::EFmt("Error on Line: {}", i); + LogMan::Msg::EFmt("{}", Lines[i]); + LogMan::Msg::EFmt("Definition tried to define reserved %Invalid ssa node"); + return false; + } } - else { - LogMan::Msg::EFmt("Error on Line: {}", i); - LogMan::Msg::EFmt("{}", Lines[i]); - LogMan::Msg::EFmt("SSA value with numbered SSA provided but no closing parentheses"); - return false; + + // Let's get the IR op + size_t OpNameEnd = fextl::string::npos; + fextl::string RemainingLine = FEXCore::StringUtils::Trim(Line.substr(CurrentPos)); + CurrentPos = 0; + if ((OpNameEnd = RemainingLine.find_first_of(" \t\n\r\0", CurrentPos)) != fextl::string::npos) { + Def.IROp = RemainingLine.substr(CurrentPos, OpNameEnd); + Def.IROp = FEXCore::StringUtils::Trim(Def.IROp); + Def.HasArgs = true; + CurrentPos = OpNameEnd; + } else { + if (RemainingLine.empty()) { + LogMan::Msg::EFmt("Error on Line: {}", i); + LogMan::Msg::EFmt("{}", Lines[i]); + LogMan::Msg::EFmt("Line without an IROp?"); + return false; + } + + Def.IROp = RemainingLine; + Def.HasArgs = false; } - } - if (Def.HasDefinition) { - // Let's check if we have a size declared with this variable - size_t NameEnd = fextl::string::npos; - if ((NameEnd = Def.Definition.find_first_of(' ')) != fextl::string::npos) { - fextl::string Type = Def.Definition.substr(NameEnd + 1); - Type = FEXCore::StringUtils::Trim(Type); - Def.Definition = FEXCore::StringUtils::Trim(Def.Definition.substr(0, NameEnd)); - - auto DefinitionSize = DecodeValue(Type); - if (!CheckPrintError(Def, DefinitionSize.first)) return false; - Def.Size = DefinitionSize.second; + if (Def.HasArgs) { + RemainingLine = FEXCore::StringUtils::Trim(RemainingLine.substr(CurrentPos)); + if (RemainingLine.empty()) { + // How did we get here? 
+ Def.HasArgs = false; + } else { + while (!RemainingLine.empty()) { + const size_t ArgEnd = RemainingLine.find(','); + fextl::string Arg = FEXCore::StringUtils::Trim(RemainingLine.substr(0, ArgEnd)); + + Def.Args.emplace_back(std::move(Arg)); + + RemainingLine.erase(0, ArgEnd + 1); // +1 to ensure we go past the ',' + if (ArgEnd == fextl::string::npos) { + break; + } + } + } } - if (Def.Definition == "%Invalid") { - LogMan::Msg::EFmt("Error on Line: {}", i); - LogMan::Msg::EFmt("{}", Lines[i]); - LogMan::Msg::EFmt("Definition tried to define reserved %Invalid ssa node"); + CurrentDef = &Defs.emplace_back(std::move(Def)); + } + + // Ensure all of the ops are real ops + for (size_t i = 0; i < Defs.size(); ++i) { + auto& Def = Defs[i]; + auto Op = NameToOpMap.find(Def.IROp); + if (Op == NameToOpMap.end()) { + LogMan::Msg::EFmt("Error on Line: {}", Def.LineNumber); + LogMan::Msg::EFmt("{}", Lines[Def.LineNumber]); + LogMan::Msg::EFmt("IROp '{}' doesn't exist", Def.IROp); return false; } + Def.OpEnum = Op->second; } - // Let's get the IR op - size_t OpNameEnd = fextl::string::npos; - fextl::string RemainingLine = FEXCore::StringUtils::Trim(Line.substr(CurrentPos)); - CurrentPos = 0; - if ((OpNameEnd = RemainingLine.find_first_of(" \t\n\r\0", CurrentPos)) != fextl::string::npos) { - Def.IROp = RemainingLine.substr(CurrentPos, OpNameEnd); - Def.IROp = FEXCore::StringUtils::Trim(Def.IROp); - Def.HasArgs = true; - CurrentPos = OpNameEnd; - } - else { - if (RemainingLine.empty()) { - LogMan::Msg::EFmt("Error on Line: {}", i); - LogMan::Msg::EFmt("{}", Lines[i]); - LogMan::Msg::EFmt("Line without an IROp?"); + // Emit the header op + IRPair IRHeader; + { + auto& Def = Defs[0]; + CurrentDef = &Def; + if (Def.OpEnum != FEXCore::IR::IROps::OP_IRHEADER) { + LogMan::Msg::EFmt("Error on Line: {}", Def.LineNumber); + LogMan::Msg::EFmt("{}", Lines[Def.LineNumber]); + LogMan::Msg::EFmt("First op needs to be IRHeader. 
Was '{}'", Def.IROp); return false; } - Def.IROp = RemainingLine; - Def.HasArgs = false; - } + auto OriginalRIP = DecodeValue(Def.Args[1]); - if (Def.HasArgs) { - RemainingLine = - FEXCore::StringUtils::Trim(RemainingLine.substr(CurrentPos)); - if (RemainingLine.empty()) { - // How did we get here? - Def.HasArgs = false; + if (!CheckPrintError(Def, OriginalRIP.first)) { + return false; } - else { - while (!RemainingLine.empty()) { - const size_t ArgEnd = RemainingLine.find(','); - fextl::string Arg = FEXCore::StringUtils::Trim(RemainingLine.substr(0, ArgEnd)); - Def.Args.emplace_back(std::move(Arg)); + auto CodeBlockCount = DecodeValue(Def.Args[2]); - RemainingLine.erase(0, ArgEnd+1); // +1 to ensure we go past the ',' - if (ArgEnd == fextl::string::npos) - break; - } + if (!CheckPrintError(Def, CodeBlockCount.first)) { + return false; } - } - CurrentDef = &Defs.emplace_back(std::move(Def)); - } + auto InstructionCount = DecodeValue(Def.Args[3]); - // Ensure all of the ops are real ops - for(size_t i = 0; i < Defs.size(); ++i) { - auto &Def = Defs[i]; - auto Op = NameToOpMap.find(Def.IROp); - if (Op == NameToOpMap.end()) { - LogMan::Msg::EFmt("Error on Line: {}", Def.LineNumber); - LogMan::Msg::EFmt("{}", Lines[Def.LineNumber]); - LogMan::Msg::EFmt("IROp '{}' doesn't exist", Def.IROp); - return false; - } - Def.OpEnum = Op->second; - } + if (!CheckPrintError(Def, InstructionCount.first)) { + return false; + } - // Emit the header op - IRPair IRHeader; - { - auto &Def = Defs[0]; - CurrentDef = &Def; - if (Def.OpEnum != FEXCore::IR::IROps::OP_IRHEADER) { - LogMan::Msg::EFmt("Error on Line: {}", Def.LineNumber); - LogMan::Msg::EFmt("{}", Lines[Def.LineNumber]); - LogMan::Msg::EFmt("First op needs to be IRHeader. 
Was '{}'", Def.IROp); - return false; + IRHeader = _IRHeader(InvalidNode, OriginalRIP.second, CodeBlockCount.second, InstructionCount.second); } - auto OriginalRIP = DecodeValue(Def.Args[1]); - - if (!CheckPrintError(Def, OriginalRIP.first)) return false; - - auto CodeBlockCount = DecodeValue(Def.Args[2]); - - if (!CheckPrintError(Def, CodeBlockCount.first)) return false; + SetWriteCursor(nullptr); // isolate the header from everything following - auto InstructionCount = DecodeValue(Def.Args[3]); + // Initialize SSANameMapper with Invalid value + SSANameMapper.insert_or_assign("%Invalid", Invalid()); - if (!CheckPrintError(Def, InstructionCount.first)) return false; + // Spin through the blocks and generate basic block ops + for (size_t i = 0; i < Defs.size(); ++i) { + auto& Def = Defs[i]; + if (Def.OpEnum == FEXCore::IR::IROps::OP_CODEBLOCK) { + auto CodeBlock = _CodeBlock(InvalidNode, InvalidNode); + SSANameMapper.insert_or_assign(Def.Definition, CodeBlock.Node); + Def.Node = CodeBlock.Node; - IRHeader = _IRHeader(InvalidNode, OriginalRIP.second, CodeBlockCount.second, InstructionCount.second); - } - - SetWriteCursor(nullptr); // isolate the header from everything following - - // Initialize SSANameMapper with Invalid value - SSANameMapper.insert_or_assign("%Invalid", Invalid()); - - // Spin through the blocks and generate basic block ops - for(size_t i = 0; i < Defs.size(); ++i) { - auto &Def = Defs[i]; - if (Def.OpEnum == FEXCore::IR::IROps::OP_CODEBLOCK) { - auto CodeBlock = _CodeBlock(InvalidNode, InvalidNode); - SSANameMapper.insert_or_assign(Def.Definition, CodeBlock.Node); - Def.Node = CodeBlock.Node; - - if (i == 1) { - // First code block is the entry block - // Link the header to the first block - IRHeader.first->Blocks = CodeBlock.Node->Wrapped(DualListData.ListBegin()); + if (i == 1) { + // First code block is the entry block + // Link the header to the first block + IRHeader.first->Blocks = CodeBlock.Node->Wrapped(DualListData.ListBegin()); + } + 
CodeBlocks.emplace_back(CodeBlock.Node); } - CodeBlocks.emplace_back(CodeBlock.Node); } - } - SetWriteCursor(nullptr); // isolate the block headers too + SetWriteCursor(nullptr); // isolate the block headers too - // Spin through all the definitions and add the ops to the basic blocks - OrderedNode *CurrentBlock{}; - FEXCore::IR::IROp_CodeBlock *CurrentBlockOp{}; - for(size_t i = 1; i < Defs.size(); ++i) { - auto &Def = Defs[i]; - CurrentDef = &Def; + // Spin through all the definitions and add the ops to the basic blocks + OrderedNode* CurrentBlock {}; + FEXCore::IR::IROp_CodeBlock* CurrentBlockOp {}; + for (size_t i = 1; i < Defs.size(); ++i) { + auto& Def = Defs[i]; + CurrentDef = &Def; - switch (Def.OpEnum) { + switch (Def.OpEnum) { // Special handled case FEXCore::IR::IROps::OP_IRHEADER: LogMan::Msg::EFmt("Error on Line: {}", Def.LineNumber); @@ -675,46 +681,44 @@ class IRParser: public FEXCore::IR::IREmitter { LogMan::Msg::EFmt("Unhandled Op enum '{}' in parser", Def.IROp); return false; } - } - - if (Def.HasDefinition) { - auto IROp = Def.Node->Op(DualListData.DataBegin()); - if (Def.Size.Elements()) { - IROp->Size = Def.Size.Bytes() * Def.Size.Elements(); - IROp->ElementSize = Def.Size.Bytes(); } - else { - IROp->Size = Def.Size.Bytes(); - IROp->ElementSize = 0; + + if (Def.HasDefinition) { + auto IROp = Def.Node->Op(DualListData.DataBegin()); + if (Def.Size.Elements()) { + IROp->Size = Def.Size.Bytes() * Def.Size.Elements(); + IROp->ElementSize = Def.Size.Bytes(); + } else { + IROp->Size = Def.Size.Bytes(); + IROp->ElementSize = 0; + } + SSANameMapper.insert_or_assign(Def.Definition, Def.Node); } - SSANameMapper.insert_or_assign(Def.Definition, Def.Node); } - } - return true; - } + return true; + } - void InitializeNameMap() { - if (NameToOpMap.empty()) { - for (FEXCore::IR::IROps Op = FEXCore::IR::IROps::OP_DUMMY; - Op <= FEXCore::IR::IROps::OP_LAST; - Op = static_cast(static_cast(Op) + 1)) { - NameToOpMap.insert_or_assign(FEXCore::IR::GetName(Op), Op); + 
void InitializeNameMap() { + if (NameToOpMap.empty()) { + for (FEXCore::IR::IROps Op = FEXCore::IR::IROps::OP_DUMMY; Op <= FEXCore::IR::IROps::OP_LAST; + Op = static_cast(static_cast(Op) + 1)) { + NameToOpMap.insert_or_assign(FEXCore::IR::GetName(Op), Op); + } } } - } -}; + }; -} // anon namespace +} // namespace -fextl::unique_ptr Parse(FEXCore::Utils::IntrusivePooledAllocator &ThreadAllocator, fextl::stringstream &MapsStream) { - auto parser = fextl::make_unique(ThreadAllocator, MapsStream); +fextl::unique_ptr Parse(FEXCore::Utils::IntrusivePooledAllocator& ThreadAllocator, fextl::stringstream& MapsStream) { + auto parser = fextl::make_unique(ThreadAllocator, MapsStream); - if (parser->Loaded) { - return parser; - } else { - return nullptr; - } + if (parser->Loaded) { + return parser; + } else { + return nullptr; + } } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/PassManager.cpp b/FEXCore/Source/Interface/IR/PassManager.cpp index f9c8f255d7..8ba36cb976 100644 --- a/FEXCore/Source/Interface/IR/PassManager.cpp +++ b/FEXCore/Source/Interface/IR/PassManager.cpp @@ -66,7 +66,7 @@ void PassManager::Finalize() { } } -void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl *ctx, bool InlineConstants) { +void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl* ctx, bool InlineConstants) { FEX_CONFIG_OPT(DisablePasses, O0); if (!DisablePasses()) { @@ -105,16 +105,16 @@ void PassManager::InsertRegisterAllocationPass(bool SupportsAVX) { InsertPass(IR::CreateRegisterAllocationPass(GetPass("Compaction"), SupportsAVX), "RA"); } -bool PassManager::Run(IREmitter *IREmit) { +bool PassManager::Run(IREmitter* IREmit) { FEXCORE_PROFILE_SCOPED("PassManager::Run"); bool Changed = false; - for (auto const &Pass : Passes) { + for (const auto& Pass : Passes) { Changed |= Pass->Run(IREmit); } #if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED - for (auto const &Pass : ValidationPasses) { + for (const auto& Pass : ValidationPasses) { Changed 
|= Pass->Run(IREmit); } #endif @@ -122,4 +122,4 @@ bool PassManager::Run(IREmitter *IREmit) { return Changed; } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp index 5e78c360aa..b820cef678 100644 --- a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp @@ -7,7 +7,7 @@ desc: ConstProp, ZExt elim, addressgen coalesce, const pooling, fcmp reduction, */ -//aarch64 heuristics +// aarch64 heuristics #include "aarch64/assembler-aarch64.h" #include "aarch64/cpu-aarch64.h" #include "aarch64/disasm-aarch64.h" @@ -57,11 +57,18 @@ static bool HasConsecutiveBits(uint64_t imm, unsigned width) { return ((imm ^ (imm >> 1)) & ((1ULL << (width - 1)) - 1)) == 0; } -//aarch64 heuristics -static bool IsImmLogical(uint64_t imm, unsigned width) { if (width < 32) width = 32; return vixl::aarch64::Assembler::IsImmLogical(imm, width); } -static bool IsImmAddSub(uint64_t imm) { return vixl::aarch64::Assembler::IsImmAddSub(imm); } +// aarch64 heuristics +static bool IsImmLogical(uint64_t imm, unsigned width) { + if (width < 32) { + width = 32; + } + return vixl::aarch64::Assembler::IsImmLogical(imm, width); +} +static bool IsImmAddSub(uint64_t imm) { + return vixl::aarch64::Assembler::IsImmAddSub(imm); +} static bool IsMemoryScale(uint64_t Scale, uint8_t AccessSize) { - return Scale == AccessSize; + return Scale == AccessSize; } static bool IsSIMM9Range(uint64_t imm) { @@ -72,79 +79,80 @@ static bool IsSIMM9Range(uint64_t imm) { } static bool IsImmMemory(uint64_t imm, uint8_t AccessSize) { - if (IsSIMM9Range(imm)) + if (IsSIMM9Range(imm)) { return true; - else if ( (imm & (AccessSize-1)) == 0 && imm/AccessSize <= 4095 ) + } else if ((imm & (AccessSize - 1)) == 0 && imm / AccessSize <= 4095) { return true; - else { + } else { return false; } } static bool IsTSOImm9(uint64_t imm) { // RCPC2 only has a 9-bit signed offset - if (IsSIMM9Range(imm)) + 
if (IsSIMM9Range(imm)) { return true; - else { + } else { return false; } } -static std::tuple MemExtendedAddressing(IREmitter *IREmit, uint8_t AccessSize, IROp_Header* AddressHeader) { +static std::tuple +MemExtendedAddressing(IREmitter* IREmit, uint8_t AccessSize, IROp_Header* AddressHeader) { auto Src0Header = IREmit->GetOpHeader(AddressHeader->Args[0]); if (Src0Header->Size == 8) { - //Try to optimize: Base + MUL(Offset, Scale) + // Try to optimize: Base + MUL(Offset, Scale) if (Src0Header->Op == OP_MUL) { uint64_t Scale; if (IREmit->IsValueConstant(Src0Header->Args[1], &Scale)) { if (IsMemoryScale(Scale, AccessSize)) { // remove mul as it can be folded to the mem op - return { MEM_OFFSET_SXTX, (uint8_t)Scale, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0]) }; + return {MEM_OFFSET_SXTX, (uint8_t)Scale, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0])}; } else if (Scale == 1) { // remove nop mul - return { MEM_OFFSET_SXTX, 1, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0]) }; + return {MEM_OFFSET_SXTX, 1, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0])}; } } } - //Try to optimize: Base + LSHL(Offset, Scale) + // Try to optimize: Base + LSHL(Offset, Scale) else if (Src0Header->Op == OP_LSHL) { uint64_t Constant2; if (IREmit->IsValueConstant(Src0Header->Args[1], &Constant2)) { - uint64_t Scale = 1<UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0]) }; + return {MEM_OFFSET_SXTX, Scale, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0])}; } else if (Scale == 1) { // remove nop shift - return { MEM_OFFSET_SXTX, 1, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0]) }; + return {MEM_OFFSET_SXTX, 1, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0])}; } } } #if defined(_M_ARM_64) // x86 can't sext or zext on 
mem ops - //Try to optimize: Base + (u32)Offset + // Try to optimize: Base + (u32)Offset else if (Src0Header->Op == OP_BFE) { auto Bfe = Src0Header->C(); if (Bfe->lsb == 0 && Bfe->Width == 32) { - //todo: arm can also scale here - return { MEM_OFFSET_UXTW, 1, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0]) }; + // todo: arm can also scale here + return {MEM_OFFSET_UXTW, 1, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0])}; } } - //Try to optimize: Base + (s32)Offset + // Try to optimize: Base + (s32)Offset else if (Src0Header->Op == OP_SBFE) { auto Sbfe = Src0Header->C(); if (Sbfe->lsb == 0 && Sbfe->Width == 32) { - //todo: arm can also scale here - return { MEM_OFFSET_SXTW, 1, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0]) }; + // todo: arm can also scale here + return {MEM_OFFSET_SXTW, 1, IREmit->UnwrapNode(AddressHeader->Args[1]), IREmit->UnwrapNode(Src0Header->Args[0])}; } } #endif } // no match anywhere, just add - return { MEM_OFFSET_SXTX, 1, IREmit->UnwrapNode(AddressHeader->Args[0]), IREmit->UnwrapNode(AddressHeader->Args[1]) }; + return {MEM_OFFSET_SXTX, 1, IREmit->UnwrapNode(AddressHeader->Args[0]), IREmit->UnwrapNode(AddressHeader->Args[1])}; } -static OrderedNodeWrapper RemoveUselessMasking(IREmitter *IREmit, OrderedNodeWrapper src, uint64_t mask) { +static OrderedNodeWrapper RemoveUselessMasking(IREmitter* IREmit, OrderedNodeWrapper src, uint64_t mask) { #if 1 // HOTFIX: We need to clear up the meaning of opsize and dest size. 
See #594 return src; #else @@ -158,8 +166,8 @@ static OrderedNodeWrapper RemoveUselessMasking(IREmitter *IREmit, OrderedNodeWra } else if (IROp->Op == OP_BFE) { auto Op = IROp->C(); if (Op->lsb == 0) { - uint64_t imm = 1ULL << (Op->Width-1); - imm = (imm-1) *2 + 1; + uint64_t imm = 1ULL << (Op->Width - 1); + imm = (imm - 1) * 2 + 1; if ((imm & mask) == mask) { return RemoveUselessMasking(IREmit, IROp->Args[0], mask); @@ -171,7 +179,7 @@ static OrderedNodeWrapper RemoveUselessMasking(IREmitter *IREmit, OrderedNodeWra #endif } -static bool IsBfeAlreadyDone(IREmitter *IREmit, OrderedNodeWrapper src, uint64_t Width) { +static bool IsBfeAlreadyDone(IREmitter* IREmit, OrderedNodeWrapper src, uint64_t Width) { auto IROp = IREmit->GetOpHeader(src); if (IROp->Op == OP_BFE) { auto Op = IROp->C(); @@ -186,23 +194,21 @@ class ConstProp final : public FEXCore::IR::Pass { public: explicit ConstProp(bool DoInlineConstants, bool SupportsTSOImm9) : InlineConstants(DoInlineConstants) - , SupportsTSOImm9 {SupportsTSOImm9} { } + , SupportsTSOImm9 {SupportsTSOImm9} {} - bool Run(IREmitter *IREmit) override; + bool Run(IREmitter* IREmit) override; bool InlineConstants; private: - bool HandleConstantPools(IREmitter *IREmit, const IRListView& CurrentIR); - void LoadMemStoreMemImmediatePooling(IREmitter *IREmit, const IRListView& CurrentIR); - bool ZextAndMaskingElimination(IREmitter *IREmit, const IRListView& CurrentIR, - OrderedNode* CodeNode, IROp_Header* IROp); - bool ConstantPropagation(IREmitter *IREmit, const IRListView& CurrentIR, - OrderedNode* CodeNode, IROp_Header* IROp); - bool ConstantInlining(IREmitter *IREmit, const IRListView& CurrentIR); + bool HandleConstantPools(IREmitter* IREmit, const IRListView& CurrentIR); + void LoadMemStoreMemImmediatePooling(IREmitter* IREmit, const IRListView& CurrentIR); + bool ZextAndMaskingElimination(IREmitter* IREmit, const IRListView& CurrentIR, OrderedNode* CodeNode, IROp_Header* IROp); + bool ConstantPropagation(IREmitter* IREmit, const 
IRListView& CurrentIR, OrderedNode* CodeNode, IROp_Header* IROp); + bool ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR); struct ConstPoolData { - OrderedNode *Node; + OrderedNode* Node; IR::NodeID NodeID; }; fextl::unordered_map ConstPool; @@ -210,7 +216,7 @@ class ConstProp final : public FEXCore::IR::Pass { // Pool inline constant generation. These are typically very small and pool efficiently. fextl::robin_map InlineConstantGen; - OrderedNode *CreateInlineConstant(IREmitter *IREmit, uint64_t Constant) { + OrderedNode* CreateInlineConstant(IREmitter* IREmit, uint64_t Constant) { const auto it = InlineConstantGen.find(Constant); if (it != InlineConstantGen.end()) { return it->second; @@ -218,7 +224,7 @@ class ConstProp final : public FEXCore::IR::Pass { auto Result = InlineConstantGen.insert_or_assign(Constant, IREmit->_InlineConstant(Constant)); return Result.first->second; } - bool SupportsTSOImm9{}; + bool SupportsTSOImm9 {}; // This is a heuristic to limit constant pool live ranges to reduce RA interference pressure. // If the range is unbounded then RA interference pressure seems to increase to the point // that long blocks of constant usage can slow to a crawl. 
@@ -226,7 +232,7 @@ class ConstProp final : public FEXCore::IR::Pass { constexpr static uint32_t CONSTANT_POOL_RANGE_LIMIT = 200; }; -bool ConstProp::HandleConstantPools(IREmitter *IREmit, const IRListView& CurrentIR) { +bool ConstProp::HandleConstantPools(IREmitter* IREmit, const IRListView& CurrentIR) { bool Changed = false; // constants are pooled per block @@ -267,7 +273,7 @@ bool ConstProp::HandleConstantPools(IREmitter *IREmit, const IRListView& Current // LoadMem / StoreMem imm pooling // If imms are close by, use address gen to generate the values instead of using a new imm -void ConstProp::LoadMemStoreMemImmediatePooling(IREmitter *IREmit, const IRListView& CurrentIR) { +void ConstProp::LoadMemStoreMemImmediatePooling(IREmitter* IREmit, const IRListView& CurrentIR) { for (auto [BlockNode, BlockIROp] : CurrentIR.GetBlocks()) { for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) { if (IROp->Op == OP_LOADMEM || IROp->Op == OP_STOREMEM) { @@ -277,15 +283,14 @@ void ConstProp::LoadMemStoreMemImmediatePooling(IREmitter *IREmit, const IRListV if (IROp->Op == OP_LOADMEM) { AddrIndex = IR::IROp_LoadMem::Addr_Index; OffsetIndex = IR::IROp_LoadMem::Offset_Index; - } - else { + } else { AddrIndex = IR::IROp_StoreMem::Addr_Index; OffsetIndex = IR::IROp_StoreMem::Offset_Index; } uint64_t Addr; if (IREmit->IsValueConstant(IROp->Args[AddrIndex], &Addr) && IROp->Args[OffsetIndex].IsInvalid()) { - for (auto& Const: AddressgenConsts) { + for (auto& Const : AddressgenConsts) { if ((Addr - Const.second) < 65536) { IREmit->ReplaceNodeArgument(CodeNode, AddrIndex, Const.first); IREmit->ReplaceNodeArgument(CodeNode, OffsetIndex, IREmit->_Constant(Addr - Const.second)); @@ -295,8 +300,7 @@ void ConstProp::LoadMemStoreMemImmediatePooling(IREmitter *IREmit, const IRListV AddressgenConsts[IREmit->UnwrapNode(IROp->Args[AddrIndex])] = Addr; } - doneOp: - ; +doneOp:; } IREmit->SetWriteCursor(CodeNode); } @@ -304,348 +308,336 @@ void 
ConstProp::LoadMemStoreMemImmediatePooling(IREmitter *IREmit, const IRListV } } -bool ConstProp::ZextAndMaskingElimination(IREmitter *IREmit, const IRListView& CurrentIR, - OrderedNode* CodeNode, IROp_Header* IROp) { +bool ConstProp::ZextAndMaskingElimination(IREmitter* IREmit, const IRListView& CurrentIR, OrderedNode* CodeNode, IROp_Header* IROp) { bool Changed = false; switch (IROp->Op) { - // Generic handling - case OP_OR: - case OP_XOR: - case OP_NOT: - case OP_ADD: - case OP_SUB: - case OP_MUL: - case OP_UMUL: - case OP_DIV: - case OP_UDIV: - case OP_LSHR: - case OP_ASHR: - case OP_LSHL: - case OP_ROR: { - for (int i = 0; i < IR::GetArgs(IROp->Op); i++) { - auto newArg = RemoveUselessMasking(IREmit, IROp->Args[i], getMask(IROp)); - if (newArg.ID() != IROp->Args[i].ID()) { - IREmit->ReplaceNodeArgument(CodeNode, i, IREmit->UnwrapNode(newArg)); - Changed = true; - } + // Generic handling + case OP_OR: + case OP_XOR: + case OP_NOT: + case OP_ADD: + case OP_SUB: + case OP_MUL: + case OP_UMUL: + case OP_DIV: + case OP_UDIV: + case OP_LSHR: + case OP_ASHR: + case OP_LSHL: + case OP_ROR: { + for (int i = 0; i < IR::GetArgs(IROp->Op); i++) { + auto newArg = RemoveUselessMasking(IREmit, IROp->Args[i], getMask(IROp)); + if (newArg.ID() != IROp->Args[i].ID()) { + IREmit->ReplaceNodeArgument(CodeNode, i, IREmit->UnwrapNode(newArg)); + Changed = true; } - break; } - case OP_AND: { - // if AND's arguments are imms, they are masking - for (int i = 0; i < IR::GetArgs(IROp->Op); i++) { - uint64_t imm = 0; - if (!IREmit->IsValueConstant(IROp->Args[i^1], &imm)) - continue; + break; + } + case OP_AND: { + // if AND's arguments are imms, they are masking + for (int i = 0; i < IR::GetArgs(IROp->Op); i++) { + uint64_t imm = 0; + if (!IREmit->IsValueConstant(IROp->Args[i ^ 1], &imm)) { + continue; + } - auto newArg = RemoveUselessMasking(IREmit, IROp->Args[i], imm); + auto newArg = RemoveUselessMasking(IREmit, IROp->Args[i], imm); - if (newArg.ID() != IROp->Args[i].ID()) { - 
IREmit->ReplaceNodeArgument(CodeNode, i, IREmit->UnwrapNode(newArg)); - Changed = true; - } + if (newArg.ID() != IROp->Args[i].ID()) { + IREmit->ReplaceNodeArgument(CodeNode, i, IREmit->UnwrapNode(newArg)); + Changed = true; } + } + break; + } + + case OP_BFE: { + auto Op = IROp->C(); + + // Is this value already BFE'd? + if (IsBfeAlreadyDone(IREmit, Op->Src, Op->Width)) { + IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Src)); + // printf("Removed BFE once \n"); break; } - case OP_BFE: { - auto Op = IROp->C(); + // Is this value already ZEXT'd? + if (Op->lsb == 0) { + // LoadMem, LoadMemTSO & LoadContext ZExt + auto source = Op->Src; + auto sourceHeader = IREmit->GetOpHeader(source); - // Is this value already BFE'd? - if (IsBfeAlreadyDone(IREmit, Op->Src, Op->Width)) { - IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Src)); - //printf("Removed BFE once \n"); + if (Op->Width >= (sourceHeader->Size * 8) && + (sourceHeader->Op == OP_LOADMEM || sourceHeader->Op == OP_LOADMEMTSO || sourceHeader->Op == OP_LOADCONTEXT)) { + // printf("Eliminated needless zext bfe\n"); + // Load mem / load ctx zexts, no need to vmem + IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(source)); break; } + } - // Is this value already ZEXT'd? 
- if (Op->lsb == 0) { - //LoadMem, LoadMemTSO & LoadContext ZExt - auto source = Op->Src; - auto sourceHeader = IREmit->GetOpHeader(source); - - if (Op->Width >= (sourceHeader->Size*8) && - (sourceHeader->Op == OP_LOADMEM || sourceHeader->Op == OP_LOADMEMTSO || sourceHeader->Op == OP_LOADCONTEXT) - ) { - //printf("Eliminated needless zext bfe\n"); - // Load mem / load ctx zexts, no need to vmem - IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(source)); - break; - } - } - - // BFE does implicit masking, remove any masks leading to this, if possible - uint64_t imm = 1ULL << (Op->Width-1); - imm = (imm-1) *2 + 1; - imm <<= Op->lsb; + // BFE does implicit masking, remove any masks leading to this, if possible + uint64_t imm = 1ULL << (Op->Width - 1); + imm = (imm - 1) * 2 + 1; + imm <<= Op->lsb; - auto newArg = RemoveUselessMasking(IREmit, Op->Src, imm); + auto newArg = RemoveUselessMasking(IREmit, Op->Src, imm); - if (newArg.ID() != Op->Src.ID()) { - IREmit->ReplaceNodeArgument(CodeNode, Op->Src_Index, IREmit->UnwrapNode(newArg)); - Changed = true; - } - break; + if (newArg.ID() != Op->Src.ID()) { + IREmit->ReplaceNodeArgument(CodeNode, Op->Src_Index, IREmit->UnwrapNode(newArg)); + Changed = true; } + break; + } - case OP_SBFE: { - auto Op = IROp->C(); + case OP_SBFE: { + auto Op = IROp->C(); - // BFE does implicit masking - uint64_t imm = 1ULL << (Op->Width-1); - imm = (imm-1) *2 + 1; - imm <<= Op->lsb; + // BFE does implicit masking + uint64_t imm = 1ULL << (Op->Width - 1); + imm = (imm - 1) * 2 + 1; + imm <<= Op->lsb; - auto newArg = RemoveUselessMasking(IREmit, Op->Src, imm); + auto newArg = RemoveUselessMasking(IREmit, Op->Src, imm); - if (newArg.ID() != Op->Src.ID()) { - IREmit->ReplaceNodeArgument(CodeNode, Op->Src_Index, IREmit->UnwrapNode(newArg)); - Changed = true; - } - break; + if (newArg.ID() != Op->Src.ID()) { + IREmit->ReplaceNodeArgument(CodeNode, Op->Src_Index, IREmit->UnwrapNode(newArg)); + Changed = true; } + break; + } - case OP_VMOV: { - 
// elim from load mem - auto source = IROp->Args[0]; - auto sourceHeader = IREmit->GetOpHeader(source); - - if (IROp->Size >= sourceHeader->Size && - (sourceHeader->Op == OP_LOADMEM || sourceHeader->Op == OP_LOADMEMTSO || sourceHeader->Op == OP_LOADCONTEXT) - ) { - //printf("Eliminated needless zext VMOV\n"); - // Load mem / load ctx zexts, no need to vmem - IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(source)); - } else if (IROp->Size == sourceHeader->Size) { - // VMOV of same size - // XXX: This is unsafe of an optimization since in some cases we can't see through garbage data in the upper bits of a vector - // RCLSE generates VMOV instructions which are being used as a zero extension - //printf("printf vmov of same size?!\n"); - //IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(source)); - } - break; + case OP_VMOV: { + // elim from load mem + auto source = IROp->Args[0]; + auto sourceHeader = IREmit->GetOpHeader(source); + + if (IROp->Size >= sourceHeader->Size && + (sourceHeader->Op == OP_LOADMEM || sourceHeader->Op == OP_LOADMEMTSO || sourceHeader->Op == OP_LOADCONTEXT)) { + // printf("Eliminated needless zext VMOV\n"); + // Load mem / load ctx zexts, no need to vmem + IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(source)); + } else if (IROp->Size == sourceHeader->Size) { + // VMOV of same size + // XXX: This is unsafe of an optimization since in some cases we can't see through garbage data in the upper bits of a vector + // RCLSE generates VMOV instructions which are being used as a zero extension + // printf("printf vmov of same size?!\n"); + // IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(source)); } - default: - break; + break; + } + default: break; } return Changed; } // constprop + some more per instruction logic -bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& CurrentIR, - OrderedNode* CodeNode, IROp_Header* IROp) { - bool Changed = false; +bool ConstProp::ConstantPropagation(IREmitter* 
IREmit, const IRListView& CurrentIR, OrderedNode* CodeNode, IROp_Header* IROp) { + bool Changed = false; - switch (IROp->Op) { -/* - case OP_UMUL: - case OP_DIV: - case OP_UDIV: - case OP_REM: - case OP_UREM: - case OP_MULH: - case OP_UMULH: - case OP_LSHR: - case OP_ASHR: - case OP_ROL: - case OP_ROR: - case OP_LDIV: - case OP_LUDIV: - case OP_LREM: - case OP_LUREM: - case OP_BFI: - { - uint64_t Constant1; - uint64_t Constant2; + switch (IROp->Op) { + /* + case OP_UMUL: + case OP_DIV: + case OP_UDIV: + case OP_REM: + case OP_UREM: + case OP_MULH: + case OP_UMULH: + case OP_LSHR: + case OP_ASHR: + case OP_ROL: + case OP_ROR: + case OP_LDIV: + case OP_LUDIV: + case OP_LREM: + case OP_LUREM: + case OP_BFI: + { + uint64_t Constant1; + uint64_t Constant2; - if (IREmit->IsValueConstant(IROp->Args[0], &Constant1) && - IREmit->IsValueConstant(IROp->Args[1], &Constant2)) { - LOGMAN_MSG_A_FMT("Could const prop op: {}", IR::GetName(IROp->Op)); - } - break; - } + if (IREmit->IsValueConstant(IROp->Args[0], &Constant1) && + IREmit->IsValueConstant(IROp->Args[1], &Constant2)) { + LOGMAN_MSG_A_FMT("Could const prop op: {}", IR::GetName(IROp->Op)); + } + break; + } - case OP_SEXT: - case OP_NEG: - case OP_POPCOUNT: - case OP_FINDLSB: - case OP_FINDMSB: - case OP_REV: - case OP_SBFE: { - uint64_t Constant1; - - if (IREmit->IsValueConstant(IROp->Args[0], &Constant1)) { - LOGMAN_MSG_A_FMT("Could const prop op: {}", IR::GetName(IROp->Op)); - } - break; - } -*/ + case OP_SEXT: + case OP_NEG: + case OP_POPCOUNT: + case OP_FINDLSB: + case OP_FINDMSB: + case OP_REV: + case OP_SBFE: { + uint64_t Constant1; + + if (IREmit->IsValueConstant(IROp->Args[0], &Constant1)) { + LOGMAN_MSG_A_FMT("Could const prop op: {}", IR::GetName(IROp->Op)); + } + break; + } + */ - case OP_LOADMEMTSO: { - auto Op = IROp->CW(); - auto AddressHeader = IREmit->GetOpHeader(Op->Addr); + case OP_LOADMEMTSO: { + auto Op = IROp->CW(); + auto AddressHeader = IREmit->GetOpHeader(Op->Addr); - if (Op->Class == 
FEXCore::IR::FPRClass && AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) { - // TODO: LRCPC3 supports a vector unscaled offset like LRCPC2. - // Support once hardware is available to use this. - auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader); + if (Op->Class == FEXCore::IR::FPRClass && AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) { + // TODO: LRCPC3 supports a vector unscaled offset like LRCPC2. + // Support once hardware is available to use this. + auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader); - Op->OffsetType = OffsetType; - Op->OffsetScale = OffsetScale; - IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr - IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, Arg1); // Offset + Op->OffsetType = OffsetType; + Op->OffsetScale = OffsetScale; + IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr + IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, Arg1); // Offset - Changed = true; - } - break; + Changed = true; } + break; + } - case OP_STOREMEMTSO: { - auto Op = IROp->CW(); - auto AddressHeader = IREmit->GetOpHeader(Op->Addr); + case OP_STOREMEMTSO: { + auto Op = IROp->CW(); + auto AddressHeader = IREmit->GetOpHeader(Op->Addr); - if (Op->Class == FEXCore::IR::FPRClass && AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) { - // TODO: LRCPC3 supports a vector unscaled offset like LRCPC2. - // Support once hardware is available to use this. - auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader); + if (Op->Class == FEXCore::IR::FPRClass && AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) { + // TODO: LRCPC3 supports a vector unscaled offset like LRCPC2. + // Support once hardware is available to use this. 
+ auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader); - Op->OffsetType = OffsetType; - Op->OffsetScale = OffsetScale; - IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr - IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, Arg1); // Offset + Op->OffsetType = OffsetType; + Op->OffsetScale = OffsetScale; + IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr + IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, Arg1); // Offset - Changed = true; - } - break; + Changed = true; } + break; + } - case OP_LOADMEM: { - auto Op = IROp->CW(); - auto AddressHeader = IREmit->GetOpHeader(Op->Addr); + case OP_LOADMEM: { + auto Op = IROp->CW(); + auto AddressHeader = IREmit->GetOpHeader(Op->Addr); - if (AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) { - auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader); + if (AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) { + auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader); - Op->OffsetType = OffsetType; - Op->OffsetScale = OffsetScale; - IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr - IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, Arg1); // Offset + Op->OffsetType = OffsetType; + Op->OffsetScale = OffsetScale; + IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr + IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, Arg1); // Offset - Changed = true; - } - break; + Changed = true; } + break; + } - case OP_STOREMEM: { - auto Op = IROp->CW(); - auto AddressHeader = IREmit->GetOpHeader(Op->Addr); + case OP_STOREMEM: { + auto Op = IROp->CW(); + auto AddressHeader = IREmit->GetOpHeader(Op->Addr); - if (AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) { - auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader); + if 
(AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) { + auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader); - Op->OffsetType = OffsetType; - Op->OffsetScale = OffsetScale; - IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr - IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, Arg1); // Offset + Op->OffsetType = OffsetType; + Op->OffsetScale = OffsetScale; + IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr + IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, Arg1); // Offset - Changed = true; - } - break; + Changed = true; } + break; + } - case OP_ADD: { - auto Op = IROp->C(); - uint64_t Constant1{}; - uint64_t Constant2{}; - bool IsConstant1 = IREmit->IsValueConstant(Op->Header.Args[0], &Constant1); - bool IsConstant2 = IREmit->IsValueConstant(Op->Header.Args[1], &Constant2); + case OP_ADD: { + auto Op = IROp->C(); + uint64_t Constant1 {}; + uint64_t Constant2 {}; + bool IsConstant1 = IREmit->IsValueConstant(Op->Header.Args[0], &Constant1); + bool IsConstant2 = IREmit->IsValueConstant(Op->Header.Args[1], &Constant2); - if (IsConstant1 && IsConstant2) { - uint64_t NewConstant = (Constant1 + Constant2) & getMask(Op) ; - IREmit->ReplaceWithConstant(CodeNode, NewConstant); - Changed = true; - } - else if (IsConstant2 && !IsImmAddSub(Constant2) && IsImmAddSub(-Constant2)) { - // If the second argument is constant, the immediate is not ImmAddSub, but when negated is. - // This means we can convert the operation in to a subtract. - // Change the IR operation itself. - IROp->Op = OP_SUB; - // Set the write cursor to just before this operation. - auto CodeIter = CurrentIR.at(CodeNode); - --CodeIter; - IREmit->SetWriteCursor(std::get<0>(*CodeIter)); - - // Negate the constant. - auto NegConstant = IREmit->_Constant(-Constant2); - - // Replace the second source with the negated constant. 
- IREmit->ReplaceNodeArgument(CodeNode, Op->Src2_Index, NegConstant); - Changed = true; - } - break; + if (IsConstant1 && IsConstant2) { + uint64_t NewConstant = (Constant1 + Constant2) & getMask(Op); + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; + } else if (IsConstant2 && !IsImmAddSub(Constant2) && IsImmAddSub(-Constant2)) { + // If the second argument is constant, the immediate is not ImmAddSub, but when negated is. + // This means we can convert the operation in to a subtract. + // Change the IR operation itself. + IROp->Op = OP_SUB; + // Set the write cursor to just before this operation. + auto CodeIter = CurrentIR.at(CodeNode); + --CodeIter; + IREmit->SetWriteCursor(std::get<0>(*CodeIter)); + + // Negate the constant. + auto NegConstant = IREmit->_Constant(-Constant2); + + // Replace the second source with the negated constant. + IREmit->ReplaceNodeArgument(CodeNode, Op->Src2_Index, NegConstant); + Changed = true; } - case OP_SUB: { - auto Op = IROp->C(); - uint64_t Constant1{}; - uint64_t Constant2{}; - - if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && - IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - uint64_t NewConstant = (Constant1 - Constant2) & getMask(Op) ; - IREmit->ReplaceWithConstant(CodeNode, NewConstant); - Changed = true; - } break; + } + case OP_SUB: { + auto Op = IROp->C(); + uint64_t Constant1 {}; + uint64_t Constant2 {}; + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + uint64_t NewConstant = (Constant1 - Constant2) & getMask(Op); + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; } - case OP_SUBSHIFT: { - auto Op = IROp->C(); - - uint64_t Constant1, Constant2; - if (IREmit->IsValueConstant(IROp->Args[0], &Constant1) && - IREmit->IsValueConstant(IROp->Args[1], &Constant2) && - Op->Shift == IR::ShiftType::LSL) { - // Optimize the LSL case when we know both sources are constant. 
- // This is a pattern that shows up with direction flag calculations if DF was set just before the operation. - uint64_t NewConstant = (Constant1 - (Constant2 << Op->ShiftAmount)) & getMask(Op); - IREmit->ReplaceWithConstant(CodeNode, NewConstant); - Changed = true; - } - break; + break; + } + case OP_SUBSHIFT: { + auto Op = IROp->C(); + + uint64_t Constant1, Constant2; + if (IREmit->IsValueConstant(IROp->Args[0], &Constant1) && IREmit->IsValueConstant(IROp->Args[1], &Constant2) && + Op->Shift == IR::ShiftType::LSL) { + // Optimize the LSL case when we know both sources are constant. + // This is a pattern that shows up with direction flag calculations if DF was set just before the operation. + uint64_t NewConstant = (Constant1 - (Constant2 << Op->ShiftAmount)) & getMask(Op); + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; } - case OP_AND: { - auto Op = IROp->CW(); - uint64_t Constant1{}; - uint64_t Constant2{}; + break; + } + case OP_AND: { + auto Op = IROp->CW(); + uint64_t Constant1 {}; + uint64_t Constant2 {}; + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + uint64_t NewConstant = (Constant1 & Constant2) & getMask(Op); + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; + } else if (Constant2 == 1) { + // happens from flag calcs + auto val = IREmit->GetOpHeader(Op->Header.Args[0]); - if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && - IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - uint64_t NewConstant = (Constant1 & Constant2) & getMask(Op) ; - IREmit->ReplaceWithConstant(CodeNode, NewConstant); - Changed = true; - } else if (Constant2 == 1) { - // happens from flag calcs - auto val = IREmit->GetOpHeader(Op->Header.Args[0]); - - uint64_t Constant3; - if (val->Op == OP_SELECT && - IREmit->IsValueConstant(val->Args[2], &Constant2) && - IREmit->IsValueConstant(val->Args[3], &Constant3) && - Constant2 == 1 && - 
Constant3 == 0) - { - IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Header.Args[0])); - Changed = true; - } - } else if (Op->Header.Args[0].ID() == Op->Header.Args[1].ID()) { - // AND with same value results in original value + uint64_t Constant3; + if (val->Op == OP_SELECT && IREmit->IsValueConstant(val->Args[2], &Constant2) && IREmit->IsValueConstant(val->Args[3], &Constant3) && + Constant2 == 1 && Constant3 == 0) { IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Header.Args[0])); Changed = true; } - break; + } else if (Op->Header.Args[0].ID() == Op->Header.Args[1].ID()) { + // AND with same value results in original value + IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Header.Args[0])); + Changed = true; } - /* TODO: restore this when we have rmif or something? */ + break; + } + /* TODO: restore this when we have rmif or something? */ #if 0 case OP_TESTNZ: { auto Op = IROp->CW(); @@ -662,353 +654,305 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current break; } #endif - case OP_OR: { - auto Op = IROp->CW(); - uint64_t Constant1{}; - uint64_t Constant2{}; - - if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && - IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - uint64_t NewConstant = Constant1 | Constant2; - IREmit->ReplaceWithConstant(CodeNode, NewConstant); - Changed = true; - } else if (Op->Header.Args[0].ID() == Op->Header.Args[1].ID()) { - // OR with same value results in original value - IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Header.Args[0])); - Changed = true; - } - break; + case OP_OR: { + auto Op = IROp->CW(); + uint64_t Constant1 {}; + uint64_t Constant2 {}; + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + uint64_t NewConstant = Constant1 | Constant2; + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; + } else if (Op->Header.Args[0].ID() == 
Op->Header.Args[1].ID()) { + // OR with same value results in original value + IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Header.Args[0])); + Changed = true; } - case OP_ORLSHL: { - auto Op = IROp->CW(); - uint64_t Constant1{}; - uint64_t Constant2{}; - - if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && - IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - uint64_t NewConstant = Constant1 | (Constant2 << Op->BitShift); - IREmit->ReplaceWithConstant(CodeNode, NewConstant); - Changed = true; - } break; + } + case OP_ORLSHL: { + auto Op = IROp->CW(); + uint64_t Constant1 {}; + uint64_t Constant2 {}; + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + uint64_t NewConstant = Constant1 | (Constant2 << Op->BitShift); + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; } - case OP_ORLSHR: { - auto Op = IROp->CW(); - uint64_t Constant1{}; - uint64_t Constant2{}; - - if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && - IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - uint64_t NewConstant = Constant1 | (Constant2 >> Op->BitShift); - IREmit->ReplaceWithConstant(CodeNode, NewConstant); - Changed = true; - } break; + } + case OP_ORLSHR: { + auto Op = IROp->CW(); + uint64_t Constant1 {}; + uint64_t Constant2 {}; + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + uint64_t NewConstant = Constant1 | (Constant2 >> Op->BitShift); + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; } - case OP_XOR: { - auto Op = IROp->C(); - uint64_t Constant1{}; - uint64_t Constant2{}; - - if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && - IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - uint64_t NewConstant = Constant1 ^ Constant2; - IREmit->ReplaceWithConstant(CodeNode, NewConstant); - Changed = true; - } else if 
(Op->Header.Args[0].ID() == Op->Header.Args[1].ID()) { - // XOR with same value results to zero - IREmit->SetWriteCursor(CodeNode); - IREmit->ReplaceAllUsesWith(CodeNode, IREmit->_Constant(0)); - Changed = true; - } else { - // XOR with zero results in the nonzero source - for (unsigned i = 0; i < 2; ++i) { - if (!IREmit->IsValueConstant(Op->Header.Args[i], &Constant1)) - continue; - - if (Constant1 != 0) - continue; + break; + } + case OP_XOR: { + auto Op = IROp->C(); + uint64_t Constant1 {}; + uint64_t Constant2 {}; + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + uint64_t NewConstant = Constant1 ^ Constant2; + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; + } else if (Op->Header.Args[0].ID() == Op->Header.Args[1].ID()) { + // XOR with same value results to zero + IREmit->SetWriteCursor(CodeNode); + IREmit->ReplaceAllUsesWith(CodeNode, IREmit->_Constant(0)); + Changed = true; + } else { + // XOR with zero results in the nonzero source + for (unsigned i = 0; i < 2; ++i) { + if (!IREmit->IsValueConstant(Op->Header.Args[i], &Constant1)) { + continue; + } - IREmit->SetWriteCursor(CodeNode); - OrderedNode *Arg = CurrentIR.GetNode(Op->Header.Args[1 - i]); - IREmit->ReplaceAllUsesWith(CodeNode, Arg); - Changed = true; - break; + if (Constant1 != 0) { + continue; } - } - break; - } - case OP_LSHL: { - auto Op = IROp->CW(); - uint64_t Constant1{}; - uint64_t Constant2{}; - - if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && - IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - // Shifts mask the shift amount by 63 or 31 depending on operating size; - uint64_t ShiftMask = IROp->Size == 8 ? 
63 : 31; - uint64_t NewConstant = (Constant1 << (Constant2 & ShiftMask)) & getMask(Op); - IREmit->ReplaceWithConstant(CodeNode, NewConstant); - Changed = true; - } - else if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2) && - Constant2 == 0) { + IREmit->SetWriteCursor(CodeNode); - OrderedNode *Arg = CurrentIR.GetNode(Op->Header.Args[0]); + OrderedNode* Arg = CurrentIR.GetNode(Op->Header.Args[1 - i]); IREmit->ReplaceAllUsesWith(CodeNode, Arg); Changed = true; - } else { - auto newArg = RemoveUselessMasking(IREmit, Op->Header.Args[1], IROp->Size * 8 - 1); - if (newArg.ID() != Op->Header.Args[1].ID()) { - IREmit->ReplaceNodeArgument(CodeNode, 1, IREmit->UnwrapNode(newArg)); - Changed = true; - } + break; } - break; } - case OP_LSHR: { - auto Op = IROp->CW(); - uint64_t Constant1{}; - uint64_t Constant2{}; - - if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && - IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - // Shifts mask the shift amount by 63 or 31 depending on operating size; - uint64_t ShiftMask = IROp->Size == 8 ? 63 : 31; - uint64_t NewConstant = (Constant1 >> (Constant2 & ShiftMask)) & getMask(Op); - IREmit->ReplaceWithConstant(CodeNode, NewConstant); + break; + } + case OP_LSHL: { + auto Op = IROp->CW(); + uint64_t Constant1 {}; + uint64_t Constant2 {}; + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + // Shifts mask the shift amount by 63 or 31 depending on operating size; + uint64_t ShiftMask = IROp->Size == 8 ? 
63 : 31; + uint64_t NewConstant = (Constant1 << (Constant2 & ShiftMask)) & getMask(Op); + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; + } else if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2) && Constant2 == 0) { + IREmit->SetWriteCursor(CodeNode); + OrderedNode* Arg = CurrentIR.GetNode(Op->Header.Args[0]); + IREmit->ReplaceAllUsesWith(CodeNode, Arg); + Changed = true; + } else { + auto newArg = RemoveUselessMasking(IREmit, Op->Header.Args[1], IROp->Size * 8 - 1); + if (newArg.ID() != Op->Header.Args[1].ID()) { + IREmit->ReplaceNodeArgument(CodeNode, 1, IREmit->UnwrapNode(newArg)); Changed = true; } - else if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2) && - Constant2 == 0) { - IREmit->SetWriteCursor(CodeNode); - OrderedNode *Arg = CurrentIR.GetNode(Op->Header.Args[0]); - IREmit->ReplaceAllUsesWith(CodeNode, Arg); + } + break; + } + case OP_LSHR: { + auto Op = IROp->CW(); + uint64_t Constant1 {}; + uint64_t Constant2 {}; + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + // Shifts mask the shift amount by 63 or 31 depending on operating size; + uint64_t ShiftMask = IROp->Size == 8 ? 
63 : 31; + uint64_t NewConstant = (Constant1 >> (Constant2 & ShiftMask)) & getMask(Op); + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; + } else if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2) && Constant2 == 0) { + IREmit->SetWriteCursor(CodeNode); + OrderedNode* Arg = CurrentIR.GetNode(Op->Header.Args[0]); + IREmit->ReplaceAllUsesWith(CodeNode, Arg); + Changed = true; + } else { + auto newArg = RemoveUselessMasking(IREmit, Op->Header.Args[1], IROp->Size * 8 - 1); + if (newArg.ID() != Op->Header.Args[1].ID()) { + IREmit->ReplaceNodeArgument(CodeNode, 1, IREmit->UnwrapNode(newArg)); Changed = true; - } else { - auto newArg = RemoveUselessMasking(IREmit, Op->Header.Args[1], IROp->Size * 8 - 1); - if (newArg.ID() != Op->Header.Args[1].ID()) { - IREmit->ReplaceNodeArgument(CodeNode, 1, IREmit->UnwrapNode(newArg)); - Changed = true; - } } - break; } - case OP_BFE: { - auto Op = IROp->C(); - uint64_t Constant; - if (IROp->Size <= 8 && IREmit->IsValueConstant(Op->Src, &Constant)) { - uint64_t SourceMask = Op->Width == 64 ? ~0ULL : ((1ULL << Op->Width) - 1); - SourceMask <<= Op->lsb; - - uint64_t NewConstant = (Constant & SourceMask) >> Op->lsb; - IREmit->ReplaceWithConstant(CodeNode, NewConstant); + break; + } + case OP_BFE: { + auto Op = IROp->C(); + uint64_t Constant; + if (IROp->Size <= 8 && IREmit->IsValueConstant(Op->Src, &Constant)) { + uint64_t SourceMask = Op->Width == 64 ? 
~0ULL : ((1ULL << Op->Width) - 1); + SourceMask <<= Op->lsb; + + uint64_t NewConstant = (Constant & SourceMask) >> Op->lsb; + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; + } else if (IROp->Size == CurrentIR.GetOp(Op->Header.Args[0])->Size && Op->Width == (IROp->Size * 8) && Op->lsb == 0) { + // A BFE that extracts all bits results in original value + // XXX - This is broken for now - see https://github.com/FEX-Emu/FEX/issues/351 + // IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Header.Args[0])); + // Changed = true; + } else if (Op->Width == 1 && Op->lsb == 0) { + // common from flag codegen + auto val = IREmit->GetOpHeader(Op->Header.Args[0]); + + uint64_t Constant2 {}; + uint64_t Constant3 {}; + if (val->Op == OP_SELECT && IREmit->IsValueConstant(val->Args[2], &Constant2) && IREmit->IsValueConstant(val->Args[3], &Constant3) && + Constant2 == 1 && Constant3 == 0) { + IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Header.Args[0])); Changed = true; - } else if (IROp->Size == CurrentIR.GetOp(Op->Header.Args[0])->Size && Op->Width == (IROp->Size * 8) && Op->lsb == 0 ) { - // A BFE that extracts all bits results in original value - // XXX - This is broken for now - see https://github.com/FEX-Emu/FEX/issues/351 - // IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Header.Args[0])); - // Changed = true; - } else if (Op->Width == 1 && Op->lsb == 0) { - // common from flag codegen - auto val = IREmit->GetOpHeader(Op->Header.Args[0]); - - uint64_t Constant2{}; - uint64_t Constant3{}; - if (val->Op == OP_SELECT && - IREmit->IsValueConstant(val->Args[2], &Constant2) && - IREmit->IsValueConstant(val->Args[3], &Constant3) && - Constant2 == 1 && - Constant3 == 0) - { - IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Header.Args[0])); - Changed = true; - } } + } break; + } + case OP_SBFE: { + auto Op = IROp->C(); + uint64_t Constant; + if (IREmit->IsValueConstant(Op->Src, &Constant)) { + // SBFE of a 
constant can be converted to a constant. + uint64_t SourceMask = Op->Width == 64 ? ~0ULL : ((1ULL << Op->Width) - 1); + uint64_t DestSizeInBits = IROp->Size * 8; + uint64_t DestMask = DestSizeInBits == 64 ? ~0ULL : ((1ULL << DestSizeInBits) - 1); + SourceMask <<= Op->lsb; + + int64_t NewConstant = (Constant & SourceMask) >> Op->lsb; + NewConstant <<= 64 - Op->Width; + NewConstant >>= 64 - Op->Width; + NewConstant &= DestMask; + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + + Changed = true; } - case OP_SBFE: { - auto Op = IROp->C(); - uint64_t Constant; - if (IREmit->IsValueConstant(Op->Src, &Constant)) { - // SBFE of a constant can be converted to a constant. - uint64_t SourceMask = - Op->Width == 64 ? ~0ULL : ((1ULL << Op->Width) - 1); - uint64_t DestSizeInBits = IROp->Size * 8; - uint64_t DestMask = - DestSizeInBits == 64 ? ~0ULL : ((1ULL << DestSizeInBits) - 1); - SourceMask <<= Op->lsb; - - int64_t NewConstant = (Constant & SourceMask) >> Op->lsb; - NewConstant <<= 64 - Op->Width; - NewConstant >>= 64 - Op->Width; - NewConstant &= DestMask; - IREmit->ReplaceWithConstant(CodeNode, NewConstant); + break; + } + case OP_BFI: { + auto Op = IROp->C(); + uint64_t ConstantDest {}; + uint64_t ConstantSrc {}; + bool DestIsConstant = IREmit->IsValueConstant(Op->Header.Args[0], &ConstantDest); + bool SrcIsConstant = IREmit->IsValueConstant(Op->Header.Args[1], &ConstantSrc); + + if (DestIsConstant && SrcIsConstant) { + uint64_t SourceMask = Op->Width == 64 ? ~0ULL : ((1ULL << Op->Width) - 1); + uint64_t NewConstant = ConstantDest & ~(SourceMask << Op->lsb); + NewConstant |= (ConstantSrc & SourceMask) << Op->lsb; + + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; + } else if (SrcIsConstant && HasConsecutiveBits(ConstantSrc, Op->Width)) { + // We are trying to insert constant, if it is a bitfield of only set bits then we can orr or and it. + IREmit->SetWriteCursor(CodeNode); + uint64_t SourceMask = Op->Width == 64 ? 
~0ULL : ((1ULL << Op->Width) - 1); + uint64_t NewConstant = SourceMask << Op->lsb; + if (ConstantSrc & 1) { + auto orr = IREmit->_Or(IR::SizeToOpSize(IROp->Size), CurrentIR.GetNode(Op->Header.Args[0]), IREmit->_Constant(NewConstant)); + IREmit->ReplaceAllUsesWith(CodeNode, orr); Changed = true; - } - break; - } - case OP_BFI: { - auto Op = IROp->C(); - uint64_t ConstantDest{}; - uint64_t ConstantSrc{}; - bool DestIsConstant = IREmit->IsValueConstant(Op->Header.Args[0], &ConstantDest); - bool SrcIsConstant = IREmit->IsValueConstant(Op->Header.Args[1], &ConstantSrc); - - if (DestIsConstant && SrcIsConstant) { - uint64_t SourceMask = Op->Width == 64 ? ~0ULL : ((1ULL << Op->Width) - 1); - uint64_t NewConstant = ConstantDest & ~(SourceMask << Op->lsb); - NewConstant |= (ConstantSrc & SourceMask) << Op->lsb; - - IREmit->ReplaceWithConstant(CodeNode, NewConstant); + } else { + // We are wanting to clear the bitfield. + auto andn = IREmit->_Andn(IR::SizeToOpSize(IROp->Size), CurrentIR.GetNode(Op->Header.Args[0]), IREmit->_Constant(NewConstant)); + IREmit->ReplaceAllUsesWith(CodeNode, andn); Changed = true; } - else if (SrcIsConstant && HasConsecutiveBits(ConstantSrc, Op->Width)) { - // We are trying to insert constant, if it is a bitfield of only set bits then we can orr or and it. - IREmit->SetWriteCursor(CodeNode); - uint64_t SourceMask = Op->Width == 64 ? ~0ULL : ((1ULL << Op->Width) - 1); - uint64_t NewConstant = SourceMask << Op->lsb; - - if (ConstantSrc & 1) { - auto orr = IREmit->_Or(IR::SizeToOpSize(IROp->Size), CurrentIR.GetNode(Op->Header.Args[0]), IREmit->_Constant(NewConstant)); - IREmit->ReplaceAllUsesWith(CodeNode, orr); - Changed = true; - } - else { - // We are wanting to clear the bitfield. 
- auto andn = IREmit->_Andn(IR::SizeToOpSize(IROp->Size), CurrentIR.GetNode(Op->Header.Args[0]), IREmit->_Constant(NewConstant)); - IREmit->ReplaceAllUsesWith(CodeNode, andn); - Changed = true; - } - } - break; } - case OP_MUL: { - auto Op = IROp->C(); - uint64_t Constant1{}; - uint64_t Constant2{}; - - if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && - IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - uint64_t NewConstant = (Constant1 * Constant2) & getMask(Op); - IREmit->ReplaceWithConstant(CodeNode, NewConstant); + break; + } + case OP_MUL: { + auto Op = IROp->C(); + uint64_t Constant1 {}; + uint64_t Constant2 {}; + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + uint64_t NewConstant = (Constant1 * Constant2) & getMask(Op); + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; + } else if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2) && std::popcount(Constant2) == 1) { + if (IROp->Size == 4 || IROp->Size == 8) { + uint64_t amt = std::countr_zero(Constant2); + IREmit->SetWriteCursor(CodeNode); + auto shift = IREmit->_Lshl(IR::SizeToOpSize(IROp->Size), CurrentIR.GetNode(Op->Header.Args[0]), IREmit->_Constant(amt)); + IREmit->ReplaceAllUsesWith(CodeNode, shift); Changed = true; - } else if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2) && std::popcount(Constant2) == 1) { - if (IROp->Size == 4 || IROp->Size == 8) { - uint64_t amt = std::countr_zero(Constant2); - IREmit->SetWriteCursor(CodeNode); - auto shift = IREmit->_Lshl(IR::SizeToOpSize(IROp->Size), CurrentIR.GetNode(Op->Header.Args[0]), IREmit->_Constant(amt)); - IREmit->ReplaceAllUsesWith(CodeNode, shift); - Changed = true; - } } - break; } - case OP_SELECT: { - auto Op = IROp->C(); - uint64_t Constant1{}; - uint64_t Constant2{}; + break; + } + case OP_SELECT: { + auto Op = IROp->C(); + uint64_t Constant1 {}; + uint64_t Constant2 {}; - if 
(IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && - IREmit->IsValueConstant(Op->Header.Args[1], &Constant2) && - Op->Cond == COND_EQ) { + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1) && IREmit->IsValueConstant(Op->Header.Args[1], &Constant2) && Op->Cond == COND_EQ) { - Constant1 &= getMask(Op); - Constant2 &= getMask(Op); + Constant1 &= getMask(Op); + Constant2 &= getMask(Op); - bool is_true = Constant1 == Constant2; + bool is_true = Constant1 == Constant2; - IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Header.Args[is_true ? 2 : 3])); - Changed = true; - } - break; - } - default: - break; + IREmit->ReplaceAllUsesWith(CodeNode, CurrentIR.GetNode(Op->Header.Args[is_true ? 2 : 3])); + Changed = true; } + break; + } + default: break; + } - return Changed; + return Changed; } -bool ConstProp::ConstantInlining(IREmitter *IREmit, const IRListView& CurrentIR) { +bool ConstProp::ConstantInlining(IREmitter* IREmit, const IRListView& CurrentIR) { InlineConstantGen.clear(); bool Changed = false; for (auto [CodeNode, IROp] : CurrentIR.GetAllCode()) { - switch(IROp->Op) { - case OP_LSHR: - case OP_ASHR: - case OP_ROR: - case OP_LSHL: - { - auto Op = IROp->C(); - - uint64_t Constant2{}; - if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); - - // this shouldn't be here, but rather on the emitter themselves or the constprop transformation? - if (IROp->Size <=4) - Constant2 &= 31; - else - Constant2 &= 63; + switch (IROp->Op) { + case OP_LSHR: + case OP_ASHR: + case OP_ROR: + case OP_LSHL: { + auto Op = IROp->C(); - IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2)); + uint64_t Constant2 {}; + if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); - Changed = true; + // this shouldn't be here, but rather on the emitter themselves or the constprop transformation? 
+ if (IROp->Size <= 4) { + Constant2 &= 31; + } else { + Constant2 &= 63; } - break; - } - case OP_ADD: - case OP_SUB: - case OP_ADDNZCV: - case OP_SUBNZCV: - { - auto Op = IROp->C(); - - uint64_t Constant2{}; - if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - // We don't allow 8/16-bit operations to have constants, since no - // constant would be in bounds after the JIT's 24/16 shift. - if (IsImmAddSub(Constant2) && Op->Header.Size >= 4) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); - - IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2)); - Changed = true; - } - } else if (IROp->Op == OP_SUBNZCV) { - // If the first source is zero, we can use a NEGS instruction. - uint64_t Constant1{}; - if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1)) { - if (Constant1 == 0) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[0])); - IREmit->ReplaceNodeArgument(CodeNode, 0, CreateInlineConstant(IREmit, 0)); - Changed = true; - } - } - } + IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2)); - break; + Changed = true; } - case OP_CONDADDNZCV: - { - auto Op = IROp->C(); + break; + } + case OP_ADD: + case OP_SUB: + case OP_ADDNZCV: + case OP_SUBNZCV: { + auto Op = IROp->C(); - uint64_t Constant2{}; - if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - if (IsImmAddSub(Constant2)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); + uint64_t Constant2 {}; + if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + // We don't allow 8/16-bit operations to have constants, since no + // constant would be in bounds after the JIT's 24/16 shift. 
+ if (IsImmAddSub(Constant2) && Op->Header.Size >= 4) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); - IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2)); + IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2)); - Changed = true; - } + Changed = true; } - - uint64_t Constant1{}; + } else if (IROp->Op == OP_SUBNZCV) { + // If the first source is zero, we can use a NEGS instruction. + uint64_t Constant1 {}; if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1)) { if (Constant1 == 0) { IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[0])); @@ -1016,254 +960,260 @@ bool ConstProp::ConstantInlining(IREmitter *IREmit, const IRListView& CurrentIR) Changed = true; } } - break; } - case OP_TESTNZ: - { - auto Op = IROp->C(); - uint64_t Constant1{}; - if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant1)) { - if (IsImmLogical(Constant1, IROp->Size * 8)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); + break; + } + case OP_CONDADDNZCV: { + auto Op = IROp->C(); - IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant1)); + uint64_t Constant2 {}; + if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + if (IsImmAddSub(Constant2)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); - Changed = true; - } + IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2)); + + Changed = true; } - break; } - case OP_SELECT: - { - auto Op = IROp->C(); - uint64_t Constant1{}; - if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant1)) { - if (IsImmAddSub(Constant1)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); + uint64_t Constant1 {}; + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant1)) { + if (Constant1 == 0) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[0])); + IREmit->ReplaceNodeArgument(CodeNode, 0, CreateInlineConstant(IREmit, 0)); + Changed = true; 
+ } + } + break; + } + case OP_TESTNZ: { + auto Op = IROp->C(); - IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant1)); + uint64_t Constant1 {}; + if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant1)) { + if (IsImmLogical(Constant1, IROp->Size * 8)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); - Changed = true; - } + IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant1)); + + Changed = true; } + } + break; + } + case OP_SELECT: { + auto Op = IROp->C(); + + uint64_t Constant1 {}; + if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant1)) { + if (IsImmAddSub(Constant1)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); - uint64_t AllOnes = IROp->Size == 8 ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull; + IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant1)); + + Changed = true; + } + } + + uint64_t AllOnes = IROp->Size == 8 ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull; #ifdef JIT_ARM64 - bool SupportsAllOnes = true; + bool SupportsAllOnes = true; #else - bool SupportsAllOnes = false; + bool SupportsAllOnes = false; #endif - uint64_t Constant2{}; - uint64_t Constant3{}; - if (IREmit->IsValueConstant(Op->Header.Args[2], &Constant2) && - IREmit->IsValueConstant(Op->Header.Args[3], &Constant3) && - (Constant2 == 1 || (SupportsAllOnes && Constant2 == AllOnes)) && - Constant3 == 0) - { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[2])); + uint64_t Constant2 {}; + uint64_t Constant3 {}; + if (IREmit->IsValueConstant(Op->Header.Args[2], &Constant2) && IREmit->IsValueConstant(Op->Header.Args[3], &Constant3) && + (Constant2 == 1 || (SupportsAllOnes && Constant2 == AllOnes)) && Constant3 == 0) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[2])); - IREmit->ReplaceNodeArgument(CodeNode, 2, CreateInlineConstant(IREmit, Constant2)); - IREmit->ReplaceNodeArgument(CodeNode, 3, CreateInlineConstant(IREmit, Constant3)); - } - - 
break; + IREmit->ReplaceNodeArgument(CodeNode, 2, CreateInlineConstant(IREmit, Constant2)); + IREmit->ReplaceNodeArgument(CodeNode, 3, CreateInlineConstant(IREmit, Constant3)); } - case OP_NZCVSELECT: - { - auto Op = IROp->C(); - - uint64_t AllOnes = IROp->Size == 8 ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull; - - // We always allow source 1 to be zero, but source 0 can only be a - // special 1/~0 constant if source 1 is 0. - uint64_t Constant0{}; - uint64_t Constant1{}; - if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant1) && - Constant1 == 0) - { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); - IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant1)); - if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant0) && - (Constant0 == 1 || Constant0 == AllOnes)) - { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[0])); + break; + } + case OP_NZCVSELECT: { + auto Op = IROp->C(); - IREmit->ReplaceNodeArgument(CodeNode, 0, CreateInlineConstant(IREmit, Constant0)); - } - } + uint64_t AllOnes = IROp->Size == 8 ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull; - break; + // We always allow source 1 to be zero, but source 0 can only be a + // special 1/~0 constant if source 1 is 0. 
+ uint64_t Constant0 {}; + uint64_t Constant1 {}; + if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant1) && Constant1 == 0) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); + IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant1)); + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant0) && (Constant0 == 1 || Constant0 == AllOnes)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[0])); + + IREmit->ReplaceNodeArgument(CodeNode, 0, CreateInlineConstant(IREmit, Constant0)); + } } - case OP_CONDJUMP: - { - auto Op = IROp->C(); - uint64_t Constant2{}; - if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - if (IsImmAddSub(Constant2)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); + break; + } + case OP_CONDJUMP: { + auto Op = IROp->C(); + + uint64_t Constant2 {}; + if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + if (IsImmAddSub(Constant2)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); - IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2)); + IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2)); - Changed = true; - } + Changed = true; } - break; } - case OP_EXITFUNCTION: - { - auto Op = IROp->C(); + break; + } + case OP_EXITFUNCTION: { + auto Op = IROp->C(); - uint64_t Constant{}; - if (IREmit->IsValueConstant(Op->NewRIP, &Constant)) { + uint64_t Constant {}; + if (IREmit->IsValueConstant(Op->NewRIP, &Constant)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->NewRIP)); + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->NewRIP)); - IREmit->ReplaceNodeArgument(CodeNode, 0, CreateInlineConstant(IREmit, Constant)); + IREmit->ReplaceNodeArgument(CodeNode, 0, CreateInlineConstant(IREmit, Constant)); - Changed = true; - } else { - auto NewRIP = IREmit->GetOpHeader(Op->NewRIP); - if (NewRIP->Op == OP_ENTRYPOINTOFFSET) { - auto EO = NewRIP->C(); - 
IREmit->SetWriteCursor(CurrentIR.GetNode(Op->NewRIP)); + Changed = true; + } else { + auto NewRIP = IREmit->GetOpHeader(Op->NewRIP); + if (NewRIP->Op == OP_ENTRYPOINTOFFSET) { + auto EO = NewRIP->C(); + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->NewRIP)); - IREmit->ReplaceNodeArgument(CodeNode, 0, IREmit->_InlineEntrypointOffset(IR::SizeToOpSize(EO->Header.Size), EO->Offset)); - Changed = true; - } + IREmit->ReplaceNodeArgument(CodeNode, 0, IREmit->_InlineEntrypointOffset(IR::SizeToOpSize(EO->Header.Size), EO->Offset)); + Changed = true; } - break; } - case OP_OR: - case OP_XOR: - case OP_AND: - case OP_ANDWITHFLAGS: - case OP_ANDN: - { - auto Op = IROp->CW(); + break; + } + case OP_OR: + case OP_XOR: + case OP_AND: + case OP_ANDWITHFLAGS: + case OP_ANDN: { + auto Op = IROp->CW(); - uint64_t Constant2{}; - if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { - if (IsImmLogical(Constant2, IROp->Size * 8)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); + uint64_t Constant2 {}; + if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) { + if (IsImmLogical(Constant2, IROp->Size * 8)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1])); - IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2)); + IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2)); - Changed = true; - } + Changed = true; } - break; } - case OP_LOADMEM: - { - auto Op = IROp->CW(); + break; + } + case OP_LOADMEM: { + auto Op = IROp->CW(); - uint64_t Constant2{}; - if (Op->OffsetType == MEM_OFFSET_SXTX && IREmit->IsValueConstant(Op->Offset, &Constant2)) { - if (IsImmMemory(Constant2, IROp->Size)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Offset)); + uint64_t Constant2 {}; + if (Op->OffsetType == MEM_OFFSET_SXTX && IREmit->IsValueConstant(Op->Offset, &Constant2)) { + if (IsImmMemory(Constant2, IROp->Size)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Offset)); - 
IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, CreateInlineConstant(IREmit, Constant2)); + IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, CreateInlineConstant(IREmit, Constant2)); - Changed = true; - } + Changed = true; } - break; } - case OP_STOREMEM: - { - auto Op = IROp->CW(); + break; + } + case OP_STOREMEM: { + auto Op = IROp->CW(); - uint64_t Constant2{}; - if (Op->OffsetType == MEM_OFFSET_SXTX && IREmit->IsValueConstant(Op->Offset, &Constant2)) { - if (IsImmMemory(Constant2, IROp->Size)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Offset)); + uint64_t Constant2 {}; + if (Op->OffsetType == MEM_OFFSET_SXTX && IREmit->IsValueConstant(Op->Offset, &Constant2)) { + if (IsImmMemory(Constant2, IROp->Size)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Offset)); - IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, CreateInlineConstant(IREmit, Constant2)); + IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, CreateInlineConstant(IREmit, Constant2)); - Changed = true; - } + Changed = true; } - break; } - case OP_LOADMEMTSO: - { - auto Op = IROp->CW(); + break; + } + case OP_LOADMEMTSO: { + auto Op = IROp->CW(); - uint64_t Constant2{}; - if (SupportsTSOImm9) { - if (Op->OffsetType == MEM_OFFSET_SXTX && IREmit->IsValueConstant(Op->Offset, &Constant2)) { - if (IsTSOImm9(Constant2)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Offset)); + uint64_t Constant2 {}; + if (SupportsTSOImm9) { + if (Op->OffsetType == MEM_OFFSET_SXTX && IREmit->IsValueConstant(Op->Offset, &Constant2)) { + if (IsTSOImm9(Constant2)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Offset)); - IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, CreateInlineConstant(IREmit, Constant2)); + IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, CreateInlineConstant(IREmit, Constant2)); - Changed = true; - } + Changed = true; } } - break; } - case OP_STOREMEMTSO: - { - auto Op = IROp->CW(); + break; + } + case OP_STOREMEMTSO: { + auto Op = IROp->CW(); - 
uint64_t Constant2{}; - if (SupportsTSOImm9) { - if (Op->OffsetType == MEM_OFFSET_SXTX && IREmit->IsValueConstant(Op->Offset, &Constant2)) { - if (IsTSOImm9(Constant2)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Offset)); + uint64_t Constant2 {}; + if (SupportsTSOImm9) { + if (Op->OffsetType == MEM_OFFSET_SXTX && IREmit->IsValueConstant(Op->Offset, &Constant2)) { + if (IsTSOImm9(Constant2)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Offset)); - IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, CreateInlineConstant(IREmit, Constant2)); + IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, CreateInlineConstant(IREmit, Constant2)); - Changed = true; - } + Changed = true; } } - break; } - case OP_MEMCPY: - { - auto Op = IROp->CW(); + break; + } + case OP_MEMCPY: { + auto Op = IROp->CW(); - uint64_t Constant{}; - if (IREmit->IsValueConstant(Op->Direction, &Constant)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Direction)); + uint64_t Constant {}; + if (IREmit->IsValueConstant(Op->Direction, &Constant)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Direction)); - IREmit->ReplaceNodeArgument(CodeNode, Op->Direction_Index, CreateInlineConstant(IREmit, Constant & 1)); + IREmit->ReplaceNodeArgument(CodeNode, Op->Direction_Index, CreateInlineConstant(IREmit, Constant & 1)); - Changed = true; - } - break; + Changed = true; } - case OP_MEMSET: - { - auto Op = IROp->CW(); + break; + } + case OP_MEMSET: { + auto Op = IROp->CW(); - uint64_t Constant{}; - if (IREmit->IsValueConstant(Op->Direction, &Constant)) { - IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Direction)); + uint64_t Constant {}; + if (IREmit->IsValueConstant(Op->Direction, &Constant)) { + IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Direction)); - IREmit->ReplaceNodeArgument(CodeNode, Op->Direction_Index, CreateInlineConstant(IREmit, Constant & 1)); + IREmit->ReplaceNodeArgument(CodeNode, Op->Direction_Index, CreateInlineConstant(IREmit, Constant & 1)); - Changed = true; - } - break; + 
Changed = true; } + break; + } - default: - break; + default: break; } } return Changed; } -bool ConstProp::Run(IREmitter *IREmit) { +bool ConstProp::Run(IREmitter* IREmit) { FEXCORE_PROFILE_SCOPED("PassManager::ConstProp"); bool Changed = false; @@ -1297,4 +1247,4 @@ fextl::unique_ptr CreateConstProp(bool InlineConstants, bool return fextl::make_unique(InlineConstants, SupportsTSOImm9); } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/Passes/DeadCodeElimination.cpp b/FEXCore/Source/Interface/IR/Passes/DeadCodeElimination.cpp index 28ee963c14..131d1df7d2 100644 --- a/FEXCore/Source/Interface/IR/Passes/DeadCodeElimination.cpp +++ b/FEXCore/Source/Interface/IR/Passes/DeadCodeElimination.cpp @@ -17,13 +17,13 @@ tags: ir|opts namespace FEXCore::IR { class DeadCodeElimination final : public FEXCore::IR::Pass { - bool Run(IREmitter *IREmit) override; + bool Run(IREmitter* IREmit) override; private: - void markUsed(OrderedNodeWrapper *CodeOp, IROp_Header *IROp); + void markUsed(OrderedNodeWrapper* CodeOp, IROp_Header* IROp); }; -bool DeadCodeElimination::Run(IREmitter *IREmit) { +bool DeadCodeElimination::Run(IREmitter* IREmit) { FEXCORE_PROFILE_SCOPED("PassManager::DCE"); auto CurrentIR = IREmit->ViewIR(); bool Changed = false; @@ -43,62 +43,47 @@ bool DeadCodeElimination::Run(IREmitter *IREmit) { bool HasSideEffects = IR::HasSideEffects(IROp->Op); switch (IROp->Op) { - case OP_SYSCALL: - case OP_INLINESYSCALL: { - FEXCore::IR::SyscallFlags Flags{}; - if (IROp->Op == OP_SYSCALL) { - auto Op = IROp->C(); - Flags = Op->Flags; - } - else { - auto Op = IROp->C(); - Flags = Op->Flags; - } - - if ((Flags & FEXCore::IR::SyscallFlags::NOSIDEEFFECTS) == FEXCore::IR::SyscallFlags::NOSIDEEFFECTS) { - HasSideEffects = false; - } + case OP_SYSCALL: + case OP_INLINESYSCALL: { + FEXCore::IR::SyscallFlags Flags {}; + if (IROp->Op == OP_SYSCALL) { + auto Op = IROp->C(); + Flags = Op->Flags; + } else { + auto Op = IROp->C(); + Flags = Op->Flags; + } - break; + if 
((Flags & FEXCore::IR::SyscallFlags::NOSIDEEFFECTS) == FEXCore::IR::SyscallFlags::NOSIDEEFFECTS) { + HasSideEffects = false; } - case OP_ATOMICFETCHADD: - case OP_ATOMICFETCHSUB: - case OP_ATOMICFETCHAND: - case OP_ATOMICFETCHCLR: - case OP_ATOMICFETCHOR: - case OP_ATOMICFETCHXOR: - case OP_ATOMICFETCHNEG: { - // If the result of the atomic fetch is completely unused, convert it to a non-fetching atomic operation. - if (CodeNode->GetUses() == 0) { - switch (IROp->Op) { - case OP_ATOMICFETCHADD: - IROp->Op = OP_ATOMICADD; - break; - case OP_ATOMICFETCHSUB: - IROp->Op = OP_ATOMICSUB; - break; - case OP_ATOMICFETCHAND: - IROp->Op = OP_ATOMICAND; - break; - case OP_ATOMICFETCHCLR: - IROp->Op = OP_ATOMICCLR; - break; - case OP_ATOMICFETCHOR: - IROp->Op = OP_ATOMICOR; - break; - case OP_ATOMICFETCHXOR: - IROp->Op = OP_ATOMICXOR; - break; - case OP_ATOMICFETCHNEG: - IROp->Op = OP_ATOMICNEG; - break; - default: FEX_UNREACHABLE; - } - Changed = true; + + break; + } + case OP_ATOMICFETCHADD: + case OP_ATOMICFETCHSUB: + case OP_ATOMICFETCHAND: + case OP_ATOMICFETCHCLR: + case OP_ATOMICFETCHOR: + case OP_ATOMICFETCHXOR: + case OP_ATOMICFETCHNEG: { + // If the result of the atomic fetch is completely unused, convert it to a non-fetching atomic operation. 
+ if (CodeNode->GetUses() == 0) { + switch (IROp->Op) { + case OP_ATOMICFETCHADD: IROp->Op = OP_ATOMICADD; break; + case OP_ATOMICFETCHSUB: IROp->Op = OP_ATOMICSUB; break; + case OP_ATOMICFETCHAND: IROp->Op = OP_ATOMICAND; break; + case OP_ATOMICFETCHCLR: IROp->Op = OP_ATOMICCLR; break; + case OP_ATOMICFETCHOR: IROp->Op = OP_ATOMICOR; break; + case OP_ATOMICFETCHXOR: IROp->Op = OP_ATOMICXOR; break; + case OP_ATOMICFETCHNEG: IROp->Op = OP_ATOMICNEG; break; + default: FEX_UNREACHABLE; } - break; + Changed = true; } - default: break; + break; + } + default: break; } // Skip over anything that has side effects @@ -120,12 +105,10 @@ bool DeadCodeElimination::Run(IREmitter *IREmit) { return Changed; } -void DeadCodeElimination::markUsed(OrderedNodeWrapper *CodeOp, IROp_Header *IROp) { - -} +void DeadCodeElimination::markUsed(OrderedNodeWrapper* CodeOp, IROp_Header* IROp) {} fextl::unique_ptr CreatePassDeadCodeElimination() { return fextl::make_unique(); } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/Passes/DeadContextStoreElimination.cpp b/FEXCore/Source/Interface/IR/Passes/DeadContextStoreElimination.cpp index f8a33e653f..6f737a83db 100644 --- a/FEXCore/Source/Interface/IR/Passes/DeadContextStoreElimination.cpp +++ b/FEXCore/Source/Interface/IR/Passes/DeadContextStoreElimination.cpp @@ -27,450 +27,449 @@ desc: Transforms ContextLoad/Store to temporaries, similar to mem2reg #include namespace { - struct ContextMemberClassification { - size_t Offset; - uint16_t Size; - }; - - enum class LastAccessType { - NONE = (0b000 << 0), ///< Was never previously accessed - WRITE = (0b001 << 0), ///< Was fully overwritten - READ = (0b010 << 0), ///< Was fully read - INVALID = (0b011 << 0), ///< Accessing this is invalid - MASK = (0b011 << 0), - PARTIAL = (0b100 << 0), - PARTIAL_WRITE = (PARTIAL | WRITE), ///< Was partially written - PARTIAL_READ = (PARTIAL | READ), ///< Was partially read - }; - FEX_DEF_NUM_OPS(LastAccessType); - - static bool 
IsWriteAccess(LastAccessType Type) { - return (Type & LastAccessType::MASK) == LastAccessType::WRITE; - } - - static bool IsReadAccess(LastAccessType Type) { - return (Type & LastAccessType::MASK) == LastAccessType::READ; - } - - [[maybe_unused]] - static bool IsInvalidAccess(LastAccessType Type) { - return (Type & LastAccessType::MASK) == LastAccessType::INVALID; - } - - [[maybe_unused]] - static bool IsPartialAccess(LastAccessType Type) { - return (Type & LastAccessType::PARTIAL) == LastAccessType::PARTIAL; - } - - [[maybe_unused]] - static bool IsFullAccess(LastAccessType Type) { - return (Type & LastAccessType::PARTIAL) == LastAccessType::NONE; - } - - struct ContextMemberInfo { - ContextMemberClassification Class; - LastAccessType Accessed; - FEXCore::IR::RegisterClassType AccessRegClass; - uint32_t AccessOffset; - uint8_t AccessSize; - ///< The last value that was loaded or stored. - FEXCore::IR::OrderedNode *ValueNode; - ///< With a store access, the store node that is doing the operation. 
- FEXCore::IR::OrderedNode *StoreNode; - }; - - struct ContextInfo { - fextl::vector Lookup; - fextl::vector ClassificationInfo; - }; - - static void ClassifyContextStruct(ContextInfo *ContextClassificationInfo, bool SupportsAVX) { - auto ContextClassification = &ContextClassificationInfo->ClassificationInfo; - - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, rip), - sizeof(FEXCore::Core::CPUState::rip), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); - - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_GPRS; ++i) { - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, gregs[0]) + sizeof(FEXCore::Core::CPUState::gregs[0]) * i, - FEXCore::Core::CPUState::GPR_REG_SIZE, - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); - } - - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, es_idx), - sizeof(FEXCore::Core::CPUState::es_idx), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); - - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, cs_idx), - sizeof(FEXCore::Core::CPUState::cs_idx), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); - - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, ss_idx), - sizeof(FEXCore::Core::CPUState::ss_idx), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); - - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, ds_idx), - sizeof(FEXCore::Core::CPUState::ds_idx), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); - - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - 
offsetof(FEXCore::Core::CPUState, gs_idx), - sizeof(FEXCore::Core::CPUState::gs_idx), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); - - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, fs_idx), - sizeof(FEXCore::Core::CPUState::fs_idx), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); - - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, _pad), - sizeof(FEXCore::Core::CPUState::_pad), - }, - LastAccessType::INVALID, - FEXCore::IR::InvalidClass, - }); - - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, es_cached), - sizeof(FEXCore::Core::CPUState::es_cached), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); +struct ContextMemberClassification { + size_t Offset; + uint16_t Size; +}; - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, cs_cached), - sizeof(FEXCore::Core::CPUState::cs_cached), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); +enum class LastAccessType { + NONE = (0b000 << 0), ///< Was never previously accessed + WRITE = (0b001 << 0), ///< Was fully overwritten + READ = (0b010 << 0), ///< Was fully read + INVALID = (0b011 << 0), ///< Accessing this is invalid + MASK = (0b011 << 0), + PARTIAL = (0b100 << 0), + PARTIAL_WRITE = (PARTIAL | WRITE), ///< Was partially written + PARTIAL_READ = (PARTIAL | READ), ///< Was partially read +}; +FEX_DEF_NUM_OPS(LastAccessType); - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, ss_cached), - sizeof(FEXCore::Core::CPUState::ss_cached), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); +static bool IsWriteAccess(LastAccessType Type) { + return (Type & LastAccessType::MASK) == 
LastAccessType::WRITE; +} - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, ds_cached), - sizeof(FEXCore::Core::CPUState::ds_cached), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); +static bool IsReadAccess(LastAccessType Type) { + return (Type & LastAccessType::MASK) == LastAccessType::READ; +} - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, gs_cached), - sizeof(FEXCore::Core::CPUState::gs_cached), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); +[[maybe_unused]] +static bool IsInvalidAccess(LastAccessType Type) { + return (Type & LastAccessType::MASK) == LastAccessType::INVALID; +} - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, fs_cached), - sizeof(FEXCore::Core::CPUState::fs_cached), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); +[[maybe_unused]] +static bool IsPartialAccess(LastAccessType Type) { + return (Type & LastAccessType::PARTIAL) == LastAccessType::PARTIAL; +} - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, InlineJITBlockHeader), - sizeof(FEXCore::Core::CPUState::InlineJITBlockHeader), - }, - LastAccessType::INVALID, - FEXCore::IR::InvalidClass, - }); +[[maybe_unused]] +static bool IsFullAccess(LastAccessType Type) { + return (Type & LastAccessType::PARTIAL) == LastAccessType::NONE; +} - if (SupportsAVX) { - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) { - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, xmm.avx.data[0][0]) + FEXCore::Core::CPUState::XMM_AVX_REG_SIZE * i, - FEXCore::Core::CPUState::XMM_AVX_REG_SIZE, - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); - } - } else { - for (size_t i = 
0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) { - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, xmm.sse.data[0][0]) + FEXCore::Core::CPUState::XMM_SSE_REG_SIZE * i, - FEXCore::Core::CPUState::XMM_SSE_REG_SIZE, - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); - } +struct ContextMemberInfo { + ContextMemberClassification Class; + LastAccessType Accessed; + FEXCore::IR::RegisterClassType AccessRegClass; + uint32_t AccessOffset; + uint8_t AccessSize; + ///< The last value that was loaded or stored. + FEXCore::IR::OrderedNode* ValueNode; + ///< With a store access, the store node that is doing the operation. + FEXCore::IR::OrderedNode* StoreNode; +}; - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, xmm.sse.pad[0][0]), - static_cast(FEXCore::Core::CPUState::XMM_SSE_REG_SIZE * FEXCore::Core::CPUState::NUM_XMMS), - }, - LastAccessType::INVALID, - FEXCore::IR::InvalidClass, - }); - } +struct ContextInfo { + fextl::vector Lookup; + fextl::vector ClassificationInfo; +}; - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_FLAGS; ++i) { - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, flags[0]) + sizeof(FEXCore::Core::CPUState::flags[0]) * i, - FEXCore::Core::CPUState::FLAG_SIZE, - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); - } +static void ClassifyContextStruct(ContextInfo* ContextClassificationInfo, bool SupportsAVX) { + auto ContextClassification = &ContextClassificationInfo->ClassificationInfo; - ContextClassification->emplace_back(ContextMemberInfo{ - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, pf_raw), - sizeof(FEXCore::Core::CPUState::pf_raw), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); + ContextClassification->emplace_back(ContextMemberInfo { + 
ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, rip), + sizeof(FEXCore::Core::CPUState::rip), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); - ContextClassification->emplace_back(ContextMemberInfo{ + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_GPRS; ++i) { + ContextClassification->emplace_back(ContextMemberInfo { ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, af_raw), - sizeof(FEXCore::Core::CPUState::af_raw), + offsetof(FEXCore::Core::CPUState, gregs[0]) + sizeof(FEXCore::Core::CPUState::gregs[0]) * i, + FEXCore::Core::CPUState::GPR_REG_SIZE, }, LastAccessType::NONE, FEXCore::IR::InvalidClass, }); + } - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) { - ContextClassification->emplace_back(ContextMemberInfo{ + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, es_idx), + sizeof(FEXCore::Core::CPUState::es_idx), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, cs_idx), + sizeof(FEXCore::Core::CPUState::cs_idx), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, ss_idx), + sizeof(FEXCore::Core::CPUState::ss_idx), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, ds_idx), + sizeof(FEXCore::Core::CPUState::ds_idx), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, gs_idx), + sizeof(FEXCore::Core::CPUState::gs_idx), + }, + LastAccessType::NONE, + 
FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, fs_idx), + sizeof(FEXCore::Core::CPUState::fs_idx), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, _pad), + sizeof(FEXCore::Core::CPUState::_pad), + }, + LastAccessType::INVALID, + FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, es_cached), + sizeof(FEXCore::Core::CPUState::es_cached), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, cs_cached), + sizeof(FEXCore::Core::CPUState::cs_cached), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, ss_cached), + sizeof(FEXCore::Core::CPUState::ss_cached), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, ds_cached), + sizeof(FEXCore::Core::CPUState::ds_cached), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, gs_cached), + sizeof(FEXCore::Core::CPUState::gs_cached), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, fs_cached), + sizeof(FEXCore::Core::CPUState::fs_cached), + }, + LastAccessType::NONE, + 
FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, InlineJITBlockHeader), + sizeof(FEXCore::Core::CPUState::InlineJITBlockHeader), + }, + LastAccessType::INVALID, + FEXCore::IR::InvalidClass, + }); + + if (SupportsAVX) { + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) { + ContextClassification->emplace_back(ContextMemberInfo { ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, mm[0][0]) + sizeof(FEXCore::Core::CPUState::mm[0]) * i, - FEXCore::Core::CPUState::MM_REG_SIZE + offsetof(FEXCore::Core::CPUState, xmm.avx.data[0][0]) + FEXCore::Core::CPUState::XMM_AVX_REG_SIZE * i, + FEXCore::Core::CPUState::XMM_AVX_REG_SIZE, }, LastAccessType::NONE, FEXCore::IR::InvalidClass, }); } - - // GDTs - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_GDTS; ++i) { - ContextClassification->emplace_back(ContextMemberInfo{ + } else { + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) { + ContextClassification->emplace_back(ContextMemberInfo { ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, gdt[0]) + sizeof(FEXCore::Core::CPUState::gdt[0]) * i, - sizeof(FEXCore::Core::CPUState::gdt[0]), + offsetof(FEXCore::Core::CPUState, xmm.sse.data[0][0]) + FEXCore::Core::CPUState::XMM_SSE_REG_SIZE * i, + FEXCore::Core::CPUState::XMM_SSE_REG_SIZE, }, LastAccessType::NONE, FEXCore::IR::InvalidClass, }); } - // FCW - ContextClassification->emplace_back(ContextMemberInfo { - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, FCW), - sizeof(FEXCore::Core::CPUState::FCW), - }, - LastAccessType::NONE, - FEXCore::IR::InvalidClass, - }); - - // AbridgedFTW ContextClassification->emplace_back(ContextMemberInfo { ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, AbridgedFTW), - sizeof(FEXCore::Core::CPUState::AbridgedFTW), + offsetof(FEXCore::Core::CPUState, xmm.sse.pad[0][0]), + 
static_cast(FEXCore::Core::CPUState::XMM_SSE_REG_SIZE * FEXCore::Core::CPUState::NUM_XMMS), }, - LastAccessType::NONE, + LastAccessType::INVALID, FEXCore::IR::InvalidClass, }); + } - // _pad2 + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_FLAGS; ++i) { ContextClassification->emplace_back(ContextMemberInfo { ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, _pad2), - sizeof(FEXCore::Core::CPUState::_pad2), + offsetof(FEXCore::Core::CPUState, flags[0]) + sizeof(FEXCore::Core::CPUState::flags[0]) * i, + FEXCore::Core::CPUState::FLAG_SIZE, }, LastAccessType::NONE, FEXCore::IR::InvalidClass, }); + } - // DeferredSignalRefCount + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, pf_raw), + sizeof(FEXCore::Core::CPUState::pf_raw), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, af_raw), + sizeof(FEXCore::Core::CPUState::af_raw), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) { ContextClassification->emplace_back(ContextMemberInfo { - ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount), - sizeof(FEXCore::Core::CPUState::DeferredSignalRefCount), - }, + ContextMemberClassification {offsetof(FEXCore::Core::CPUState, mm[0][0]) + sizeof(FEXCore::Core::CPUState::mm[0]) * i, + FEXCore::Core::CPUState::MM_REG_SIZE}, LastAccessType::NONE, FEXCore::IR::InvalidClass, }); + } - // DeferredSignalFaultAddress + // GDTs + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_GDTS; ++i) { ContextClassification->emplace_back(ContextMemberInfo { ContextMemberClassification { - offsetof(FEXCore::Core::CPUState, DeferredSignalFaultAddress), - sizeof(FEXCore::Core::CPUState::DeferredSignalFaultAddress), + offsetof(FEXCore::Core::CPUState, 
gdt[0]) + sizeof(FEXCore::Core::CPUState::gdt[0]) * i, + sizeof(FEXCore::Core::CPUState::gdt[0]), }, LastAccessType::NONE, FEXCore::IR::InvalidClass, }); + } - - [[maybe_unused]] size_t ClassifiedStructSize{}; - ContextClassificationInfo->Lookup.reserve(sizeof(FEXCore::Core::CPUState)); - for (auto &it : *ContextClassification) { - LOGMAN_THROW_A_FMT(it.Class.Offset == ContextClassificationInfo->Lookup.size(), "Offset mismatch (offset={})", it.Class.Offset); - for (int i = 0; i < it.Class.Size; i++) { - ContextClassificationInfo->Lookup.push_back(&it); - } - ClassifiedStructSize += it.Class.Size; + // FCW + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, FCW), + sizeof(FEXCore::Core::CPUState::FCW), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + // AbridgedFTW + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, AbridgedFTW), + sizeof(FEXCore::Core::CPUState::AbridgedFTW), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + // _pad2 + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, _pad2), + sizeof(FEXCore::Core::CPUState::_pad2), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + // DeferredSignalRefCount + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount), + sizeof(FEXCore::Core::CPUState::DeferredSignalRefCount), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + // DeferredSignalFaultAddress + ContextClassification->emplace_back(ContextMemberInfo { + ContextMemberClassification { + offsetof(FEXCore::Core::CPUState, DeferredSignalFaultAddress), + sizeof(FEXCore::Core::CPUState::DeferredSignalFaultAddress), + }, + LastAccessType::NONE, + FEXCore::IR::InvalidClass, + }); + + + 
[[maybe_unused]] size_t ClassifiedStructSize {}; + ContextClassificationInfo->Lookup.reserve(sizeof(FEXCore::Core::CPUState)); + for (auto& it : *ContextClassification) { + LOGMAN_THROW_A_FMT(it.Class.Offset == ContextClassificationInfo->Lookup.size(), "Offset mismatch (offset={})", it.Class.Offset); + for (int i = 0; i < it.Class.Size; i++) { + ContextClassificationInfo->Lookup.push_back(&it); } + ClassifiedStructSize += it.Class.Size; + } - LOGMAN_THROW_AA_FMT(ClassifiedStructSize == sizeof(FEXCore::Core::CPUState), - "Classified CPUStruct size doesn't match real CPUState struct size! {} (classified) != {} (real)", - ClassifiedStructSize, sizeof(FEXCore::Core::CPUState)); + LOGMAN_THROW_AA_FMT(ClassifiedStructSize == sizeof(FEXCore::Core::CPUState), + "Classified CPUStruct size doesn't match real CPUState struct size! {} (classified) != {} (real)", + ClassifiedStructSize, sizeof(FEXCore::Core::CPUState)); - LOGMAN_THROW_A_FMT(ContextClassificationInfo->Lookup.size() == sizeof(FEXCore::Core::CPUState), - "Classified lookup size doesn't match real CPUState struct size! {} (classified) != {} (real)", - ContextClassificationInfo->Lookup.size(), sizeof(FEXCore::Core::CPUState)); - } + LOGMAN_THROW_A_FMT(ContextClassificationInfo->Lookup.size() == sizeof(FEXCore::Core::CPUState), + "Classified lookup size doesn't match real CPUState struct size! 
{} (classified) != {} (real)", + ContextClassificationInfo->Lookup.size(), sizeof(FEXCore::Core::CPUState)); +} - static void ResetClassificationAccesses(ContextInfo *ContextClassificationInfo, bool SupportsAVX) { - auto ContextClassification = &ContextClassificationInfo->ClassificationInfo; +static void ResetClassificationAccesses(ContextInfo* ContextClassificationInfo, bool SupportsAVX) { + auto ContextClassification = &ContextClassificationInfo->ClassificationInfo; - auto SetAccess = [&](size_t Offset, LastAccessType Access) { - ContextClassification->at(Offset).Accessed = Access; - ContextClassification->at(Offset).AccessRegClass = FEXCore::IR::InvalidClass; - ContextClassification->at(Offset).AccessOffset = 0; - ContextClassification->at(Offset).StoreNode = nullptr; - }; - size_t Offset = 0; + auto SetAccess = [&](size_t Offset, LastAccessType Access) { + ContextClassification->at(Offset).Accessed = Access; + ContextClassification->at(Offset).AccessRegClass = FEXCore::IR::InvalidClass; + ContextClassification->at(Offset).AccessOffset = 0; + ContextClassification->at(Offset).StoreNode = nullptr; + }; + size_t Offset = 0; + SetAccess(Offset++, LastAccessType::NONE); + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_GPRS; ++i) { SetAccess(Offset++, LastAccessType::NONE); - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_GPRS; ++i) { - SetAccess(Offset++, LastAccessType::NONE); - } + } - // Segment indexes - SetAccess(Offset++, LastAccessType::NONE); - SetAccess(Offset++, LastAccessType::NONE); - SetAccess(Offset++, LastAccessType::NONE); + // Segment indexes + SetAccess(Offset++, LastAccessType::NONE); + SetAccess(Offset++, LastAccessType::NONE); + SetAccess(Offset++, LastAccessType::NONE); + SetAccess(Offset++, LastAccessType::NONE); + SetAccess(Offset++, LastAccessType::NONE); + SetAccess(Offset++, LastAccessType::NONE); + + // Pad + SetAccess(Offset++, LastAccessType::INVALID); + + // Segments + SetAccess(Offset++, LastAccessType::NONE); + 
SetAccess(Offset++, LastAccessType::NONE); + SetAccess(Offset++, LastAccessType::NONE); + SetAccess(Offset++, LastAccessType::NONE); + SetAccess(Offset++, LastAccessType::NONE); + SetAccess(Offset++, LastAccessType::NONE); + + // Pad2 + SetAccess(Offset++, LastAccessType::INVALID); + + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) { SetAccess(Offset++, LastAccessType::NONE); - SetAccess(Offset++, LastAccessType::NONE); - SetAccess(Offset++, LastAccessType::NONE); - - // Pad - SetAccess(Offset++, LastAccessType::INVALID); + } - // Segments - SetAccess(Offset++, LastAccessType::NONE); - SetAccess(Offset++, LastAccessType::NONE); + if (!SupportsAVX) { SetAccess(Offset++, LastAccessType::NONE); - SetAccess(Offset++, LastAccessType::NONE); - SetAccess(Offset++, LastAccessType::NONE); - SetAccess(Offset++, LastAccessType::NONE); - - // Pad2 - SetAccess(Offset++, LastAccessType::INVALID); - - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) { - SetAccess(Offset++, LastAccessType::NONE); - } - - if (!SupportsAVX) { - SetAccess(Offset++, LastAccessType::NONE); - } - - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_FLAGS; ++i) { - SetAccess(Offset++, LastAccessType::NONE); - } + } - // PF/AF - SetAccess(Offset++, LastAccessType::NONE); + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_FLAGS; ++i) { SetAccess(Offset++, LastAccessType::NONE); + } - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) { - SetAccess(Offset++, LastAccessType::NONE); - } - - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_GDTS; ++i) { - SetAccess(Offset++, LastAccessType::NONE); - } + // PF/AF + SetAccess(Offset++, LastAccessType::NONE); + SetAccess(Offset++, LastAccessType::NONE); + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) { SetAccess(Offset++, LastAccessType::NONE); - SetAccess(Offset++, LastAccessType::NONE); + } - SetAccess(Offset++, LastAccessType::INVALID); - SetAccess(Offset++, LastAccessType::INVALID); - 
SetAccess(Offset++, LastAccessType::INVALID); + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_GDTS; ++i) { + SetAccess(Offset++, LastAccessType::NONE); } - struct BlockInfo { - fextl::vector Predecessors; - fextl::vector Successors; - ContextInfo IncomingClassifiedStruct; - ContextInfo OutgoingClassifiedStruct; - }; + SetAccess(Offset++, LastAccessType::NONE); + SetAccess(Offset++, LastAccessType::NONE); + + SetAccess(Offset++, LastAccessType::INVALID); + SetAccess(Offset++, LastAccessType::INVALID); + SetAccess(Offset++, LastAccessType::INVALID); +} + +struct BlockInfo { + fextl::vector Predecessors; + fextl::vector Successors; + ContextInfo IncomingClassifiedStruct; + ContextInfo OutgoingClassifiedStruct; +}; class RCLSE final : public FEXCore::IR::Pass { public: - explicit RCLSE(bool SupportsAVX_) : SupportsAVX{SupportsAVX_} { + explicit RCLSE(bool SupportsAVX_) + : SupportsAVX {SupportsAVX_} { ClassifyContextStruct(&ClassifiedStruct, SupportsAVX); DCE = FEXCore::IR::CreatePassDeadCodeElimination(); } - bool Run(FEXCore::IR::IREmitter *IREmit) override; + bool Run(FEXCore::IR::IREmitter* IREmit) override; private: fextl::unique_ptr DCE; @@ -479,30 +478,34 @@ class RCLSE final : public FEXCore::IR::Pass { bool SupportsAVX; - ContextMemberInfo *FindMemberInfo(ContextInfo *ClassifiedInfo, uint32_t Offset, uint8_t Size); - ContextMemberInfo *RecordAccess(ContextMemberInfo *Info, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size, LastAccessType AccessType, FEXCore::IR::OrderedNode *Node, FEXCore::IR::OrderedNode *StoreNode = nullptr); - ContextMemberInfo *RecordAccess(ContextInfo *ClassifiedInfo, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size, LastAccessType AccessType, FEXCore::IR::OrderedNode *Node, FEXCore::IR::OrderedNode *StoreNode = nullptr); + ContextMemberInfo* FindMemberInfo(ContextInfo* ClassifiedInfo, uint32_t Offset, uint8_t Size); + ContextMemberInfo* RecordAccess(ContextMemberInfo* Info, 
FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size, + LastAccessType AccessType, FEXCore::IR::OrderedNode* Node, FEXCore::IR::OrderedNode* StoreNode = nullptr); + ContextMemberInfo* RecordAccess(ContextInfo* ClassifiedInfo, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size, + LastAccessType AccessType, FEXCore::IR::OrderedNode* Node, FEXCore::IR::OrderedNode* StoreNode = nullptr); // Classify context loads and stores. - bool ClassifyContextLoad(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset, uint8_t Size, FEXCore::IR::OrderedNode *CodeNode, FEXCore::IR::NodeIterator BlockEnd); - bool ClassifyContextStore(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset, uint8_t Size, FEXCore::IR::OrderedNode *CodeNode, FEXCore::IR::OrderedNode *ValueNode); + bool ClassifyContextLoad(FEXCore::IR::IREmitter* IREmit, ContextInfo* LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset, + uint8_t Size, FEXCore::IR::OrderedNode* CodeNode, FEXCore::IR::NodeIterator BlockEnd); + bool ClassifyContextStore(FEXCore::IR::IREmitter* IREmit, ContextInfo* LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset, + uint8_t Size, FEXCore::IR::OrderedNode* CodeNode, FEXCore::IR::OrderedNode* ValueNode); // Block local Passes - bool RedundantStoreLoadElimination(FEXCore::IR::IREmitter *IREmit); + bool RedundantStoreLoadElimination(FEXCore::IR::IREmitter* IREmit); }; -ContextMemberInfo *RCLSE::FindMemberInfo(ContextInfo *ContextClassificationInfo, uint32_t Offset, uint8_t Size) { +ContextMemberInfo* RCLSE::FindMemberInfo(ContextInfo* ContextClassificationInfo, uint32_t Offset, uint8_t Size) { return ContextClassificationInfo->Lookup.at(Offset); } -ContextMemberInfo *RCLSE::RecordAccess(ContextMemberInfo *Info, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size, LastAccessType AccessType, 
FEXCore::IR::OrderedNode *ValueNode, FEXCore::IR::OrderedNode *StoreNode) { +ContextMemberInfo* RCLSE::RecordAccess(ContextMemberInfo* Info, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size, + LastAccessType AccessType, FEXCore::IR::OrderedNode* ValueNode, FEXCore::IR::OrderedNode* StoreNode) { LOGMAN_THROW_AA_FMT((Offset + Size) <= (Info->Class.Offset + Info->Class.Size), "Access to context item went over member size"); LOGMAN_THROW_AA_FMT(Info->Accessed != LastAccessType::INVALID, "Tried to access invalid member"); // If we aren't fully overwriting the member then it is a partial write that we need to track if (Size < Info->Class.Size) { - AccessType = AccessType == LastAccessType::WRITE ? LastAccessType::PARTIAL_WRITE - : LastAccessType::PARTIAL_READ; + AccessType = AccessType == LastAccessType::WRITE ? LastAccessType::PARTIAL_WRITE : LastAccessType::PARTIAL_READ; } if (Size > Info->Class.Size) { LOGMAN_MSG_A_FMT("Can't handle this"); @@ -513,23 +516,25 @@ ContextMemberInfo *RCLSE::RecordAccess(ContextMemberInfo *Info, FEXCore::IR::Reg Info->AccessOffset = Offset; Info->AccessSize = Size; Info->ValueNode = ValueNode; - if (StoreNode != nullptr) + if (StoreNode != nullptr) { Info->StoreNode = StoreNode; + } return Info; } -ContextMemberInfo *RCLSE::RecordAccess(ContextInfo *ClassifiedInfo, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size, LastAccessType AccessType, FEXCore::IR::OrderedNode *ValueNode, FEXCore::IR::OrderedNode *StoreNode) { - ContextMemberInfo *Info = FindMemberInfo(ClassifiedInfo, Offset, Size); +ContextMemberInfo* RCLSE::RecordAccess(ContextInfo* ClassifiedInfo, FEXCore::IR::RegisterClassType RegClass, uint32_t Offset, uint8_t Size, + LastAccessType AccessType, FEXCore::IR::OrderedNode* ValueNode, FEXCore::IR::OrderedNode* StoreNode) { + ContextMemberInfo* Info = FindMemberInfo(ClassifiedInfo, Offset, Size); return RecordAccess(Info, RegClass, Offset, Size, AccessType, ValueNode, StoreNode); } -bool 
RCLSE::ClassifyContextLoad(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset, uint8_t Size, FEXCore::IR::OrderedNode *CodeNode, FEXCore::IR::NodeIterator BlockEnd) { +bool RCLSE::ClassifyContextLoad(FEXCore::IR::IREmitter* IREmit, ContextInfo* LocalInfo, FEXCore::IR::RegisterClassType Class, + uint32_t Offset, uint8_t Size, FEXCore::IR::OrderedNode* CodeNode, FEXCore::IR::NodeIterator BlockEnd) { auto Info = FindMemberInfo(LocalInfo, Offset, Size); ContextMemberInfo PreviousMemberInfoCopy = *Info; RecordAccess(Info, Class, Offset, Size, LastAccessType::READ, CodeNode); - if (PreviousMemberInfoCopy.AccessRegClass == Info->AccessRegClass && - PreviousMemberInfoCopy.AccessOffset == Info->AccessOffset && + if (PreviousMemberInfoCopy.AccessRegClass == Info->AccessRegClass && PreviousMemberInfoCopy.AccessOffset == Info->AccessOffset && PreviousMemberInfoCopy.AccessSize == Size) { // This optimizes two cases: // - Previous access was a load, and we have a redundant load of the same value. @@ -542,10 +547,10 @@ bool RCLSE::ClassifyContextLoad(FEXCore::IR::IREmitter *IREmit, ContextInfo *Loc return false; } -bool RCLSE::ClassifyContextStore(FEXCore::IR::IREmitter *IREmit, ContextInfo *LocalInfo, FEXCore::IR::RegisterClassType Class, uint32_t Offset, uint8_t Size, FEXCore::IR::OrderedNode *CodeNode, FEXCore::IR::OrderedNode *ValueNode) { +bool RCLSE::ClassifyContextStore(FEXCore::IR::IREmitter* IREmit, ContextInfo* LocalInfo, FEXCore::IR::RegisterClassType Class, + uint32_t Offset, uint8_t Size, FEXCore::IR::OrderedNode* CodeNode, FEXCore::IR::OrderedNode* ValueNode) { auto Info = FindMemberInfo(LocalInfo, Offset, Size); - RecordAccess(Info, Class, Offset, Size, LastAccessType::WRITE, ValueNode, - CodeNode); + RecordAccess(Info, Class, Offset, Size, LastAccessType::WRITE, ValueNode, CodeNode); // TODO: Optimize redundant stores. 
// ContextMemberInfo PreviousMemberInfoCopy = *Info; return false; @@ -583,7 +588,7 @@ bool RCLSE::ClassifyContextStore(FEXCore::IR::IREmitter *IREmit, ContextInfo *Lo * (%%176) StoreContext %175 i128, 0x10, 0xa0 */ -bool RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter *IREmit) { +bool RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter* IREmit) { using namespace FEXCore; using namespace FEXCore::IR; @@ -593,7 +598,7 @@ bool RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter *IREmit) { // XXX: Walk the list and calculate the control flow - ContextInfo &LocalInfo = ClassifiedStruct; + ContextInfo& LocalInfo = ClassifiedStruct; for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) { auto BlockOp = BlockHeader->CW(); @@ -605,35 +610,28 @@ bool RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter *IREmit) { if (IROp->Op == OP_STORECONTEXT) { auto Op = IROp->CW(); Changed |= ClassifyContextStore(IREmit, &LocalInfo, Op->Class, Op->Offset, IROp->Size, CodeNode, CurrentIR.GetNode(Op->Value)); - } - else if (IROp->Op == OP_STOREREGISTER) { + } else if (IROp->Op == OP_STOREREGISTER) { auto Op = IROp->CW(); Changed |= ClassifyContextStore(IREmit, &LocalInfo, Op->Class, Op->Offset, IROp->Size, CodeNode, CurrentIR.GetNode(Op->Value)); - } - else if (IROp->Op == OP_LOADREGISTER) { + } else if (IROp->Op == OP_LOADREGISTER) { auto Op = IROp->CW(); Changed |= ClassifyContextLoad(IREmit, &LocalInfo, Op->Class, Op->Offset, IROp->Size, CodeNode, BlockEnd); - } - else if (IROp->Op == OP_LOADCONTEXT) { + } else if (IROp->Op == OP_LOADCONTEXT) { auto Op = IROp->CW(); Changed |= ClassifyContextLoad(IREmit, &LocalInfo, Op->Class, Op->Offset, IROp->Size, CodeNode, BlockEnd); - } - else if (IROp->Op == OP_STOREFLAG) { + } else if (IROp->Op == OP_STOREFLAG) { const auto Op = IROp->CW(); const auto FlagOffset = offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag; auto Info = FindMemberInfo(&LocalInfo, FlagOffset, 1); auto LastStoreNode = 
Info->StoreNode; - RecordAccess(&LocalInfo, FEXCore::IR::GPRClass, FlagOffset, - 1, LastAccessType::WRITE, CurrentIR.GetNode(Op->Header.Args[0]), CodeNode); + RecordAccess(&LocalInfo, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::WRITE, CurrentIR.GetNode(Op->Header.Args[0]), CodeNode); // Flags don't alias, so we can take the simple route here. Kill any flags that have been overwritten - if (LastStoreNode != nullptr) - { + if (LastStoreNode != nullptr) { IREmit->Remove(LastStoreNode); Changed = true; } - } - else if (IROp->Op == OP_INVALIDATEFLAGS) { + } else if (IROp->Op == OP_INVALIDATEFLAGS) { auto Op = IROp->CW(); // Loop through non-reserved flag stores and eliminate unused ones. @@ -647,23 +645,20 @@ bool RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter *IREmit) { auto LastStoreNode = Info->StoreNode; // Flags don't alias, so we can take the simple route here. Kill any flags that have been invalidated without a read. - if (LastStoreNode != nullptr) - { + if (LastStoreNode != nullptr) { IREmit->SetWriteCursor(CodeNode); - RecordAccess(&LocalInfo, FEXCore::IR::GPRClass, FlagOffset, - 1, LastAccessType::WRITE, IREmit->_Constant(0), CodeNode); + RecordAccess(&LocalInfo, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::WRITE, IREmit->_Constant(0), CodeNode); IREmit->Remove(LastStoreNode); Changed = true; } } - } - else if (IROp->Op == OP_LOADFLAG) { + } else if (IROp->Op == OP_LOADFLAG) { const auto Op = IROp->CW(); const auto FlagOffset = offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag; auto Info = FindMemberInfo(&LocalInfo, FlagOffset, 1); LastAccessType LastAccess = Info->Accessed; - OrderedNode *LastValueNode = Info->ValueNode; + OrderedNode* LastValueNode = Info->ValueNode; if (IsWriteAccess(LastAccess)) { // 1 byte so always a full write // If the last store matches this load value then we can replace the loaded value with the previous valid one @@ -671,21 +666,17 @@ bool 
RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter *IREmit) { IREmit->ReplaceAllUsesWith(CodeNode, LastValueNode); RecordAccess(Info, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::READ, LastValueNode); Changed = true; - } - else if (IsReadAccess(LastAccess)) { + } else if (IsReadAccess(LastAccess)) { IREmit->ReplaceAllUsesWith(CodeNode, LastValueNode); RecordAccess(Info, FEXCore::IR::GPRClass, FlagOffset, 1, LastAccessType::READ, LastValueNode); Changed = true; } - } - else if (IROp->Op == OP_SYSCALL || - IROp->Op == OP_INLINESYSCALL) { - FEXCore::IR::SyscallFlags Flags{}; + } else if (IROp->Op == OP_SYSCALL || IROp->Op == OP_INLINESYSCALL) { + FEXCore::IR::SyscallFlags Flags {}; if (IROp->Op == OP_SYSCALL) { auto Op = IROp->C(); Flags = Op->Flags; - } - else { + } else { auto Op = IROp->C(); Flags = Op->Flags; } @@ -694,10 +685,7 @@ bool RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter *IREmit) { // We can't track through these ResetClassificationAccesses(&LocalInfo, SupportsAVX); } - } - else if (IROp->Op == OP_STORECONTEXTINDEXED || - IROp->Op == OP_LOADCONTEXTINDEXED || - IROp->Op == OP_BREAK) { + } else if (IROp->Op == OP_STORECONTEXTINDEXED || IROp->Op == OP_LOADCONTEXTINDEXED || IROp->Op == OP_BREAK) { // We can't track through these ResetClassificationAccesses(&LocalInfo, SupportsAVX); } @@ -709,12 +697,12 @@ bool RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter *IREmit) { return Changed; } -bool RCLSE::Run(FEXCore::IR::IREmitter *IREmit) { +bool RCLSE::Run(FEXCore::IR::IREmitter* IREmit) { FEXCORE_PROFILE_SCOPED("PassManager::RCLSE"); bool Changed = false; // Run up to 5 times - for( int i = 0; i < 5 && RedundantStoreLoadElimination(IREmit); i++) { + for (int i = 0; i < 5 && RedundantStoreLoadElimination(IREmit); i++) { Changed = true; DCE->Run(IREmit); } @@ -722,7 +710,7 @@ bool RCLSE::Run(FEXCore::IR::IREmitter *IREmit) { return Changed; } -} +} // namespace namespace FEXCore::IR { @@ -730,4 +718,4 @@ 
fextl::unique_ptr CreateContextLoadStoreElimination(bool Supp return fextl::make_unique(SupportsAVX); } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/Passes/DeadStoreElimination.cpp b/FEXCore/Source/Interface/IR/Passes/DeadStoreElimination.cpp index beee2b5bc4..572122e13e 100644 --- a/FEXCore/Source/Interface/IR/Passes/DeadStoreElimination.cpp +++ b/FEXCore/Source/Interface/IR/Passes/DeadStoreElimination.cpp @@ -26,9 +26,10 @@ constexpr int PropagationRounds = 5; class DeadStoreElimination final : public FEXCore::IR::Pass { public: - explicit DeadStoreElimination(bool SupportsAVX_) : SupportsAVX{SupportsAVX_} {} + explicit DeadStoreElimination(bool SupportsAVX_) + : SupportsAVX {SupportsAVX_} {} - bool Run(IREmitter *IREmit) override; + bool Run(IREmitter* IREmit) override; private: bool SupportsAVX; @@ -36,29 +37,28 @@ class DeadStoreElimination final : public FEXCore::IR::Pass { bool IsFPR(uint32_t Offset) const { const auto [begin, end] = [this]() -> std::pair { if (SupportsAVX) { - return { - offsetof(FEXCore::Core::CpuStateFrame, State.xmm.avx.data[0][0]), - offsetof(FEXCore::Core::CpuStateFrame, State.xmm.avx.data[16][0]) - }; + return {offsetof(FEXCore::Core::CpuStateFrame, State.xmm.avx.data[0][0]), + offsetof(FEXCore::Core::CpuStateFrame, State.xmm.avx.data[16][0])}; } else { - return { - offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[0][0]), - offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[16][0]) - }; + return {offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[0][0]), + offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[16][0])}; } }(); - if (Offset < begin || Offset >= end) + if (Offset < begin || Offset >= end) { return false; + } return true; } bool IsTrackedWriteFPR(uint32_t Offset, uint8_t Size) const { - if (Size != 16 && Size != 8 && Size != 4) + if (Size != 16 && Size != 8 && Size != 4) { return false; - if (Offset & 15) + } + if (Offset & 15) { return false; + } return IsFPR(Offset); 
} @@ -70,56 +70,61 @@ class DeadStoreElimination final : public FEXCore::IR::Pass { const auto begin = offsetof(Core::CpuStateFrame, State.xmm.avx.data[0][0]); - const auto regSize = SupportsAVX ? Core::CPUState::XMM_AVX_REG_SIZE - : Core::CPUState::XMM_SSE_REG_SIZE; + const auto regSize = SupportsAVX ? Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE; const auto regn = (Offset - begin) / regSize; const auto bitn = regn * 3; - if (!IsTrackedWriteFPR(Offset, Size)) + if (!IsTrackedWriteFPR(Offset, Size)) { return 7UL << (bitn); + } - if (Size == 16) - return 7UL << (bitn); - else if (Size == 8) - return 3UL << (bitn); - else if (Size == 4) - return 1UL << (bitn); - else + if (Size == 16) { + return 7UL << (bitn); + } else if (Size == 8) { + return 3UL << (bitn); + } else if (Size == 4) { + return 1UL << (bitn); + } else { LOGMAN_MSG_A_FMT("Unexpected FPR size {}", Size); + } return 7UL << (bitn); // Return maximum on failure case } }; struct FlagInfo { - uint64_t reads { 0 }; - uint64_t writes { 0 }; - uint64_t kill { 0 }; + uint64_t reads {0}; + uint64_t writes {0}; + uint64_t kill {0}; }; struct GPRInfo { - uint32_t reads { 0 }; - uint32_t writes { 0 }; - uint32_t kill { 0 }; + uint32_t reads {0}; + uint32_t writes {0}; + uint32_t kill {0}; }; bool IsFullGPR(uint32_t Offset, uint8_t Size) { - if (Size != 8) + if (Size != 8) { return false; - if (Offset & 7) + } + if (Offset & 7) { return false; + } - if (Offset < 8 || Offset >= (17 * 8)) + if (Offset < 8 || Offset >= (17 * 8)) { return false; + } return true; } bool IsGPR(uint32_t Offset) { - if (Offset < 8 || Offset >= (17 * 8)) + if (Offset < 8 || Offset >= (17 * 8)) { return false; + } return true; } @@ -129,13 +134,13 @@ uint32_t GPRBit(uint32_t Offset) { return 0; } - return 1 << ((Offset - 8)/8); + return 1 << ((Offset - 8) / 8); } struct FPRInfo { - uint64_t reads { 0 }; - uint64_t writes { 0 }; - uint64_t kill { 0 }; + uint64_t reads {0}; + uint64_t writes {0}; + uint64_t kill {0}; }; 
struct Info { @@ -155,7 +160,7 @@ struct Info { * Third pass removes the dead stores. * */ -bool DeadStoreElimination::Run(IREmitter *IREmit) { +bool DeadStoreElimination::Run(IREmitter* IREmit) { FEXCORE_PROFILE_SCOPED("PassManager::DSE"); fextl::unordered_map InfoMap; @@ -170,21 +175,23 @@ bool DeadStoreElimination::Run(IREmitter *IREmit) { for (auto [BlockNode, BlockIROp] : CurrentIR.GetBlocks()) { for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) { - auto ClassifyRegisterStore = [this](Info &BlockInfo, uint32_t Offset, uint8_t Size) { + auto ClassifyRegisterStore = [this](Info& BlockInfo, uint32_t Offset, uint8_t Size) { //// GPR //// - if (IsFullGPR(Offset, Size)) + if (IsFullGPR(Offset, Size)) { BlockInfo.gpr.writes |= GPRBit(Offset); - else + } else { BlockInfo.gpr.reads |= GPRBit(Offset); + } //// FPR //// - if (IsTrackedWriteFPR(Offset, Size)) + if (IsTrackedWriteFPR(Offset, Size)) { BlockInfo.fpr.writes |= FPRBit(Offset, Size); - else + } else { BlockInfo.fpr.reads |= FPRBit(Offset, Size); + } }; - auto ClassifyRegisterLoad = [this](Info &BlockInfo, uint32_t Offset, uint8_t Size) { + auto ClassifyRegisterLoad = [this](Info& BlockInfo, uint32_t Offset, uint8_t Size) { //// GPR //// BlockInfo.gpr.reads |= GPRBit(Offset); @@ -199,7 +206,7 @@ bool DeadStoreElimination::Run(IREmitter *IREmit) { auto& BlockInfo = InfoMap[BlockNode]; BlockInfo.flag.writes |= 1UL << Op->Flag; - } else if (IROp->Op == OP_INVALIDATEFLAGS) { + } else if (IROp->Op == OP_INVALIDATEFLAGS) { auto Op = IROp->C(); auto& BlockInfo = InfoMap[BlockNode]; @@ -229,8 +236,7 @@ bool DeadStoreElimination::Run(IREmitter *IREmit) { // Pass 2 // Compute flags/gprs/fprs that are stored, but always ovewritten in the next blocks // Propagate the information a few times to eliminate more - for (int i = 0; i < PropagationRounds; i++) - { + for (int i = 0; i < PropagationRounds; i++) { for (auto [BlockNode, BlockIROp] : CurrentIR.GetBlocks()) { auto CodeBlock = BlockIROp->C(); @@ -238,7 +244,7 @@ 
bool DeadStoreElimination::Run(IREmitter *IREmit) { if (IROp->Op == OP_JUMP) { auto Op = IROp->C(); - OrderedNode *TargetNode = CurrentIR.GetNode(Op->Header.Args[0]); + OrderedNode* TargetNode = CurrentIR.GetNode(Op->Header.Args[0]); auto& BlockInfo = InfoMap[BlockNode]; auto& TargetInfo = InfoMap[TargetNode]; @@ -272,8 +278,8 @@ bool DeadStoreElimination::Run(IREmitter *IREmit) { } else if (IROp->Op == OP_CONDJUMP) { auto Op = IROp->C(); - OrderedNode *TrueTargetNode = CurrentIR.GetNode(Op->TrueBlock); - OrderedNode *FalseTargetNode = CurrentIR.GetNode(Op->FalseBlock); + OrderedNode* TrueTargetNode = CurrentIR.GetNode(Op->TrueBlock); + OrderedNode* FalseTargetNode = CurrentIR.GetNode(Op->FalseBlock); auto& BlockInfo = InfoMap[BlockNode]; auto& TrueTargetInfo = InfoMap[TrueTargetNode]; @@ -317,8 +323,9 @@ bool DeadStoreElimination::Run(IREmitter *IREmit) { for (auto [BlockNode, BlockIROp] : CurrentIR.GetBlocks()) { for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) { - auto RemoveDeadRegisterStore = [this](FEXCore::IR::IREmitter *IREmit, FEXCore::IR::OrderedNode *CodeNode, Info &BlockInfo, uint32_t Offset, uint8_t Size) -> bool { - bool Changed{}; + auto RemoveDeadRegisterStore = [this](FEXCore::IR::IREmitter* IREmit, FEXCore::IR::OrderedNode* CodeNode, Info& BlockInfo, + uint32_t Offset, uint8_t Size) -> bool { + bool Changed {}; //// GPRs //// // If this OP_STOREREGISTER is never read, remove it if (BlockInfo.gpr.kill & GPRBit(Offset)) { @@ -365,4 +372,4 @@ fextl::unique_ptr CreateDeadStoreElimination(bool SupportsAVX return fextl::make_unique(SupportsAVX); } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/Passes/IRCompaction.cpp b/FEXCore/Source/Interface/IR/Passes/IRCompaction.cpp index 79ae90c7cb..64db3d6b5d 100644 --- a/FEXCore/Source/Interface/IR/Passes/IRCompaction.cpp +++ b/FEXCore/Source/Interface/IR/Passes/IRCompaction.cpp @@ -33,27 +33,27 @@ static_assert(sizeof(RemapNode) == 4); class IRCompaction final : public 
FEXCore::IR::Pass { public: - IRCompaction(FEXCore::Utils::IntrusivePooledAllocator &Allocator); - bool Run(IREmitter *IREmit) override; + IRCompaction(FEXCore::Utils::IntrusivePooledAllocator& Allocator); + bool Run(IREmitter* IREmit) override; private: static constexpr size_t AlignSize = 0x2000; OpDispatchBuilder LocalBuilder; fextl::vector OldToNewRemap; struct CodeBlockData { - OrderedNode *OldNode; - OrderedNode *NewNode; + OrderedNode* OldNode; + OrderedNode* NewNode; }; - fextl::vector GeneratedCodeBlocks{}; + fextl::vector GeneratedCodeBlocks {}; }; -IRCompaction::IRCompaction(FEXCore::Utils::IntrusivePooledAllocator &Allocator) +IRCompaction::IRCompaction(FEXCore::Utils::IntrusivePooledAllocator& Allocator) : LocalBuilder {Allocator} { OldToNewRemap.resize(AlignSize); } -bool IRCompaction::Run(IREmitter *IREmit) { +bool IRCompaction::Run(IREmitter* IREmit) { FEXCORE_PROFILE_SCOPED("PassManager::IRCompaction"); LocalBuilder.ReownOrClaimBuffer(); @@ -64,9 +64,9 @@ bool IRCompaction::Run(IREmitter *IREmit) { if (OldToNewRemap.size() < NodeCount) { OldToNewRemap.resize(std::max(OldToNewRemap.size() * 2U, AlignUp(NodeCount, AlignSize))); } - #ifndef NDEBUG - memset(&OldToNewRemap.at(0), 0xFF, NodeCount * sizeof(RemapNode)); - #endif +#ifndef NDEBUG + memset(&OldToNewRemap.at(0), 0xFF, NodeCount * sizeof(RemapNode)); +#endif GeneratedCodeBlocks.clear(); @@ -99,7 +99,8 @@ bool IRCompaction::Run(IREmitter *IREmit) { // Zero is always zero(invalid) OldToNewRemap[0].NodeID.Invalidate(); - auto LocalHeaderOp = LocalBuilder._IRHeader(OrderedNodeWrapper::WrapOffset(0).GetNode(ListBegin), HeaderOp->OriginalRIP, HeaderOp->BlockCount, HeaderOp->NumHostInstructions); + auto LocalHeaderOp = LocalBuilder._IRHeader(OrderedNodeWrapper::WrapOffset(0).GetNode(ListBegin), HeaderOp->OriginalRIP, + HeaderOp->BlockCount, HeaderOp->NumHostInstructions); OldToNewRemap[CurrentIR.GetID(HeaderNode).Value].NodeID = LocalIR.GetID(LocalHeaderOp.Node); @@ -110,7 +111,7 @@ bool 
IRCompaction::Run(IREmitter *IREmit) { auto LocalBlockIRNode = LocalBuilder._CodeBlock(LocalHeaderOp, LocalHeaderOp); // Use LocalHeaderOp as a dummy arg for now OldToNewRemap[CurrentIR.GetID(BlockNode).Value].NodeID = LocalIR.GetID(LocalBlockIRNode.Node); - GeneratedCodeBlocks.emplace_back(CodeBlockData{BlockNode, LocalBlockIRNode}); + GeneratedCodeBlocks.emplace_back(CodeBlockData {BlockNode, LocalBlockIRNode}); } // Link the IRHeader to the first code block @@ -119,13 +120,13 @@ bool IRCompaction::Run(IREmitter *IREmit) { { // Copy all of our IR ops over to the new location - for (auto &Block : GeneratedCodeBlocks) { + for (auto& Block : GeneratedCodeBlocks) { // Isolate block contents from any previous headers/blocks LocalBuilder.SetWriteCursor(nullptr); - CodeBlockData FirstNode{}; - CodeBlockData LastNode{}; + CodeBlockData FirstNode {}; + CodeBlockData LastNode {}; uint32_t i {}; for (auto [CodeNode, IROp] : CurrentIR.GetCode(Block.OldNode)) { const size_t OpSize = FEXCore::IR::GetSize(IROp->Op); @@ -167,7 +168,7 @@ bool IRCompaction::Run(IREmitter *IREmit) { { // Fixup the arguments of all the IROps - for (auto &Block : GeneratedCodeBlocks) { + for (auto& Block : GeneratedCodeBlocks) { #if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED auto BlockIROp = LocalIR.GetOp(Block.NewNode); LOGMAN_THROW_AA_FMT(BlockIROp->Header.Op == OP_CODEBLOCK, "IR type failed to be a code block"); @@ -183,10 +184,9 @@ bool IRCompaction::Run(IREmitter *IREmit) { const auto OldArg = LocalIROp->Args[i].ID(); const auto NewArg = OldToNewRemap[OldArg.Value].NodeID; - #ifndef NDEBUG - LOGMAN_THROW_A_FMT(NewArg.Value != UINT32_MAX, - "Tried remapping unfound node %{}", OldArg); - #endif +#ifndef NDEBUG + LOGMAN_THROW_A_FMT(NewArg.Value != UINT32_MAX, "Tried remapping unfound node %{}", OldArg); +#endif LocalIROp->Args[i].NodeOffset = NewArg.Value * sizeof(OrderedNode); } @@ -222,8 +222,8 @@ bool IRCompaction::Run(IREmitter *IREmit) { return true; } -fextl::unique_ptr 
CreateIRCompaction(FEXCore::Utils::IntrusivePooledAllocator &Allocator) { +fextl::unique_ptr CreateIRCompaction(FEXCore::Utils::IntrusivePooledAllocator& Allocator) { return fextl::make_unique(Allocator); } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/Passes/IRDumperPass.cpp b/FEXCore/Source/Interface/IR/Passes/IRDumperPass.cpp index e3f8c2d2fd..235b87b1d1 100644 --- a/FEXCore/Source/Interface/IR/Passes/IRDumperPass.cpp +++ b/FEXCore/Source/Interface/IR/Passes/IRDumperPass.cpp @@ -18,39 +18,36 @@ namespace FEXCore::IR::Debug { class IRDumper final : public FEXCore::IR::Pass { public: IRDumper(); - bool Run(IREmitter *IREmit) override; + bool Run(IREmitter* IREmit) override; private: FEX_CONFIG_OPT(DumpIR, DUMPIR); - bool DumpToFile{}; - bool DumpToLog{}; + bool DumpToFile {}; + bool DumpToLog {}; }; IRDumper::IRDumper() { const auto DumpIRStr = DumpIR(); if (DumpIRStr == "stderr" || DumpIRStr == "stdout" || DumpIRStr == "no") { // Intentionally do nothing - } - else if (DumpIRStr == "server") { + } else if (DumpIRStr == "server") { DumpToLog = true; - } - else { + } else { DumpToFile = true; } } -bool IRDumper::Run(IREmitter *IREmit) { +bool IRDumper::Run(IREmitter* IREmit) { auto RAPass = Manager->GetPass("RA"); - IR::RegisterAllocationData* RA{}; + IR::RegisterAllocationData* RA {}; if (RAPass) { RA = RAPass->GetAllocationData(); } - FEXCore::File::File FD{}; + FEXCore::File::File FD {}; if (DumpIR() == "stderr") { FD = FEXCore::File::File::GetStdERR(); - } - else if (DumpIR() == "stdout") { + } else if (DumpIR() == "stdout") { FD = FEXCore::File::File::GetStdOUT(); } @@ -62,9 +59,7 @@ bool IRDumper::Run(IREmitter *IREmit) { if (DumpToFile) { const auto fileName = fextl::fmt::format("{}/{:x}{}", DumpIR(), HeaderOp->OriginalRIP, RA ? 
"-post.ir" : "-pre.ir"); FD = FEXCore::File::File(fileName.c_str(), - FEXCore::File::FileModes::WRITE | - FEXCore::File::FileModes::CREATE | - FEXCore::File::FileModes::TRUNCATE); + FEXCore::File::FileModes::WRITE | FEXCore::File::FileModes::CREATE | FEXCore::File::FileModes::TRUNCATE); } if (FD.IsValid() || DumpToLog) { @@ -72,8 +67,7 @@ bool IRDumper::Run(IREmitter *IREmit) { FEXCore::IR::Dump(&out, &IR, RA); if (FD.IsValid()) { fextl::fmt::print(FD, "IR-{} 0x{:x}:\n{}\n@@@@@\n", RA ? "post" : "pre", HeaderOp->OriginalRIP, out.str()); - } - else { + } else { LogMan::Msg::IFmt("IR-{} 0x{:x}:\n{}\n@@@@@\n", RA ? "post" : "pre", HeaderOp->OriginalRIP, out.str()); } } @@ -84,4 +78,4 @@ bool IRDumper::Run(IREmitter *IREmit) { fextl::unique_ptr CreateIRDumper() { return fextl::make_unique(); } -} +} // namespace FEXCore::IR::Debug diff --git a/FEXCore/Source/Interface/IR/Passes/IRValidation.cpp b/FEXCore/Source/Interface/IR/Passes/IRValidation.cpp index 655b0a96c0..7eb6178fa9 100644 --- a/FEXCore/Source/Interface/IR/Passes/IRValidation.cpp +++ b/FEXCore/Source/Interface/IR/Passes/IRValidation.cpp @@ -33,7 +33,7 @@ IRValidation::~IRValidation() { NodeIsLive.Free(); } -bool IRValidation::Run(IREmitter *IREmit) { +bool IRValidation::Run(IREmitter* IREmit) { FEXCORE_PROFILE_SCOPED("PassManager::IRValidation"); bool HadError = false; @@ -58,7 +58,7 @@ bool IRValidation::Run(IREmitter *IREmit) { LOGMAN_THROW_A_FMT(HeaderOp->Header.Op == OP_IRHEADER, "First op wasn't IRHeader"); #endif - IR::RegisterAllocationData * RAData{}; + IR::RegisterAllocationData* RAData {}; if (Manager->HasPass("RA")) { RAData = Manager->GetPass("RA")->GetAllocationData(); } @@ -74,7 +74,7 @@ bool IRValidation::Run(IREmitter *IREmit) { } const auto BlockID = CurrentIR.GetID(BlockNode); - BlockInfo *CurrentBlock = &OffsetToBlockMap.try_emplace(BlockID).first->second; + BlockInfo* CurrentBlock = &OffsetToBlockMap.try_emplace(BlockID).first->second; for (auto [CodeNode, IROp] : 
CurrentIR.GetCode(BlockNode)) { const auto ID = CurrentIR.GetID(CodeNode); @@ -98,7 +98,7 @@ bool IRValidation::Run(IREmitter *IREmit) { auto PhyReg = RAData->GetNodeRegister(ID); FEXCore::IR::RegisterClassType ExpectedClass = IR::GetRegClass(IROp->Op); - FEXCore::IR::RegisterClassType AssignedClass = FEXCore::IR::RegisterClassType{PhyReg.Class}; + FEXCore::IR::RegisterClassType AssignedClass = FEXCore::IR::RegisterClassType {PhyReg.Class}; // If no register class was assigned if (AssignedClass == IR::InvalidClass) { @@ -113,10 +113,10 @@ bool IRValidation::Run(IREmitter *IREmit) { } // Assigned class wasn't the expected class and it is a non-complex op - if (AssignedClass != ExpectedClass && - ExpectedClass != IR::ComplexClass) { + if (AssignedClass != ExpectedClass && ExpectedClass != IR::ComplexClass) { HadWarning |= true; - Warnings << "%" << ID << ": Destination had register class " << AssignedClass.Val << " When register class " << ExpectedClass.Val << " Was expected" << std::endl; + Warnings << "%" << ID << ": Destination had register class " << AssignedClass.Val << " When register class " + << ExpectedClass.Val << " Was expected" << std::endl; } } } @@ -146,60 +146,57 @@ bool IRValidation::Run(IREmitter *IREmit) { NodeIsLive.Set(ID.Value); switch (IROp->Op) { - case IR::OP_EXITFUNCTION: { - CurrentBlock->HasExit = true; + case IR::OP_EXITFUNCTION: { + CurrentBlock->HasExit = true; break; - } - case IR::OP_CONDJUMP: { - auto Op = IROp->C(); - - OrderedNode *TrueTargetNode = CurrentIR.GetNode(Op->TrueBlock); - OrderedNode *FalseTargetNode = CurrentIR.GetNode(Op->FalseBlock); + } + case IR::OP_CONDJUMP: { + auto Op = IROp->C(); - CurrentBlock->Successors.emplace_back(TrueTargetNode); - CurrentBlock->Successors.emplace_back(FalseTargetNode); + OrderedNode* TrueTargetNode = CurrentIR.GetNode(Op->TrueBlock); + OrderedNode* FalseTargetNode = CurrentIR.GetNode(Op->FalseBlock); - FEXCore::IR::IROp_Header const *TrueTargetOp = CurrentIR.GetOp(TrueTargetNode); - 
FEXCore::IR::IROp_Header const *FalseTargetOp = CurrentIR.GetOp(FalseTargetNode); + CurrentBlock->Successors.emplace_back(TrueTargetNode); + CurrentBlock->Successors.emplace_back(FalseTargetNode); - if (TrueTargetOp->Op != OP_CODEBLOCK) { - HadError |= true; - Errors << "CondJump %" << ID << ": True Target Jumps to Op that isn't the begining of a block" << std::endl; - } - else { - auto Block = OffsetToBlockMap.try_emplace(Op->TrueBlock.ID()).first; - Block->second.Predecessors.emplace_back(BlockNode); - } + const FEXCore::IR::IROp_Header* TrueTargetOp = CurrentIR.GetOp(TrueTargetNode); + const FEXCore::IR::IROp_Header* FalseTargetOp = CurrentIR.GetOp(FalseTargetNode); - if (FalseTargetOp->Op != OP_CODEBLOCK) { - HadError |= true; - Errors << "CondJump %" << ID << ": False Target Jumps to Op that isn't the begining of a block" << std::endl; - } - else { - auto Block = OffsetToBlockMap.try_emplace(Op->FalseBlock.ID()).first; - Block->second.Predecessors.emplace_back(BlockNode); - } + if (TrueTargetOp->Op != OP_CODEBLOCK) { + HadError |= true; + Errors << "CondJump %" << ID << ": True Target Jumps to Op that isn't the begining of a block" << std::endl; + } else { + auto Block = OffsetToBlockMap.try_emplace(Op->TrueBlock.ID()).first; + Block->second.Predecessors.emplace_back(BlockNode); + } - break; + if (FalseTargetOp->Op != OP_CODEBLOCK) { + HadError |= true; + Errors << "CondJump %" << ID << ": False Target Jumps to Op that isn't the begining of a block" << std::endl; + } else { + auto Block = OffsetToBlockMap.try_emplace(Op->FalseBlock.ID()).first; + Block->second.Predecessors.emplace_back(BlockNode); } - case IR::OP_JUMP: { - auto Op = IROp->C(); - OrderedNode *TargetNode = CurrentIR.GetNode(Op->Header.Args[0]); - CurrentBlock->Successors.emplace_back(TargetNode); - FEXCore::IR::IROp_Header const *TargetOp = CurrentIR.GetOp(TargetNode); - if (TargetOp->Op != OP_CODEBLOCK) { - HadError |= true; - Errors << "Jump %" << ID << ": Jump to Op that isn't the begining of 
a block" << std::endl; - } - else { - auto Block = OffsetToBlockMap.try_emplace(Op->Header.Args[0].ID()).first; - Block->second.Predecessors.emplace_back(BlockNode); - } - break; + break; + } + case IR::OP_JUMP: { + auto Op = IROp->C(); + OrderedNode* TargetNode = CurrentIR.GetNode(Op->Header.Args[0]); + CurrentBlock->Successors.emplace_back(TargetNode); + + const FEXCore::IR::IROp_Header* TargetOp = CurrentIR.GetOp(TargetNode); + if (TargetOp->Op != OP_CODEBLOCK) { + HadError |= true; + Errors << "Jump %" << ID << ": Jump to Op that isn't the begining of a block" << std::endl; + } else { + auto Block = OffsetToBlockMap.try_emplace(Op->Header.Args[0].ID()).first; + Block->second.Predecessors.emplace_back(BlockNode); } - default: - // LOGMAN_MSG_A_FMT("Unknown IR Op: {}({})", IROp->Op, FEXCore::IR::GetName(IROp->Op)); + break; + } + default: + // LOGMAN_MSG_A_FMT("Unknown IR Op: {}({})", IROp->Op, FEXCore::IR::GetName(IROp->Op)); break; } } @@ -242,7 +239,7 @@ bool IRValidation::Run(IREmitter *IREmit) { } for (uint32_t i = 0; i < CurrentIR.GetSSACount(); i++) { - auto [Node, IROp] = CurrentIR.at(IR::NodeID{i})(); + auto [Node, IROp] = CurrentIR.at(IR::NodeID {i})(); if (Node->NumUses != Uses[i] && IROp->Op != OP_CODEBLOCK && IROp->Op != OP_IRHEADER) { HadError |= true; Errors << "%" << i << " Has " << Uses[i] << " Uses, but reports " << Node->NumUses << std::endl; @@ -276,4 +273,4 @@ bool IRValidation::Run(IREmitter *IREmit) { fextl::unique_ptr CreateIRValidation() { return fextl::make_unique(); } -} +} // namespace FEXCore::IR::Validation diff --git a/FEXCore/Source/Interface/IR/Passes/InlineCallOptimization.cpp b/FEXCore/Source/Interface/IR/Passes/InlineCallOptimization.cpp index 8aef6ccc0c..e45c9e79ee 100644 --- a/FEXCore/Source/Interface/IR/Passes/InlineCallOptimization.cpp +++ b/FEXCore/Source/Interface/IR/Passes/InlineCallOptimization.cpp @@ -24,12 +24,12 @@ class InlineCallOptimization final : public FEXCore::IR::Pass { public: InlineCallOptimization(const 
FEXCore::CPUIDEmu* CPUID) : CPUID {CPUID} {} - bool Run(IREmitter *IREmit) override; + bool Run(IREmitter* IREmit) override; private: const FEXCore::CPUIDEmu* CPUID; }; -bool InlineCallOptimization::Run(IREmitter *IREmit) { +bool InlineCallOptimization::Run(IREmitter* IREmit) { FEXCORE_PROFILE_SCOPED("PassManager::SyscallOpt"); bool Changed = false; @@ -61,15 +61,10 @@ bool InlineCallOptimization::Run(IREmitter *IREmit) { if (SyscallDef.HostSyscallNumber != -1) { IREmit->SetWriteCursor(CodeNode); // Skip Args[0] since that is the syscallid - auto InlineSyscall = IREmit->_InlineSyscall( - CurrentIR.GetNode(IROp->Args[1]), - CurrentIR.GetNode(IROp->Args[2]), - CurrentIR.GetNode(IROp->Args[3]), - CurrentIR.GetNode(IROp->Args[4]), - CurrentIR.GetNode(IROp->Args[5]), - CurrentIR.GetNode(IROp->Args[6]), - SyscallDef.HostSyscallNumber, - Op->Flags); + auto InlineSyscall = + IREmit->_InlineSyscall(CurrentIR.GetNode(IROp->Args[1]), CurrentIR.GetNode(IROp->Args[2]), CurrentIR.GetNode(IROp->Args[3]), + CurrentIR.GetNode(IROp->Args[4]), CurrentIR.GetNode(IROp->Args[5]), CurrentIR.GetNode(IROp->Args[6]), + SyscallDef.HostSyscallNumber, Op->Flags); // Replace all syscall uses with this inline one IREmit->ReplaceAllUsesWith(CodeNode, InlineSyscall); @@ -82,11 +77,10 @@ bool InlineCallOptimization::Run(IREmitter *IREmit) { Changed = true; } - } - else if (IROp->Op == FEXCore::IR::OP_CPUID) { + } else if (IROp->Op == FEXCore::IR::OP_CPUID) { auto Op = IROp->CW(); - uint64_t ConstantFunction{}, ConstantLeaf{}; + uint64_t ConstantFunction {}, ConstantLeaf {}; bool IsConstantFunction = IREmit->IsValueConstant(Op->Function, &ConstantFunction); bool IsConstantLeaf = IREmit->IsValueConstant(Op->Leaf, &ConstantLeaf); // If the CPUID function is constant then we can try and optimize. 
@@ -114,12 +108,12 @@ bool InlineCallOptimization::Run(IREmitter *IREmit) { else if (IROp->Op == FEXCore::IR::OP_XGETBV) { auto Op = IROp->CW(); - uint64_t ConstantFunction{}; - if (IREmit->IsValueConstant(Op->Function, &ConstantFunction) && - CPUID->DoesXCRFunctionReportConstantData(ConstantFunction)) { + uint64_t ConstantFunction {}; + if (IREmit->IsValueConstant(Op->Function, &ConstantFunction) && CPUID->DoesXCRFunctionReportConstantData(ConstantFunction)) { const auto ConstantXCRResult = CPUID->RunXCRFunction(ConstantFunction); IREmit->SetWriteCursor(CodeNode); - auto ElementPair = IREmit->_CreateElementPair(IR::OpSize::i64Bit, IREmit->_Constant(ConstantXCRResult.eax), IREmit->_Constant(ConstantXCRResult.edx)); + auto ElementPair = + IREmit->_CreateElementPair(IR::OpSize::i64Bit, IREmit->_Constant(ConstantXCRResult.eax), IREmit->_Constant(ConstantXCRResult.edx)); // Replace all xgetbv uses with this inline one IREmit->ReplaceAllUsesWith(CodeNode, ElementPair); Changed = true; @@ -133,4 +127,4 @@ fextl::unique_ptr CreateInlineCallOptimization(const FEXCore: return fextl::make_unique(CPUID); } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/Passes/LongDivideRemovalPass.cpp b/FEXCore/Source/Interface/IR/Passes/LongDivideRemovalPass.cpp index 00a3c446bd..455dff3c79 100644 --- a/FEXCore/Source/Interface/IR/Passes/LongDivideRemovalPass.cpp +++ b/FEXCore/Source/Interface/IR/Passes/LongDivideRemovalPass.cpp @@ -19,13 +19,13 @@ namespace FEXCore::IR { class LongDivideEliminationPass final : public FEXCore::IR::Pass { public: - bool Run(IREmitter *IREmit) override; + bool Run(IREmitter* IREmit) override; private: - bool IsZeroOp(IREmitter *IREmit, OrderedNodeWrapper Arg); - bool IsSextOp(IREmitter *IREmit, OrderedNodeWrapper Lower, OrderedNodeWrapper Upper); + bool IsZeroOp(IREmitter* IREmit, OrderedNodeWrapper Arg); + bool IsSextOp(IREmitter* IREmit, OrderedNodeWrapper Lower, OrderedNodeWrapper Upper); }; -bool 
LongDivideEliminationPass::IsZeroOp(IREmitter *IREmit, OrderedNodeWrapper Arg) { +bool LongDivideEliminationPass::IsZeroOp(IREmitter* IREmit, OrderedNodeWrapper Arg) { uint64_t Value; if (IREmit->IsValueConstant(Arg, &Value)) { @@ -35,11 +35,11 @@ bool LongDivideEliminationPass::IsZeroOp(IREmitter *IREmit, OrderedNodeWrapper A return false; } -bool LongDivideEliminationPass::IsSextOp(IREmitter *IREmit, OrderedNodeWrapper Lower, OrderedNodeWrapper Upper) { +bool LongDivideEliminationPass::IsSextOp(IREmitter* IREmit, OrderedNodeWrapper Lower, OrderedNodeWrapper Upper) { // We need to check if the upper source is a sext of the lower source auto UpperIROp = IREmit->GetOpHeader(Upper); if (UpperIROp->Op == OP_SBFE) { - auto Op = UpperIROp->C(); + auto Op = UpperIROp->C(); if (Op->Width == 1 && Op->lsb == 63) { // CQO: OrderedNode *Upper = _Sbfe(1, Size * 8 - 1, Src); // If the lower is the upper in this case then it can be optimized @@ -49,7 +49,7 @@ bool LongDivideEliminationPass::IsSextOp(IREmitter *IREmit, OrderedNodeWrapper L return false; } -bool LongDivideEliminationPass::Run(IREmitter *IREmit) { +bool LongDivideEliminationPass::Run(IREmitter* IREmit) { FEXCORE_PROFILE_SCOPED("PassManager::LDE"); bool Changed = false; @@ -59,41 +59,36 @@ bool LongDivideEliminationPass::Run(IREmitter *IREmit) { for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) { for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) { if (IROp->Size == 8) { - if (IROp->Op == OP_LDIV || - IROp->Op == OP_LREM) { + if (IROp->Op == OP_LDIV || IROp->Op == OP_LREM) { auto Op = IROp->C(); // Check upper Op to see if it came from a CQO // CQO: OrderedNode *Upper = _Sbfe(1, Size * 8 - 1, Src); // If it does then it we only need a 64bit SDIV if (IsSextOp(IREmit, Op->Lower, Op->Upper)) { IREmit->SetWriteCursor(CodeNode); - OrderedNode *Lower = CurrentIR.GetNode(Op->Lower); - OrderedNode *Divisor = CurrentIR.GetNode(Op->Divisor); - OrderedNode *SDivOp{}; + OrderedNode* Lower = 
CurrentIR.GetNode(Op->Lower); + OrderedNode* Divisor = CurrentIR.GetNode(Op->Divisor); + OrderedNode* SDivOp {}; if (IROp->Op == OP_LDIV) { SDivOp = IREmit->_Div(OpSize::i64Bit, Lower, Divisor); - } - else { + } else { SDivOp = IREmit->_Rem(OpSize::i64Bit, Lower, Divisor); } IREmit->ReplaceAllUsesWith(CodeNode, SDivOp); Changed = true; } - } - else if (IROp->Op == OP_LUDIV || - IROp->Op == OP_LUREM) { + } else if (IROp->Op == OP_LUDIV || IROp->Op == OP_LUREM) { auto Op = IROp->C(); // Check upper Op to see if it came from a zeroing op // If it does then it we only need a 64bit UDIV if (IsZeroOp(IREmit, Op->Upper)) { IREmit->SetWriteCursor(CodeNode); - OrderedNode *Lower = CurrentIR.GetNode(Op->Lower); - OrderedNode *Divisor = CurrentIR.GetNode(Op->Divisor); - OrderedNode *UDivOp{}; + OrderedNode* Lower = CurrentIR.GetNode(Op->Lower); + OrderedNode* Divisor = CurrentIR.GetNode(Op->Divisor); + OrderedNode* UDivOp {}; if (IROp->Op == OP_LUDIV) { UDivOp = IREmit->_UDiv(OpSize::i64Bit, Lower, Divisor); - } - else { + } else { UDivOp = IREmit->_URem(OpSize::i64Bit, Lower, Divisor); } IREmit->ReplaceAllUsesWith(CodeNode, UDivOp); @@ -112,4 +107,4 @@ bool LongDivideEliminationPass::Run(IREmitter *IREmit) { fextl::unique_ptr CreateLongDivideEliminationPass() { return fextl::make_unique(); } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/Passes/RAValidation.cpp b/FEXCore/Source/Interface/IR/Passes/RAValidation.cpp index f506d6c619..e50316900b 100644 --- a/FEXCore/Source/Interface/IR/Passes/RAValidation.cpp +++ b/FEXCore/Source/Interface/IR/Passes/RAValidation.cpp @@ -21,11 +21,11 @@ namespace FEXCore::IR::Validation { // Hold the mapping of physical registers to the SSA id it holds at any given point in the IR struct RegState { - static constexpr IR::NodeID UninitializedValue{0}; - static constexpr IR::NodeID InvalidReg {0xffff'ffff}; - static constexpr IR::NodeID CorruptedPair {0xffff'fffe}; - static constexpr IR::NodeID ClobberedValue 
{0xffff'fffd}; - static constexpr IR::NodeID StaticAssigned {0xffff'ff00}; + static constexpr IR::NodeID UninitializedValue {0}; + static constexpr IR::NodeID InvalidReg {0xffff'ffff}; + static constexpr IR::NodeID CorruptedPair {0xffff'fffe}; + static constexpr IR::NodeID ClobberedValue {0xffff'fffd}; + static constexpr IR::NodeID StaticAssigned {0xffff'ff00}; // This class makes some assumptions about how the host registers are arranged and mapped to virtual registers: // 1. There will be less than 32 GPRs and 32 FPRs @@ -42,24 +42,20 @@ struct RegState { // PhysicalRegisters aren't fully mapped until assembly emission // We need to apply a generic mapping here to catch any aliasing switch (Reg.Class) { - case GPRClass: - GPRs[Reg.Reg] = ssa; - return true; + case GPRClass: GPRs[Reg.Reg] = ssa; return true; case GPRFixedClass: // On arm64, there are 16 Fixed and 9 normal GPRsFixed[Reg.Reg] = ssa; return true; - case FPRClass: - FPRs[Reg.Reg] = ssa; - return true; + case FPRClass: FPRs[Reg.Reg] = ssa; return true; case FPRFixedClass: // On arm64, there are 16 Fixed and 12 normal FPRsFixed[Reg.Reg] = ssa; return true; case GPRPairClass: // Alias paired registers onto both - GPRs[Reg.Reg*2] = ssa; - GPRs[Reg.Reg*2 + 1] = ssa; + GPRs[Reg.Reg * 2] = ssa; + GPRs[Reg.Reg * 2 + 1] = ssa; return true; } return false; @@ -69,18 +65,14 @@ struct RegState { // Or an error value there isn't a (sane) SSA id IR::NodeID Get(PhysicalRegister Reg) const { switch (Reg.Class) { - case GPRClass: - return GPRs[Reg.Reg]; - case GPRFixedClass: - return GPRsFixed[Reg.Reg]; - case FPRClass: - return FPRs[Reg.Reg]; - case FPRFixedClass: - return FPRsFixed[Reg.Reg]; + case GPRClass: return GPRs[Reg.Reg]; + case GPRFixedClass: return GPRsFixed[Reg.Reg]; + case FPRClass: return FPRs[Reg.Reg]; + case FPRFixedClass: return FPRsFixed[Reg.Reg]; case GPRPairClass: // Make sure both halves of the Pair contain the same SSA - if (GPRs[Reg.Reg*2] == GPRs[Reg.Reg*2 + 1]) { - return GPRs[Reg.Reg*2]; + 
if (GPRs[Reg.Reg * 2] == GPRs[Reg.Reg * 2 + 1]) { + return GPRs[Reg.Reg * 2]; } return CorruptedPair; } @@ -148,25 +140,25 @@ struct RegState { // Mark them as Clobbered. // Useful for backwards edges, where using an SSA from before the void Filter(IR::NodeID MaxSSA) { - for (auto &gpr : GPRs) { + for (auto& gpr : GPRs) { if (gpr > MaxSSA) { gpr = ClobberedValue; } } - for (auto &gpr : GPRsFixed) { + for (auto& gpr : GPRsFixed) { if (gpr > MaxSSA) { gpr = ClobberedValue; } } - for (auto &fpr : FPRs) { + for (auto& fpr : FPRs) { if (fpr > MaxSSA) { fpr = ClobberedValue; } } - for (auto &fpr : FPRsFixed) { + for (auto& fpr : FPRsFixed) { if (fpr > MaxSSA) { fpr = ClobberedValue; } @@ -189,13 +181,13 @@ struct RegState { fextl::unordered_map Spills; public: - uint32_t Version{}; // Used to force regeneration of RegStates after following backward edges + uint32_t Version {}; // Used to force regeneration of RegStates after following backward edges }; class RAValidation final : public FEXCore::IR::Pass { public: ~RAValidation() {} - bool Run(IREmitter *IREmit) override; + bool Run(IREmitter* IREmit) override; private: // Holds the calculated RegState at the exit of each block @@ -206,8 +198,10 @@ class RAValidation final : public FEXCore::IR::Pass { }; -bool RAValidation::Run(IREmitter *IREmit) { - if (!Manager->HasPass("RA")) return false; +bool RAValidation::Run(IREmitter* IREmit) { + if (!Manager->HasPass("RA")) { + return false; + } FEXCORE_PROFILE_SCOPED("PassManager::RAValidation"); @@ -230,8 +224,7 @@ bool RAValidation::Run(IREmitter *IREmit) { auto CurrentIR = IREmit->ViewIR(); uint32_t CurrentVersion = 1; // Incremented every backwards edge - while (!BlocksToVisit.empty()) - { + while (!BlocksToVisit.empty()) { auto BlockNode = BlocksToVisit.front(); const auto BlockID = CurrentIR.GetID(BlockNode); auto& BlockInfo = OffsetToBlockMap[BlockID]; @@ -276,7 +269,7 @@ bool RAValidation::Run(IREmitter *IREmit) { auto& BlockRegState = 
BlockExitState.try_emplace(BlockID).first->second; bool EmptyRegState = true; - auto Intersect = [&] (RegState& Other) { + auto Intersect = [&](RegState& Other) { if (EmptyRegState) { BlockRegState = Other; EmptyRegState = false; @@ -307,8 +300,9 @@ bool RAValidation::Run(IREmitter *IREmit) { const auto ArgID = Arg.ID(); const auto PhyReg = RAData->GetNodeRegister(ArgID); - if (PhyReg.IsInvalid()) + if (PhyReg.IsInvalid()) { return; + } auto CurrentSSAAtReg = BlockRegState.Get(PhyReg); if (CurrentSSAAtReg == RegState::InvalidReg) { @@ -317,30 +311,28 @@ bool RAValidation::Run(IREmitter *IREmit) { } else if (CurrentSSAAtReg == RegState::CorruptedPair) { HadError |= true; - auto Lower = BlockRegState.Get(PhysicalRegister(GPRClass, uint8_t(PhyReg.Reg*2) + 1)); - auto Upper = BlockRegState.Get(PhysicalRegister(GPRClass, PhyReg.Reg*2 + 1)); + auto Lower = BlockRegState.Get(PhysicalRegister(GPRClass, uint8_t(PhyReg.Reg * 2) + 1)); + auto Upper = BlockRegState.Get(PhysicalRegister(GPRClass, PhyReg.Reg * 2 + 1)); - Errors << fextl::fmt::format("%{}: Arg[{}] expects paired reg{} to contain %{}, but it actually contains {{%{}, %{}}}\n", - ID, i, PhyReg.Reg, ArgID, Lower, Upper); + Errors << fextl::fmt::format("%{}: Arg[{}] expects paired reg{} to contain %{}, but it actually contains {{%{}, %{}}}\n", ID, i, + PhyReg.Reg, ArgID, Lower, Upper); } else if (CurrentSSAAtReg == RegState::UninitializedValue) { HadError |= true; - Errors << fextl::fmt::format("%{}: Arg[{}] expects reg{} to contain %{}, but it is uninitialized\n", - ID, i, PhyReg.Reg, ArgID); + Errors << fextl::fmt::format("%{}: Arg[{}] expects reg{} to contain %{}, but it is uninitialized\n", ID, i, PhyReg.Reg, ArgID); } else if (CurrentSSAAtReg == RegState::ClobberedValue) { HadError |= true; - Errors << fextl::fmt::format("%{}: Arg[{}] expects reg{} to contain %{}, but contents vary depending on control flow\n", - ID, i, PhyReg.Reg, ArgID); + Errors << fextl::fmt::format("%{}: Arg[{}] expects reg{} to contain %{}, 
but contents vary depending on control flow\n", ID, i, + PhyReg.Reg, ArgID); } else if (CurrentSSAAtReg != ArgID) { HadError |= true; - Errors << fextl::fmt::format("%{}: Arg[{}] expects reg{} to contain %{}, but it actually contains %{}\n", - ID, i, PhyReg.Reg, ArgID, CurrentSSAAtReg); + Errors << fextl::fmt::format("%{}: Arg[{}] expects reg{} to contain %{}, but it actually contains %{}\n", ID, i, PhyReg.Reg, + ArgID, CurrentSSAAtReg); } }; - switch (IROp->Op) - { + switch (IROp->Op) { case OP_SPILLREGISTER: { auto SpillRegister = IROp->C(); CheckArg(0, SpillRegister->Value); @@ -360,15 +352,15 @@ bool RAValidation::Run(IREmitter *IREmit) { if (Value == RegState::UninitializedValue) { HadError |= true; Errors << fextl::fmt::format("%{}: FillRegister expected %{} in Slot {}, but was undefined in at least one control flow path\n", - ID, ExpectedValue, FillRegister->Slot); + ID, ExpectedValue, FillRegister->Slot); } else if (Value == RegState::ClobberedValue) { HadError |= true; - Errors << fextl::fmt::format("%{}: FillRegister expected %{} in Slot {}, but contents vary depending on control flow\n", - ID, ExpectedValue, FillRegister->Slot); + Errors << fextl::fmt::format("%{}: FillRegister expected %{} in Slot {}, but contents vary depending on control flow\n", ID, + ExpectedValue, FillRegister->Slot); } else if (Value != ExpectedValue) { HadError |= true; - Errors << fextl::fmt::format("%{}: FillRegister expected %{} in Slot {}, but it actually contains %{}\n", - ID, ExpectedValue, FillRegister->Slot, Value); + Errors << fextl::fmt::format("%{}: FillRegister expected %{} in Slot {}, but it actually contains %{}\n", ID, ExpectedValue, + FillRegister->Slot, Value); } break; } @@ -445,7 +437,6 @@ bool RAValidation::Run(IREmitter *IREmit) { Errors << "(Backwards): "; } Errors << fextl::fmt::format("Block {} ", SuccessorID); - } Errors << "\n\n"; @@ -453,7 +444,6 @@ bool RAValidation::Run(IREmitter *IREmit) { break; } - } if (HadError) { @@ -472,4 +462,4 @@ bool 
RAValidation::Run(IREmitter *IREmit) { fextl::unique_ptr CreateRAValidation() { return fextl::make_unique(); } -} +} // namespace FEXCore::IR::Validation diff --git a/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp b/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp index 1e222351c0..de68849c96 100644 --- a/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp +++ b/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp @@ -54,208 +54,197 @@ struct FlagInfo { class DeadFlagCalculationEliminination final : public FEXCore::IR::Pass { public: - bool Run(IREmitter *IREmit) override; + bool Run(IREmitter* IREmit) override; private: - FlagInfo Classify(IROp_Header *Node); + FlagInfo Classify(IROp_Header* Node); unsigned FlagForOffset(unsigned Offset); unsigned FlagsForCondClassType(CondClassType Cond); }; -unsigned -DeadFlagCalculationEliminination::FlagForOffset(unsigned Offset) -{ - return Offset == offsetof(FEXCore::Core::CPUState, pf_raw) ? FLAG_P : - Offset == offsetof(FEXCore::Core::CPUState, af_raw) ? FLAG_A : - 0; +unsigned DeadFlagCalculationEliminination::FlagForOffset(unsigned Offset) { + return Offset == offsetof(FEXCore::Core::CPUState, pf_raw) ? FLAG_P : Offset == offsetof(FEXCore::Core::CPUState, af_raw) ? 
FLAG_A : 0; }; -unsigned -DeadFlagCalculationEliminination::FlagsForCondClassType(CondClassType Cond) -{ +unsigned DeadFlagCalculationEliminination::FlagsForCondClassType(CondClassType Cond) { switch (Cond) { - case COND_AL: - return 0; + case COND_AL: return 0; case COND_MI: - case COND_PL: - return FLAG_N; + case COND_PL: return FLAG_N; case COND_EQ: - case COND_NEQ: - return FLAG_Z; + case COND_NEQ: return FLAG_Z; case COND_UGE: - case COND_ULT: - return FLAG_C; + case COND_ULT: return FLAG_C; case COND_VS: case COND_VC: case COND_FU: - case COND_FNU: - return FLAG_V; + case COND_FNU: return FLAG_V; case COND_UGT: - case COND_ULE: - return FLAG_Z | FLAG_C; + case COND_ULE: return FLAG_Z | FLAG_C; case COND_SGE: case COND_SLT: case COND_FLU: - case COND_FGE: - return FLAG_N | FLAG_V; + case COND_FGE: return FLAG_N | FLAG_V; case COND_SGT: case COND_SLE: case COND_FLEU: - case COND_FGT: - return FLAG_N | FLAG_Z | FLAG_V; + case COND_FGT: return FLAG_N | FLAG_Z | FLAG_V; - default: - LOGMAN_THROW_AA_FMT(false, "unknown cond class type"); - return FLAG_NZCV; + default: LOGMAN_THROW_AA_FMT(false, "unknown cond class type"); return FLAG_NZCV; } } -FlagInfo -DeadFlagCalculationEliminination::Classify(IROp_Header *IROp) -{ +FlagInfo DeadFlagCalculationEliminination::Classify(IROp_Header* IROp) { switch (IROp->Op) { - case OP_ANDWITHFLAGS: - return { - .Write = FLAG_NZCV, - .CanReplace = true, - .Replacement = OP_AND, - }; - - case OP_ADDNZCV: - case OP_SUBNZCV: - case OP_TESTNZ: - case OP_FCMP: - case OP_STORENZCV: - return { - .Write = FLAG_NZCV, - .CanEliminate = true, - }; - - case OP_AXFLAG: - // Per the Arm spec, axflag reads Z/V/C but not N. It writes all flags. 
- return { - .Read = FLAG_ZCV, - .Write = FLAG_NZCV, - .CanEliminate = true, - }; - - case OP_CARRYINVERT: - return { - .Read = FLAG_C, - .Write = FLAG_C, - .CanEliminate = true, - }; - - case OP_LOADNZCV: - return {.Read = FLAG_NZCV}; - - case OP_ADCNZCV: - case OP_SBBNZCV: - return { - .Read = FLAG_C, - .Write = FLAG_NZCV, - .CanEliminate = true, - }; - - case OP_NZCVSELECT: { - auto Op = IROp->CW(); - return {.Read = FlagsForCondClassType(Op->Cond)}; - } - - case OP_CONDJUMP: { - auto Op = IROp->CW(); - if (!Op->FromNZCV) - break; + case OP_ANDWITHFLAGS: + return { + .Write = FLAG_NZCV, + .CanReplace = true, + .Replacement = OP_AND, + }; + + case OP_ADDNZCV: + case OP_SUBNZCV: + case OP_TESTNZ: + case OP_FCMP: + case OP_STORENZCV: + return { + .Write = FLAG_NZCV, + .CanEliminate = true, + }; + + case OP_AXFLAG: + // Per the Arm spec, axflag reads Z/V/C but not N. It writes all flags. + return { + .Read = FLAG_ZCV, + .Write = FLAG_NZCV, + .CanEliminate = true, + }; + + case OP_CARRYINVERT: + return { + .Read = FLAG_C, + .Write = FLAG_C, + .CanEliminate = true, + }; + + case OP_LOADNZCV: return {.Read = FLAG_NZCV}; + + case OP_ADCNZCV: + case OP_SBBNZCV: + return { + .Read = FLAG_C, + .Write = FLAG_NZCV, + .CanEliminate = true, + }; + + case OP_NZCVSELECT: { + auto Op = IROp->CW(); + return {.Read = FlagsForCondClassType(Op->Cond)}; + } - return {.Read = FlagsForCondClassType(Op->Cond)}; + case OP_CONDJUMP: { + auto Op = IROp->CW(); + if (!Op->FromNZCV) { + break; } - case OP_CONDADDNZCV: { - auto Op = IROp->CW(); - return { - .Read = FlagsForCondClassType(Op->Cond), - .Write = FLAG_NZCV, - .CanEliminate = true, - }; - } + return {.Read = FlagsForCondClassType(Op->Cond)}; + } - case OP_RMIFNZCV: { - auto Op = IROp->CW(); + case OP_CONDADDNZCV: { + auto Op = IROp->CW(); + return { + .Read = FlagsForCondClassType(Op->Cond), + .Write = FLAG_NZCV, + .CanEliminate = true, + }; + } - static_assert(FLAG_N == (1 << 3), "rmif mask lines up with our bits"); - 
static_assert(FLAG_Z == (1 << 2), "rmif mask lines up with our bits"); - static_assert(FLAG_C == (1 << 1), "rmif mask lines up with our bits"); - static_assert(FLAG_V == (1 << 0), "rmif mask lines up with our bits"); + case OP_RMIFNZCV: { + auto Op = IROp->CW(); - return { - .Write = Op->Mask, - .CanEliminate = true, - }; - } + static_assert(FLAG_N == (1 << 3), "rmif mask lines up with our bits"); + static_assert(FLAG_Z == (1 << 2), "rmif mask lines up with our bits"); + static_assert(FLAG_C == (1 << 1), "rmif mask lines up with our bits"); + static_assert(FLAG_V == (1 << 0), "rmif mask lines up with our bits"); - case OP_INVALIDATEFLAGS: { - auto Op = IROp->CW(); - unsigned Flags = 0; + return { + .Write = Op->Mask, + .CanEliminate = true, + }; + } - // TODO: Make this translation less silly - if (Op->Flags & (1u << X86State::RFLAG_SF_RAW_LOC)) - Flags |= FLAG_N; + case OP_INVALIDATEFLAGS: { + auto Op = IROp->CW(); + unsigned Flags = 0; - if (Op->Flags & (1u << X86State::RFLAG_ZF_RAW_LOC)) - Flags |= FLAG_Z; + // TODO: Make this translation less silly + if (Op->Flags & (1u << X86State::RFLAG_SF_RAW_LOC)) { + Flags |= FLAG_N; + } - if (Op->Flags & (1u << X86State::RFLAG_CF_RAW_LOC)) - Flags |= FLAG_C; + if (Op->Flags & (1u << X86State::RFLAG_ZF_RAW_LOC)) { + Flags |= FLAG_Z; + } - if (Op->Flags & (1u << X86State::RFLAG_OF_RAW_LOC)) - Flags |= FLAG_V; + if (Op->Flags & (1u << X86State::RFLAG_CF_RAW_LOC)) { + Flags |= FLAG_C; + } - if (Op->Flags & (1u << X86State::RFLAG_PF_RAW_LOC)) - Flags |= FLAG_P; + if (Op->Flags & (1u << X86State::RFLAG_OF_RAW_LOC)) { + Flags |= FLAG_V; + } - if (Op->Flags & (1u << X86State::RFLAG_AF_RAW_LOC)) - Flags |= FLAG_A; + if (Op->Flags & (1u << X86State::RFLAG_PF_RAW_LOC)) { + Flags |= FLAG_P; + } - // The mental model of InvalidateFlags is writing undefined values to all - // of the selected flags, allowing the write-after-write optimizations to - // optimize invalidate-after-write for free. 
- return { - .Write = Flags, - .CanEliminate = true, - }; + if (Op->Flags & (1u << X86State::RFLAG_AF_RAW_LOC)) { + Flags |= FLAG_A; } - case OP_LOADREGISTER: { - auto Op = IROp->CW(); - if (Op->Class != GPRClass || Op->StaticClass != GPRFixedClass) - break; + // The mental model of InvalidateFlags is writing undefined values to all + // of the selected flags, allowing the write-after-write optimizations to + // optimize invalidate-after-write for free. + return { + .Write = Flags, + .CanEliminate = true, + }; + } - return {.Read = FlagForOffset(Op->Offset)}; + case OP_LOADREGISTER: { + auto Op = IROp->CW(); + if (Op->Class != GPRClass || Op->StaticClass != GPRFixedClass) { + break; } - case OP_STOREREGISTER: { - auto Op = IROp->CW(); - if (Op->Class != GPRClass || Op->StaticClass != GPRFixedClass) - break; - - LOGMAN_THROW_A_FMT(!Op->IsPrewrite, "PF/AF writes are fixed-form"); - unsigned Flag = FlagForOffset(Op->Offset); + return {.Read = FlagForOffset(Op->Offset)}; + } - return { - .Write = Flag, - .CanEliminate = Flag != 0, - }; + case OP_STOREREGISTER: { + auto Op = IROp->CW(); + if (Op->Class != GPRClass || Op->StaticClass != GPRFixedClass) { + break; } - default: - break; + LOGMAN_THROW_A_FMT(!Op->IsPrewrite, "PF/AF writes are fixed-form"); + unsigned Flag = FlagForOffset(Op->Offset); + + return { + .Write = Flag, + .CanEliminate = Flag != 0, + }; + } + + default: break; } return {.Trivial = true}; @@ -264,7 +253,7 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp) /** * @brief This pass removes flag calculations that will otherwise be unused INSIDE of that block */ -bool DeadFlagCalculationEliminination::Run(IREmitter *IREmit) { +bool DeadFlagCalculationEliminination::Run(IREmitter* IREmit) { FEXCORE_PROFILE_SCOPED("PassManager::DFE"); bool Changed = false; @@ -320,8 +309,9 @@ bool DeadFlagCalculationEliminination::Run(IREmitter *IREmit) { // If we eliminated the instruction, we eliminate its read too. 
This // check is required to ensure the pass converges locally in a single // iteration. - if (!Eliminated) + if (!Eliminated) { FlagsRead |= Info.Read; + } } // Iterate in reverse @@ -339,4 +329,4 @@ fextl::unique_ptr CreateDeadFlagCalculationEliminination() { return fextl::make_unique(); } -} +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp b/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp index 5e49432a13..acf551cdcd 100644 --- a/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp +++ b/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp @@ -58,8 +58,8 @@ namespace { struct RegisterNode { struct VolatileHeader { - IR::NodeID BlockID{UINT32_MAX}; - uint32_t SpillSlot{UINT32_MAX}; + IR::NodeID BlockID {UINT32_MAX}; + uint32_t SpillSlot {UINT32_MAX}; uint64_t Padding; }; @@ -73,40 +73,40 @@ namespace { struct RegisterSet { fextl::vector Classes; uint32_t ClassCount; - uint32_t Conflicts[ 8 * 8 * 32 * 32]; + uint32_t Conflicts[8 * 8 * 32 * 32]; }; struct LiveRange { - IR::NodeID Begin{UINT32_MAX}; - IR::NodeID End{UINT32_MAX}; - uint32_t RematCost{0}; - IR::NodeID PreWritten{0}; - PhysicalRegister PrefferedRegister{PhysicalRegister::Invalid()}; - bool Written{false}; - bool Global{false}; + IR::NodeID Begin {UINT32_MAX}; + IR::NodeID End {UINT32_MAX}; + uint32_t RematCost {0}; + IR::NodeID PreWritten {0}; + PhysicalRegister PrefferedRegister {PhysicalRegister::Invalid()}; + bool Written {false}; + bool Global {false}; }; struct SpillStackUnit { IR::NodeID Node; IR::RegisterClassType Class; LiveRange SpillRange; - IR::OrderedNode *SpilledNode; + IR::OrderedNode* SpilledNode; }; struct RegisterGraph : public FEXCore::Allocator::FEXAllocOperators { IR::RegisterAllocationData::UniquePtr AllocData; RegisterSet Set; - fextl::vector Nodes{}; - uint32_t NodeCount{}; + fextl::vector Nodes {}; + uint32_t NodeCount {}; fextl::vector SpillStack; fextl::unordered_map> BlockPredecessors; 
fextl::unordered_map> VisitedNodePredecessors; }; - void ResetRegisterGraph(RegisterGraph *Graph, uint64_t NodeCount); + void ResetRegisterGraph(RegisterGraph* Graph, uint64_t NodeCount); - RegisterGraph *AllocateRegisterGraph(uint32_t ClassCount) { - RegisterGraph *Graph = new RegisterGraph{}; + RegisterGraph* AllocateRegisterGraph(uint32_t ClassCount) { + RegisterGraph* Graph = new RegisterGraph {}; // Allocate the register set Graph->Set.ClassCount = ClassCount; @@ -118,24 +118,25 @@ namespace { } - void AllocatePhysicalRegisters(RegisterGraph *Graph, FEXCore::IR::RegisterClassType Class, uint32_t Count) { + void AllocatePhysicalRegisters(RegisterGraph* Graph, FEXCore::IR::RegisterClassType Class, uint32_t Count) { Graph->Set.Classes[Class].CountMask = (1 << Count) - 1; Graph->Set.Classes[Class].PhysicalCount = Count; } - void SetConflict(RegisterGraph *Graph, PhysicalRegister RegAndClass, PhysicalRegister ConflictRegAndClass) { + void SetConflict(RegisterGraph* Graph, PhysicalRegister RegAndClass, PhysicalRegister ConflictRegAndClass) { uint32_t Index = (ConflictRegAndClass.Class << 8) | RegAndClass.Raw; Graph->Set.Conflicts[Index] |= 1 << ConflictRegAndClass.Reg; } - uint32_t GetConflicts(RegisterGraph *Graph, PhysicalRegister RegAndClass, FEXCore::IR::RegisterClassType ConflictClass) { + uint32_t GetConflicts(RegisterGraph* Graph, PhysicalRegister RegAndClass, FEXCore::IR::RegisterClassType ConflictClass) { uint32_t Index = (ConflictClass.Val << 8) | RegAndClass.Raw; return Graph->Set.Conflicts[Index]; } - void VirtualAddRegisterConflict(RegisterGraph *Graph, FEXCore::IR::RegisterClassType ClassConflict, uint32_t RegConflict, FEXCore::IR::RegisterClassType Class, uint32_t Reg) { + void VirtualAddRegisterConflict(RegisterGraph* Graph, FEXCore::IR::RegisterClassType ClassConflict, uint32_t RegConflict, + FEXCore::IR::RegisterClassType Class, uint32_t Reg) { auto RegAndClass = PhysicalRegister(Class, Reg); auto RegAndClassConflict = 
PhysicalRegister(ClassConflict, RegConflict); @@ -145,11 +146,11 @@ namespace { SetConflict(Graph, RegAndClassConflict, RegAndClass); } - void FreeRegisterGraph(RegisterGraph *Graph) { + void FreeRegisterGraph(RegisterGraph* Graph) { delete Graph; } - void ResetRegisterGraph(RegisterGraph *Graph, uint64_t NodeCount) { + void ResetRegisterGraph(RegisterGraph* Graph, uint64_t NodeCount) { NodeCount = FEXCore::AlignUp(NodeCount, REGISTER_NODES_PER_PAGE); // Clear to free the Bucketlists which have unique_ptrs @@ -162,46 +163,47 @@ namespace { Graph->NodeCount = NodeCount; } - void SetNodeClass(RegisterGraph *Graph, IR::NodeID Node, FEXCore::IR::RegisterClassType Class) { + void SetNodeClass(RegisterGraph* Graph, IR::NodeID Node, FEXCore::IR::RegisterClassType Class) { Graph->AllocData->Map[Node.Value].Class = Class.Val; } - FEXCore::IR::RegisterClassType GetRegClassFromNode(FEXCore::IR::IRListView *IR, FEXCore::IR::IROp_Header *IROp) { + FEXCore::IR::RegisterClassType GetRegClassFromNode(FEXCore::IR::IRListView* IR, FEXCore::IR::IROp_Header* IROp) { using namespace FEXCore; FEXCore::IR::RegisterClassType Class = IR::GetRegClass(IROp->Op); - if (Class != FEXCore::IR::ComplexClass) + if (Class != FEXCore::IR::ComplexClass) { return Class; + } // Complex register class handling switch (IROp->Op) { - case IR::OP_LOADCONTEXT: { - auto Op = IROp->C(); - return Op->Class; - break; - } - case IR::OP_LOADREGISTER: { - auto Op = IROp->C(); - return Op->Class; - break; - } - case IR::OP_LOADCONTEXTINDEXED: { - auto Op = IROp->C(); - return Op->Class; - break; - } - case IR::OP_LOADMEM: - case IR::OP_LOADMEMTSO: { - auto Op = IROp->C(); - return Op->Class; - break; - } - case IR::OP_FILLREGISTER: { - auto Op = IROp->C(); - return Op->Class; - break; - } - default: break; + case IR::OP_LOADCONTEXT: { + auto Op = IROp->C(); + return Op->Class; + break; + } + case IR::OP_LOADREGISTER: { + auto Op = IROp->C(); + return Op->Class; + break; + } + case IR::OP_LOADCONTEXTINDEXED: { + 
auto Op = IROp->C(); + return Op->Class; + break; + } + case IR::OP_LOADMEM: + case IR::OP_LOADMEMTSO: { + auto Op = IROp->C(); + return Op->Class; + break; + } + case IR::OP_FILLREGISTER: { + auto Op = IROp->C(); + return Op->Class; + break; + } + default: break; } // Unreachable @@ -209,576 +211,557 @@ namespace { }; // Walk the IR and set the node classes - void FindNodeClasses(RegisterGraph *Graph, FEXCore::IR::IRListView *IR) { + void FindNodeClasses(RegisterGraph* Graph, FEXCore::IR::IRListView* IR) { for (auto [CodeNode, IROp] : IR->GetAllCode()) { // If the destination hasn't yet been set then set it now if (GetHasDest(IROp->Op)) { const auto ID = IR->GetID(CodeNode); Graph->AllocData->Map[ID.Value] = PhysicalRegister(GetRegClassFromNode(IR, IROp), INVALID_REG); } else { - //Graph->AllocData->Map[IR->GetID(CodeNode)] = PhysicalRegister::Invalid(); + // Graph->AllocData->Map[IR->GetID(CodeNode)] = PhysicalRegister::Invalid(); } } } } // Anonymous namespace - class ConstrainedRAPass final : public RegisterAllocationPass { - public: - ConstrainedRAPass(FEXCore::IR::Pass* _CompactionPass, bool SupportsAVX); - ~ConstrainedRAPass(); - bool Run(IREmitter *IREmit) override; +class ConstrainedRAPass final : public RegisterAllocationPass { +public: + ConstrainedRAPass(FEXCore::IR::Pass* _CompactionPass, bool SupportsAVX); + ~ConstrainedRAPass(); + bool Run(IREmitter* IREmit) override; - void AllocateRegisterSet(uint32_t ClassCount) override; - void AddRegisters(FEXCore::IR::RegisterClassType Class, uint32_t RegisterCount) override; - void AddRegisterConflict(FEXCore::IR::RegisterClassType ClassConflict, uint32_t RegConflict, FEXCore::IR::RegisterClassType Class, uint32_t Reg) override; + void AllocateRegisterSet(uint32_t ClassCount) override; + void AddRegisters(FEXCore::IR::RegisterClassType Class, uint32_t RegisterCount) override; + void AddRegisterConflict(FEXCore::IR::RegisterClassType ClassConflict, uint32_t RegConflict, FEXCore::IR::RegisterClassType Class, + 
uint32_t Reg) override; - /** - * @brief Returns the register and class encoded together - * Top 32bits is the class, lower 32bits is the register - */ - RegisterAllocationData* GetAllocationData() override; - RegisterAllocationData::UniquePtr PullAllocationData() override; + /** + * @brief Returns the register and class encoded together + * Top 32bits is the class, lower 32bits is the register + */ + RegisterAllocationData* GetAllocationData() override; + RegisterAllocationData::UniquePtr PullAllocationData() override; - private: - using BlockInterferences = fextl::vector; +private: + using BlockInterferences = fextl::vector; - IR::NodeID SpillPointId; + IR::NodeID SpillPointId; - fextl::vector> SpanStart; - fextl::vector> SpanEnd; + fextl::vector> SpanStart; + fextl::vector> SpanEnd; - RegisterGraph *Graph; - FEXCore::IR::Pass* CompactionPass; - bool SupportsAVX; + RegisterGraph* Graph; + FEXCore::IR::Pass* CompactionPass; + bool SupportsAVX; - fextl::vector LiveRanges; + fextl::vector LiveRanges; - fextl::unordered_map LocalBlockInterferences; - BlockInterferences GlobalBlockInterferences; + fextl::unordered_map LocalBlockInterferences; + BlockInterferences GlobalBlockInterferences; - [[nodiscard]] static constexpr uint32_t InfoMake(uint32_t id, uint32_t Class) { - return id | (Class << 24); - } - [[nodiscard]] static constexpr uint32_t InfoIDClass(uint32_t info) { - return info & 0xffff'ffff; - } - [[nodiscard]] static constexpr IR::NodeID InfoID(uint32_t info) { - return IR::NodeID{info & 0xff'ffff}; - } - [[nodiscard]] static constexpr uint32_t InfoClass(uint32_t info) { - return info & 0xff00'0000; - } + [[nodiscard]] + static constexpr uint32_t InfoMake(uint32_t id, uint32_t Class) { + return id | (Class << 24); + } + [[nodiscard]] + static constexpr uint32_t InfoIDClass(uint32_t info) { + return info & 0xffff'ffff; + } + [[nodiscard]] + static constexpr IR::NodeID InfoID(uint32_t info) { + return IR::NodeID {info & 0xff'ffff}; + } + [[nodiscard]] + static 
constexpr uint32_t InfoClass(uint32_t info) { + return info & 0xff00'0000; + } - void SpillOne(FEXCore::IR::IREmitter *IREmit); - - void CalculateLiveRange(FEXCore::IR::IRListView *IR); - void OptimizeStaticRegisters(FEXCore::IR::IRListView *IR); - void CalculateBlockInterferences(FEXCore::IR::IRListView *IR); - void CalculateBlockNodeInterference(FEXCore::IR::IRListView *IR); - void CalculateNodeInterference(FEXCore::IR::IRListView *IR); - void AllocateVirtualRegisters(); - void CalculatePredecessors(FEXCore::IR::IRListView *IR); - void RecursiveLiveRangeExpansion(FEXCore::IR::IRListView *IR, - IR::NodeID Node, IR::NodeID DefiningBlockID, - LiveRange *LiveRange, - const fextl::unordered_set &Predecessors, - fextl::unordered_set &VisitedPredecessors); - - FEXCore::IR::AllNodesIterator FindFirstUse(FEXCore::IR::IREmitter *IREmit, FEXCore::IR::OrderedNode* Node, FEXCore::IR::AllNodesIterator Begin, FEXCore::IR::AllNodesIterator End); - FEXCore::IR::AllNodesIterator FindLastUseBefore(FEXCore::IR::IREmitter *IREmit, FEXCore::IR::OrderedNode* Node, FEXCore::IR::AllNodesIterator Begin, FEXCore::IR::AllNodesIterator End); - - std::optional FindNodeToSpill(IREmitter *IREmit, - RegisterNode *RegisterNode, - IR::NodeID CurrentLocation, - LiveRange const *OpLiveRange, - int32_t RematCost = -1); - uint32_t FindSpillSlot(IR::NodeID Node, FEXCore::IR::RegisterClassType RegisterClass); - - bool RunAllocateVirtualRegisters(IREmitter *IREmit); - }; + void SpillOne(FEXCore::IR::IREmitter* IREmit); - ConstrainedRAPass::ConstrainedRAPass(FEXCore::IR::Pass* _CompactionPass, bool _SupportsAVX) - : CompactionPass {_CompactionPass}, SupportsAVX{_SupportsAVX} { - } + void CalculateLiveRange(FEXCore::IR::IRListView* IR); + void OptimizeStaticRegisters(FEXCore::IR::IRListView* IR); + void CalculateBlockInterferences(FEXCore::IR::IRListView* IR); + void CalculateBlockNodeInterference(FEXCore::IR::IRListView* IR); + void CalculateNodeInterference(FEXCore::IR::IRListView* IR); + void 
AllocateVirtualRegisters(); + void CalculatePredecessors(FEXCore::IR::IRListView* IR); + void RecursiveLiveRangeExpansion(FEXCore::IR::IRListView* IR, IR::NodeID Node, IR::NodeID DefiningBlockID, LiveRange* LiveRange, + const fextl::unordered_set& Predecessors, fextl::unordered_set& VisitedPredecessors); - ConstrainedRAPass::~ConstrainedRAPass() { - FreeRegisterGraph(Graph); - } + FEXCore::IR::AllNodesIterator FindFirstUse(FEXCore::IR::IREmitter* IREmit, FEXCore::IR::OrderedNode* Node, + FEXCore::IR::AllNodesIterator Begin, FEXCore::IR::AllNodesIterator End); + FEXCore::IR::AllNodesIterator FindLastUseBefore(FEXCore::IR::IREmitter* IREmit, FEXCore::IR::OrderedNode* Node, + FEXCore::IR::AllNodesIterator Begin, FEXCore::IR::AllNodesIterator End); - void ConstrainedRAPass::AllocateRegisterSet(uint32_t ClassCount) { - LOGMAN_THROW_AA_FMT(ClassCount <= INVALID_CLASS, "Up to {} classes supported", INVALID_CLASS); + std::optional FindNodeToSpill(IREmitter* IREmit, RegisterNode* RegisterNode, IR::NodeID CurrentLocation, + const LiveRange* OpLiveRange, int32_t RematCost = -1); + uint32_t FindSpillSlot(IR::NodeID Node, FEXCore::IR::RegisterClassType RegisterClass); - Graph = AllocateRegisterGraph(ClassCount); + bool RunAllocateVirtualRegisters(IREmitter* IREmit); +}; - // Add identity conflicts - for (uint32_t Class = 0; Class < INVALID_CLASS; Class++) { - for (uint32_t Reg = 0; Reg < INVALID_REG; Reg++) { - AddRegisterConflict(RegisterClassType{Class}, Reg, RegisterClassType{Class}, Reg); - } +ConstrainedRAPass::ConstrainedRAPass(FEXCore::IR::Pass* _CompactionPass, bool _SupportsAVX) + : CompactionPass {_CompactionPass} + , SupportsAVX {_SupportsAVX} {} + +ConstrainedRAPass::~ConstrainedRAPass() { + FreeRegisterGraph(Graph); +} + +void ConstrainedRAPass::AllocateRegisterSet(uint32_t ClassCount) { + LOGMAN_THROW_AA_FMT(ClassCount <= INVALID_CLASS, "Up to {} classes supported", INVALID_CLASS); + + Graph = AllocateRegisterGraph(ClassCount); + + // Add identity conflicts + for 
(uint32_t Class = 0; Class < INVALID_CLASS; Class++) { + for (uint32_t Reg = 0; Reg < INVALID_REG; Reg++) { + AddRegisterConflict(RegisterClassType {Class}, Reg, RegisterClassType {Class}, Reg); } } +} - void ConstrainedRAPass::AddRegisters(FEXCore::IR::RegisterClassType Class, uint32_t RegisterCount) { - LOGMAN_THROW_AA_FMT(RegisterCount <= INVALID_REG, "Up to {} regs supported", INVALID_REG); +void ConstrainedRAPass::AddRegisters(FEXCore::IR::RegisterClassType Class, uint32_t RegisterCount) { + LOGMAN_THROW_AA_FMT(RegisterCount <= INVALID_REG, "Up to {} regs supported", INVALID_REG); - AllocatePhysicalRegisters(Graph, Class, RegisterCount); - } + AllocatePhysicalRegisters(Graph, Class, RegisterCount); +} - void ConstrainedRAPass::AddRegisterConflict(FEXCore::IR::RegisterClassType ClassConflict, uint32_t RegConflict, FEXCore::IR::RegisterClassType Class, uint32_t Reg) { - VirtualAddRegisterConflict(Graph, ClassConflict, RegConflict, Class, Reg); - } +void ConstrainedRAPass::AddRegisterConflict(FEXCore::IR::RegisterClassType ClassConflict, uint32_t RegConflict, + FEXCore::IR::RegisterClassType Class, uint32_t Reg) { + VirtualAddRegisterConflict(Graph, ClassConflict, RegConflict, Class, Reg); +} - RegisterAllocationData* ConstrainedRAPass::GetAllocationData() { - return Graph->AllocData.get(); - } +RegisterAllocationData* ConstrainedRAPass::GetAllocationData() { + return Graph->AllocData.get(); +} - RegisterAllocationData::UniquePtr ConstrainedRAPass::PullAllocationData() { - return std::move(Graph->AllocData); - } +RegisterAllocationData::UniquePtr ConstrainedRAPass::PullAllocationData() { + return std::move(Graph->AllocData); +} - void ConstrainedRAPass::RecursiveLiveRangeExpansion(IR::IRListView *IR, - IR::NodeID Node, IR::NodeID DefiningBlockID, - LiveRange *LiveRange, - const fextl::unordered_set &Predecessors, - fextl::unordered_set &VisitedPredecessors) { - for (auto PredecessorId: Predecessors) { - if (DefiningBlockID != PredecessorId && 
!VisitedPredecessors.contains(PredecessorId)) { - // do the magic - VisitedPredecessors.insert(PredecessorId); +void ConstrainedRAPass::RecursiveLiveRangeExpansion(IR::IRListView* IR, IR::NodeID Node, IR::NodeID DefiningBlockID, LiveRange* LiveRange, + const fextl::unordered_set& Predecessors, + fextl::unordered_set& VisitedPredecessors) { + for (auto PredecessorId : Predecessors) { + if (DefiningBlockID != PredecessorId && !VisitedPredecessors.contains(PredecessorId)) { + // do the magic + VisitedPredecessors.insert(PredecessorId); - auto [_, IROp] = *IR->at(PredecessorId); + auto [_, IROp] = *IR->at(PredecessorId); - auto Op = IROp->C(); - const auto BeginID = Op->Begin.ID(); - const auto LastID = Op->Last.ID(); + auto Op = IROp->C(); + const auto BeginID = Op->Begin.ID(); + const auto LastID = Op->Last.ID(); - LOGMAN_THROW_AA_FMT(Op->Header.Op == OP_CODEBLOCK, "Block not defined by codeblock?"); + LOGMAN_THROW_AA_FMT(Op->Header.Op == OP_CODEBLOCK, "Block not defined by codeblock?"); - LiveRange->Begin = std::min(LiveRange->Begin, BeginID); - LiveRange->End = std::max(LiveRange->End, BeginID); + LiveRange->Begin = std::min(LiveRange->Begin, BeginID); + LiveRange->End = std::max(LiveRange->End, BeginID); - LiveRange->Begin = std::min(LiveRange->Begin, LastID); - LiveRange->End = std::max(LiveRange->End, LastID); + LiveRange->Begin = std::min(LiveRange->Begin, LastID); + LiveRange->End = std::max(LiveRange->End, LastID); - RecursiveLiveRangeExpansion(IR, Node, DefiningBlockID, LiveRange, - Graph->BlockPredecessors[PredecessorId], - VisitedPredecessors); - } + RecursiveLiveRangeExpansion(IR, Node, DefiningBlockID, LiveRange, Graph->BlockPredecessors[PredecessorId], VisitedPredecessors); } } +} - [[nodiscard]] static uint32_t CalculateRematCost(IROps Op) { - constexpr uint32_t DEFAULT_REMAT_COST = 1000; +[[nodiscard]] +static uint32_t CalculateRematCost(IROps Op) { + constexpr uint32_t DEFAULT_REMAT_COST = 1000; - switch (Op) { - case IR::OP_CONSTANT: - return 1; + 
switch (Op) { + case IR::OP_CONSTANT: return 1; - case IR::OP_LOADFLAG: - case IR::OP_LOADCONTEXT: - case IR::OP_LOADREGISTER: - return 10; + case IR::OP_LOADFLAG: + case IR::OP_LOADCONTEXT: + case IR::OP_LOADREGISTER: return 10; - case IR::OP_LOADMEM: - case IR::OP_LOADMEMTSO: - return 100; + case IR::OP_LOADMEM: + case IR::OP_LOADMEMTSO: return 100; - case IR::OP_FILLREGISTER: - return DEFAULT_REMAT_COST + 1; + case IR::OP_FILLREGISTER: return DEFAULT_REMAT_COST + 1; - default: - return DEFAULT_REMAT_COST; - } + default: return DEFAULT_REMAT_COST; } +} - void ConstrainedRAPass::CalculateLiveRange(FEXCore::IR::IRListView *IR) { - using namespace FEXCore; - size_t Nodes = IR->GetSSACount(); - LiveRanges.clear(); - LiveRanges.resize(Nodes); +void ConstrainedRAPass::CalculateLiveRange(FEXCore::IR::IRListView* IR) { + using namespace FEXCore; + size_t Nodes = IR->GetSSACount(); + LiveRanges.clear(); + LiveRanges.resize(Nodes); - for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) { - const auto BlockNodeID = IR->GetID(BlockNode); - for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) { - const auto Node = IR->GetID(CodeNode); - auto& NodeLiveRange = LiveRanges[Node.Value]; - - // If the destination hasn't yet been set then set it now - if (GetHasDest(IROp->Op)) { - LOGMAN_THROW_AA_FMT(NodeLiveRange.Begin.Value == UINT32_MAX, - "Node begin already defined?"); - NodeLiveRange.Begin = Node; - // Default to ending right where after it starts - NodeLiveRange.End = IR::NodeID{Node.Value + 1}; - } + for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) { + const auto BlockNodeID = IR->GetID(BlockNode); + for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) { + const auto Node = IR->GetID(CodeNode); + auto& NodeLiveRange = LiveRanges[Node.Value]; + + // If the destination hasn't yet been set then set it now + if (GetHasDest(IROp->Op)) { + LOGMAN_THROW_AA_FMT(NodeLiveRange.Begin.Value == UINT32_MAX, "Node begin already defined?"); + NodeLiveRange.Begin = Node; + // Default 
to ending right where after it starts + NodeLiveRange.End = IR::NodeID {Node.Value + 1}; + } + + // Calculate remat cost + NodeLiveRange.RematCost = CalculateRematCost(IROp->Op); - // Calculate remat cost - NodeLiveRange.RematCost = CalculateRematCost(IROp->Op); + // Set this node's block ID + Graph->Nodes[Node.Value].Head.BlockID = BlockNodeID; - // Set this node's block ID - Graph->Nodes[Node.Value].Head.BlockID = BlockNodeID; + // FillRegister's SSA arg is only there for verification, and we don't want it + // to impact the live range. + if (IROp->Op == OP_FILLREGISTER) { + continue; + } - // FillRegister's SSA arg is only there for verification, and we don't want it - // to impact the live range. - if (IROp->Op == OP_FILLREGISTER) { + const uint8_t NumArgs = IR::GetRAArgs(IROp->Op); + for (uint8_t i = 0; i < NumArgs; ++i) { + const auto& Arg = IROp->Args[i]; + + if (Arg.IsInvalid()) { + continue; + } + if (IR->GetOp(Arg)->Op == OP_INLINECONSTANT) { + continue; + } + if (IR->GetOp(Arg)->Op == OP_INLINEENTRYPOINTOFFSET) { + continue; + } + if (IR->GetOp(Arg)->Op == OP_IRHEADER) { continue; } - const uint8_t NumArgs = IR::GetRAArgs(IROp->Op); - for (uint8_t i = 0; i < NumArgs; ++i) { - const auto& Arg = IROp->Args[i]; + const auto ArgNode = Arg.ID(); + auto& ArgNodeLiveRange = LiveRanges[ArgNode.Value]; + LOGMAN_THROW_AA_FMT(ArgNodeLiveRange.Begin.Value != UINT32_MAX, "%{} used by %{} before defined?", ArgNode, Node); - if (Arg.IsInvalid()) { - continue; - } - if (IR->GetOp(Arg)->Op == OP_INLINECONSTANT) { - continue; - } - if (IR->GetOp(Arg)->Op == OP_INLINEENTRYPOINTOFFSET) { - continue; - } - if (IR->GetOp(Arg)->Op == OP_IRHEADER) { - continue; - } + const auto ArgNodeBlockID = Graph->Nodes[ArgNode.Value].Head.BlockID; + if (ArgNodeBlockID == BlockNodeID) { + // Set the node end to be at least here + ArgNodeLiveRange.End = Node; + } else { + ArgNodeLiveRange.Global = true; - const auto ArgNode = Arg.ID(); - auto& ArgNodeLiveRange = LiveRanges[ArgNode.Value]; - 
LOGMAN_THROW_AA_FMT(ArgNodeLiveRange.Begin.Value != UINT32_MAX, - "%{} used by %{} before defined?", ArgNode, Node); - - const auto ArgNodeBlockID = Graph->Nodes[ArgNode.Value].Head.BlockID; - if (ArgNodeBlockID == BlockNodeID) { - // Set the node end to be at least here - ArgNodeLiveRange.End = Node; - } else { - ArgNodeLiveRange.Global = true; - - // Grow the live range to include this use - ArgNodeLiveRange.Begin = std::min(ArgNodeLiveRange.Begin, Node); - ArgNodeLiveRange.End = std::max(ArgNodeLiveRange.End, Node); - - // Can't spill this range, it is MB - ArgNodeLiveRange.RematCost = -1; - - // Include any blocks this value passes through in the live range - RecursiveLiveRangeExpansion(IR, ArgNode, ArgNodeBlockID, &ArgNodeLiveRange, - Graph->BlockPredecessors[BlockNodeID], - Graph->VisitedNodePredecessors[ArgNode]); - } + // Grow the live range to include this use + ArgNodeLiveRange.Begin = std::min(ArgNodeLiveRange.Begin, Node); + ArgNodeLiveRange.End = std::max(ArgNodeLiveRange.End, Node); + + // Can't spill this range, it is MB + ArgNodeLiveRange.RematCost = -1; + + // Include any blocks this value passes through in the live range + RecursiveLiveRangeExpansion(IR, ArgNode, ArgNodeBlockID, &ArgNodeLiveRange, Graph->BlockPredecessors[BlockNodeID], + Graph->VisitedNodePredecessors[ArgNode]); } } } } +} - void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView *IR) { +void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) { - // Helpers + // Helpers - // Is an OP_STOREREGISTER eligible to write directly to the SRA reg? 
- auto IsPreWritable = [this](uint8_t Size, RegisterClassType StaticClass) { - LOGMAN_THROW_A_FMT(StaticClass == GPRFixedClass || StaticClass == FPRFixedClass, "Unexpected static class {}", StaticClass); - if (StaticClass == GPRFixedClass) { - return Size == 8 || Size == 4; - } else if (StaticClass == FPRFixedClass) { - return Size == 16 || (Size == 32 && SupportsAVX); - } - return false; // Unknown - }; + // Is an OP_STOREREGISTER eligible to write directly to the SRA reg? + auto IsPreWritable = [this](uint8_t Size, RegisterClassType StaticClass) { + LOGMAN_THROW_A_FMT(StaticClass == GPRFixedClass || StaticClass == FPRFixedClass, "Unexpected static class {}", StaticClass); + if (StaticClass == GPRFixedClass) { + return Size == 8 || Size == 4; + } else if (StaticClass == FPRFixedClass) { + return Size == 16 || (Size == 32 && SupportsAVX); + } + return false; // Unknown + }; - // Is an OP_LOADREGISTER eligible to read directly from the SRA reg? - auto IsAliasable = [this](uint8_t Size, RegisterClassType StaticClass, uint32_t Offset) { - LOGMAN_THROW_A_FMT(StaticClass == GPRFixedClass || StaticClass == FPRFixedClass, "Unexpected static class {}", StaticClass); - if (StaticClass == GPRFixedClass) { - // We need more meta info to support not-size-of-reg - return (Size == 8 || Size == 4) && ((Offset & 7) == 0); - } else if (StaticClass == FPRFixedClass) { - // We need more meta info to support not-size-of-reg - if (Size == 32 && SupportsAVX && (Offset & 31) == 0) { - return true; - } - return (Size == 16 /*|| Size == 8 || Size == 4*/) && ((Offset & 15) == 0); + // Is an OP_LOADREGISTER eligible to read directly from the SRA reg? 
+ auto IsAliasable = [this](uint8_t Size, RegisterClassType StaticClass, uint32_t Offset) { + LOGMAN_THROW_A_FMT(StaticClass == GPRFixedClass || StaticClass == FPRFixedClass, "Unexpected static class {}", StaticClass); + if (StaticClass == GPRFixedClass) { + // We need more meta info to support not-size-of-reg + return (Size == 8 || Size == 4) && ((Offset & 7) == 0); + } else if (StaticClass == FPRFixedClass) { + // We need more meta info to support not-size-of-reg + if (Size == 32 && SupportsAVX && (Offset & 31) == 0) { + return true; } - return false; // Unknown - }; + return (Size == 16 /*|| Size == 8 || Size == 4*/) && ((Offset & 15) == 0); + } + return false; // Unknown + }; - const auto GetFPRBeginAndEnd = [this]() -> std::pair { - if (SupportsAVX) { - return { - offsetof(FEXCore::Core::CpuStateFrame, State.xmm.avx.data[0][0]), - offsetof(FEXCore::Core::CpuStateFrame, State.xmm.avx.data[16][0]), - }; - } else { - return { - offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[0][0]), - offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[16][0]), - }; - } - }; + const auto GetFPRBeginAndEnd = [this]() -> std::pair { + if (SupportsAVX) { + return { + offsetof(FEXCore::Core::CpuStateFrame, State.xmm.avx.data[0][0]), + offsetof(FEXCore::Core::CpuStateFrame, State.xmm.avx.data[16][0]), + }; + } else { + return { + offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[0][0]), + offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[16][0]), + }; + } + }; - // Get SRA Reg and Class from a Context offset - const auto GetRegAndClassFromOffset = [&, this](uint32_t Offset) { - const auto beginGpr = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[0]); - const auto endGpr = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[16]); - const auto pf = offsetof(FEXCore::Core::CpuStateFrame, State.pf_raw); - const auto af = offsetof(FEXCore::Core::CpuStateFrame, State.af_raw); - - const auto [beginFpr, endFpr] = GetFPRBeginAndEnd(); - - 
LOGMAN_THROW_AA_FMT((Offset >= beginGpr && Offset < endGpr) || (Offset >= beginFpr && Offset < endFpr) || (Offset == pf) || (Offset == af), "Unexpected Offset {}", Offset); - - unsigned FlagOffset = - Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount - 2; - - if (Offset == pf) { - return PhysicalRegister(GPRFixedClass, FlagOffset); - } else if (Offset == af) { - return PhysicalRegister(GPRFixedClass, FlagOffset + 1); - } else if (Offset >= beginGpr && Offset < endGpr) { - auto reg = (Offset - beginGpr) / Core::CPUState::GPR_REG_SIZE; - return PhysicalRegister(GPRFixedClass, reg); - } else if (Offset >= beginFpr && Offset < endFpr) { - const auto size = SupportsAVX ? Core::CPUState::XMM_AVX_REG_SIZE - : Core::CPUState::XMM_SSE_REG_SIZE; - const auto reg = (Offset - beginFpr) / size; - return PhysicalRegister(FPRFixedClass, reg); - } + // Get SRA Reg and Class from a Context offset + const auto GetRegAndClassFromOffset = [&, this](uint32_t Offset) { + const auto beginGpr = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[0]); + const auto endGpr = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[16]); + const auto pf = offsetof(FEXCore::Core::CpuStateFrame, State.pf_raw); + const auto af = offsetof(FEXCore::Core::CpuStateFrame, State.af_raw); + + const auto [beginFpr, endFpr] = GetFPRBeginAndEnd(); + + LOGMAN_THROW_AA_FMT((Offset >= beginGpr && Offset < endGpr) || (Offset >= beginFpr && Offset < endFpr) || (Offset == pf) || (Offset == af), + "Unexpected Offset {}", Offset); + + unsigned FlagOffset = Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount - 2; + + if (Offset == pf) { + return PhysicalRegister(GPRFixedClass, FlagOffset); + } else if (Offset == af) { + return PhysicalRegister(GPRFixedClass, FlagOffset + 1); + } else if (Offset >= beginGpr && Offset < endGpr) { + auto reg = (Offset - beginGpr) / Core::CPUState::GPR_REG_SIZE; + return PhysicalRegister(GPRFixedClass, reg); + } else if (Offset >= beginFpr && Offset < endFpr) { + const auto size = 
SupportsAVX ? Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE; + const auto reg = (Offset - beginFpr) / size; + return PhysicalRegister(FPRFixedClass, reg); + } - return PhysicalRegister::Invalid(); - }; + return PhysicalRegister::Invalid(); + }; - auto GprSize = Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount; - auto MapsSize = Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount + Graph->Set.Classes[FPRFixedClass.Val].PhysicalCount; - LiveRange* StaticMaps[MapsSize]; - - // Get a StaticMap entry from context offset - const auto GetStaticMapFromOffset = [&](uint32_t Offset) -> LiveRange** { - const auto beginGpr = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[0]); - const auto endGpr = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[16]); - const auto pf = offsetof(FEXCore::Core::CpuStateFrame, State.pf_raw); - const auto af = offsetof(FEXCore::Core::CpuStateFrame, State.af_raw); - - const auto [beginFpr, endFpr] = GetFPRBeginAndEnd(); - - LOGMAN_THROW_AA_FMT((Offset >= beginGpr && Offset < endGpr) || (Offset >= beginFpr && Offset < endFpr) || (Offset == pf) || (Offset == af), "Unexpected Offset {}", Offset); - - unsigned FlagOffset = - Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount - 2; - - if (Offset == pf) { - return &StaticMaps[FlagOffset]; - } else if (Offset == af) { - return &StaticMaps[FlagOffset + 1]; - } else if (Offset >= beginGpr && Offset < endGpr) { - auto reg = (Offset - beginGpr) / Core::CPUState::GPR_REG_SIZE; - return &StaticMaps[reg]; - } else if (Offset >= beginFpr && Offset < endFpr) { - const auto size = SupportsAVX ? 
Core::CPUState::XMM_AVX_REG_SIZE - : Core::CPUState::XMM_SSE_REG_SIZE; - const auto reg = (Offset - beginFpr) / size; - return &StaticMaps[GprSize + reg]; - } + auto GprSize = Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount; + auto MapsSize = Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount + Graph->Set.Classes[FPRFixedClass.Val].PhysicalCount; + LiveRange* StaticMaps[MapsSize]; + + // Get a StaticMap entry from context offset + const auto GetStaticMapFromOffset = [&](uint32_t Offset) -> LiveRange** { + const auto beginGpr = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[0]); + const auto endGpr = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[16]); + const auto pf = offsetof(FEXCore::Core::CpuStateFrame, State.pf_raw); + const auto af = offsetof(FEXCore::Core::CpuStateFrame, State.af_raw); + + const auto [beginFpr, endFpr] = GetFPRBeginAndEnd(); + + LOGMAN_THROW_AA_FMT((Offset >= beginGpr && Offset < endGpr) || (Offset >= beginFpr && Offset < endFpr) || (Offset == pf) || (Offset == af), + "Unexpected Offset {}", Offset); + + unsigned FlagOffset = Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount - 2; + + if (Offset == pf) { + return &StaticMaps[FlagOffset]; + } else if (Offset == af) { + return &StaticMaps[FlagOffset + 1]; + } else if (Offset >= beginGpr && Offset < endGpr) { + auto reg = (Offset - beginGpr) / Core::CPUState::GPR_REG_SIZE; + return &StaticMaps[reg]; + } else if (Offset >= beginFpr && Offset < endFpr) { + const auto size = SupportsAVX ? 
Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE; + const auto reg = (Offset - beginFpr) / size; + return &StaticMaps[GprSize + reg]; + } - return nullptr; - }; + return nullptr; + }; - // Get a StaticMap entry from reg and class - const auto GetStaticMapFromReg = [&](IR::PhysicalRegister PhyReg) -> LiveRange** { - LOGMAN_THROW_A_FMT(PhyReg.Class == GPRFixedClass.Val || PhyReg.Class == FPRFixedClass.Val, "Unexpected Class {}", PhyReg.Class); + // Get a StaticMap entry from reg and class + const auto GetStaticMapFromReg = [&](IR::PhysicalRegister PhyReg) -> LiveRange** { + LOGMAN_THROW_A_FMT(PhyReg.Class == GPRFixedClass.Val || PhyReg.Class == FPRFixedClass.Val, "Unexpected Class {}", PhyReg.Class); - if (PhyReg.Class == GPRFixedClass.Val) { - return &StaticMaps[PhyReg.Reg]; - } else if (PhyReg.Class == FPRFixedClass.Val) { - return &StaticMaps[GprSize + PhyReg.Reg]; - } + if (PhyReg.Class == GPRFixedClass.Val) { + return &StaticMaps[PhyReg.Reg]; + } else if (PhyReg.Class == FPRFixedClass.Val) { + return &StaticMaps[GprSize + PhyReg.Reg]; + } - return nullptr; - }; + return nullptr; + }; - // First pass: Mark pre-writes - for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) { - for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) { - const auto Node = IR->GetID(CodeNode); + // First pass: Mark pre-writes + for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) { + for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) { + const auto Node = IR->GetID(CodeNode); - if (IROp->Op == OP_STOREREGISTER) { - auto Op = IROp->C(); - const auto OpID = Op->Value.ID(); - auto& OpLiveRange = LiveRanges[OpID.Value]; + if (IROp->Op == OP_STOREREGISTER) { + auto Op = IROp->C(); + const auto OpID = Op->Value.ID(); + auto& OpLiveRange = LiveRanges[OpID.Value]; - if (IsPreWritable(IROp->Size, Op->StaticClass) - && OpLiveRange.PrefferedRegister.IsInvalid() - && !OpLiveRange.Global) { + if (IsPreWritable(IROp->Size, Op->StaticClass) && 
OpLiveRange.PrefferedRegister.IsInvalid() && !OpLiveRange.Global) { - // Pre-write and sra-allocate in the defining node - this might be undone if a read before the actual store happens - SRA_DEBUG("Prewritting ssa{} (Store in ssa{})\n", OpID, Node); - OpLiveRange.PrefferedRegister = GetRegAndClassFromOffset(Op->Offset); - OpLiveRange.PreWritten = Node; - SetNodeClass(Graph, OpID, Op->StaticClass); - } + // Pre-write and sra-allocate in the defining node - this might be undone if a read before the actual store happens + SRA_DEBUG("Prewritting ssa{} (Store in ssa{})\n", OpID, Node); + OpLiveRange.PrefferedRegister = GetRegAndClassFromOffset(Op->Offset); + OpLiveRange.PreWritten = Node; + SetNodeClass(Graph, OpID, Op->StaticClass); } } } + } - // Second pass: - // - Demote pre-writes if read after pre-write - // - Mark read-aliases - // - Demote read-aliases if SRA reg is written before the alias's last read - for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) { - memset(StaticMaps, 0, MapsSize * sizeof(LiveRange*)); - for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) { - const auto Node = IR->GetID(CodeNode); - auto& NodeLiveRange = LiveRanges[Node.Value]; - - // Check for read-after-write and demote if it happens - const uint8_t NumArgs = IR::GetRAArgs(IROp->Op); - for (uint8_t i = 0; i < NumArgs; ++i) { - const auto& Arg = IROp->Args[i]; + // Second pass: + // - Demote pre-writes if read after pre-write + // - Mark read-aliases + // - Demote read-aliases if SRA reg is written before the alias's last read + for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) { + memset(StaticMaps, 0, MapsSize * sizeof(LiveRange*)); + for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) { + const auto Node = IR->GetID(CodeNode); + auto& NodeLiveRange = LiveRanges[Node.Value]; + + // Check for read-after-write and demote if it happens + const uint8_t NumArgs = IR::GetRAArgs(IROp->Op); + for (uint8_t i = 0; i < NumArgs; ++i) { + const auto& Arg = IROp->Args[i]; - if 
(Arg.IsInvalid()) { - continue; - } - if (IR->GetOp(Arg)->Op == OP_INLINECONSTANT) { - continue; - } - if (IR->GetOp(Arg)->Op == OP_INLINEENTRYPOINTOFFSET) { - continue; - } - if (IR->GetOp(Arg)->Op == OP_IRHEADER) { - continue; - } + if (Arg.IsInvalid()) { + continue; + } + if (IR->GetOp(Arg)->Op == OP_INLINECONSTANT) { + continue; + } + if (IR->GetOp(Arg)->Op == OP_INLINEENTRYPOINTOFFSET) { + continue; + } + if (IR->GetOp(Arg)->Op == OP_IRHEADER) { + continue; + } - const auto ArgNode = Arg.ID(); - auto& ArgNodeLiveRange = LiveRanges[ArgNode.Value]; + const auto ArgNode = Arg.ID(); + auto& ArgNodeLiveRange = LiveRanges[ArgNode.Value]; - // ACCESSED after write, let's not SRA this one - if (ArgNodeLiveRange.Written) { - SRA_DEBUG("Demoting ssa{} because accessed after write in ssa{}\n", ArgNode, Node); - ArgNodeLiveRange.PrefferedRegister = PhysicalRegister::Invalid(); - auto ArgNodeNode = IR->GetNode(Arg); - SetNodeClass(Graph, ArgNode, GetRegClassFromNode(IR, ArgNodeNode->Op(IR->GetData()))); - } + // ACCESSED after write, let's not SRA this one + if (ArgNodeLiveRange.Written) { + SRA_DEBUG("Demoting ssa{} because accessed after write in ssa{}\n", ArgNode, Node); + ArgNodeLiveRange.PrefferedRegister = PhysicalRegister::Invalid(); + auto ArgNodeNode = IR->GetNode(Arg); + SetNodeClass(Graph, ArgNode, GetRegClassFromNode(IR, ArgNodeNode->Op(IR->GetData()))); } + } - // This op defines a span - if (GetHasDest(IROp->Op)) { - // If this is a pre-write, update the StaticMap so we track writes - if (!NodeLiveRange.PrefferedRegister.IsInvalid()) { - SRA_DEBUG("ssa{} is a pre-write\n", Node); - auto StaticMap = GetStaticMapFromReg(NodeLiveRange.PrefferedRegister); - if ((*StaticMap)) { - SRA_DEBUG("Markng ssa{} as written because ssa{} writes to sra{}\n", - (*StaticMap) - &LiveRanges[0], Node, -1 /*vreg*/); - (*StaticMap)->Written = true; - } - (*StaticMap) = &NodeLiveRange; + // This op defines a span + if (GetHasDest(IROp->Op)) { + // If this is a pre-write, update the 
StaticMap so we track writes + if (!NodeLiveRange.PrefferedRegister.IsInvalid()) { + SRA_DEBUG("ssa{} is a pre-write\n", Node); + auto StaticMap = GetStaticMapFromReg(NodeLiveRange.PrefferedRegister); + if ((*StaticMap)) { + SRA_DEBUG("Markng ssa{} as written because ssa{} writes to sra{}\n", (*StaticMap) - &LiveRanges[0], Node, -1 /*vreg*/); + (*StaticMap)->Written = true; } + (*StaticMap) = &NodeLiveRange; + } - // Opcode is an SRA read - // Check if - // - There is not a pre-write before this read. If there is one, demote to no pre-write - // - Try to read-alias if possible - if (IROp->Op == OP_LOADREGISTER) { - auto Op = IROp->C(); + // Opcode is an SRA read + // Check if + // - There is not a pre-write before this read. If there is one, demote to no pre-write + // - Try to read-alias if possible + if (IROp->Op == OP_LOADREGISTER) { + auto Op = IROp->C(); - auto StaticMap = GetStaticMapFromOffset(Op->Offset); + auto StaticMap = GetStaticMapFromOffset(Op->Offset); - // Make sure there wasn't a store pre-written before this read - if ((*StaticMap) && (*StaticMap)->PreWritten.IsValid()) { - const auto ID = IR::NodeID((*StaticMap) - &LiveRanges[0]); + // Make sure there wasn't a store pre-written before this read + if ((*StaticMap) && (*StaticMap)->PreWritten.IsValid()) { + const auto ID = IR::NodeID((*StaticMap) - &LiveRanges[0]); - SRA_DEBUG("ssa{} cannot be a pre-write because ssa{} reads from sra{} before storereg", - ID, Node, -1 /*vreg*/); - (*StaticMap)->PrefferedRegister = PhysicalRegister::Invalid(); - (*StaticMap)->PreWritten.Invalidate(); - SetNodeClass(Graph, ID, Op->Class); - } + SRA_DEBUG("ssa{} cannot be a pre-write because ssa{} reads from sra{} before storereg", ID, Node, -1 /*vreg*/); + (*StaticMap)->PrefferedRegister = PhysicalRegister::Invalid(); + (*StaticMap)->PreWritten.Invalidate(); + SetNodeClass(Graph, ID, Op->Class); + } - // if not sra-allocated and full size, sra-allocate - if (!NodeLiveRange.Global && 
NodeLiveRange.PrefferedRegister.IsInvalid()) { - // only full size reads can be aliased - if (IsAliasable(IROp->Size, Op->StaticClass, Op->Offset)) { - // We can only track a single active span. - // Marking here as written is overly agressive, but - // there might be write(s) later on the instruction stream - if ((*StaticMap)) { - SRA_DEBUG( - "Marking ssa{} as written because ssa{} re-loads sra{}, " - "and we can't track possible future writes\n", - (*StaticMap) - &LiveRanges[0], Node, -1 /*vreg*/); - (*StaticMap)->Written = true; - } - - NodeLiveRange.PrefferedRegister = GetRegAndClassFromOffset(Op->Offset); //0, 1, and so on - (*StaticMap) = &NodeLiveRange; - SetNodeClass(Graph, Node, Op->StaticClass); - SRA_DEBUG("Marking ssa{} as allocated to sra{}\n", Node, -1 /*vreg*/); + // if not sra-allocated and full size, sra-allocate + if (!NodeLiveRange.Global && NodeLiveRange.PrefferedRegister.IsInvalid()) { + // only full size reads can be aliased + if (IsAliasable(IROp->Size, Op->StaticClass, Op->Offset)) { + // We can only track a single active span. 
+ // Marking here as written is overly agressive, but + // there might be write(s) later on the instruction stream + if ((*StaticMap)) { + SRA_DEBUG("Marking ssa{} as written because ssa{} re-loads sra{}, " + "and we can't track possible future writes\n", + (*StaticMap) - &LiveRanges[0], Node, -1 /*vreg*/); + (*StaticMap)->Written = true; } + + NodeLiveRange.PrefferedRegister = GetRegAndClassFromOffset(Op->Offset); // 0, 1, and so on + (*StaticMap) = &NodeLiveRange; + SetNodeClass(Graph, Node, Op->StaticClass); + SRA_DEBUG("Marking ssa{} as allocated to sra{}\n", Node, -1 /*vreg*/); } } } + } - // OP is an OP_STOREREGISTER - // - If there was a matching pre-write, clear the pre-write flag as the register is no longer pre-written - // - Mark the SRA span as written, so that any further reads demote it from read-aliases if they happen - if (IROp->Op == OP_STOREREGISTER) { - const auto Op = IROp->C(); - const auto OpID = Op->Value.ID(); - auto& OpLiveRange = LiveRanges[OpID.Value]; - - auto StaticMap = GetStaticMapFromOffset(Op->Offset); - // if a read pending, it has been writting - if ((*StaticMap)) { - // writes to self don't invalidate the span - if ((*StaticMap)->PreWritten != Node) { - SRA_DEBUG("Marking ssa{} as written because ssa{} writes to sra{} with value ssa{}. 
Write size is {}\n", - ID, Node, -1 /*vreg*/, OpID, IROp->Size); - (*StaticMap)->Written = true; - } - } - if (OpLiveRange.PreWritten == Node) { - // no longer pre-written - OpLiveRange.PreWritten.Invalidate(); - SRA_DEBUG("Marking ssa{} as no longer pre-written as ssa{} is a storereg for sra{}\n", - OpID, Node, -1 /*vreg*/); + // OP is an OP_STOREREGISTER + // - If there was a matching pre-write, clear the pre-write flag as the register is no longer pre-written + // - Mark the SRA span as written, so that any further reads demote it from read-aliases if they happen + if (IROp->Op == OP_STOREREGISTER) { + const auto Op = IROp->C(); + const auto OpID = Op->Value.ID(); + auto& OpLiveRange = LiveRanges[OpID.Value]; + + auto StaticMap = GetStaticMapFromOffset(Op->Offset); + // if a read pending, it has been writting + if ((*StaticMap)) { + // writes to self don't invalidate the span + if ((*StaticMap)->PreWritten != Node) { + SRA_DEBUG("Marking ssa{} as written because ssa{} writes to sra{} with value ssa{}. 
Write size is {}\n", ID, Node, -1 /*vreg*/, + OpID, IROp->Size); + (*StaticMap)->Written = true; } } + if (OpLiveRange.PreWritten == Node) { + // no longer pre-written + OpLiveRange.PreWritten.Invalidate(); + SRA_DEBUG("Marking ssa{} as no longer pre-written as ssa{} is a storereg for sra{}\n", OpID, Node, -1 /*vreg*/); + } } } } +} - void ConstrainedRAPass::CalculateBlockInterferences(FEXCore::IR::IRListView *IR) { - using namespace FEXCore; +void ConstrainedRAPass::CalculateBlockInterferences(FEXCore::IR::IRListView* IR) { + using namespace FEXCore; - for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) { - auto BlockIROp = BlockHeader->CW(); - LOGMAN_THROW_AA_FMT(BlockIROp->Header.Op == IR::OP_CODEBLOCK, "IR type failed to be a code block"); + for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) { + auto BlockIROp = BlockHeader->CW(); + LOGMAN_THROW_AA_FMT(BlockIROp->Header.Op == IR::OP_CODEBLOCK, "IR type failed to be a code block"); - const auto BlockNodeID = IR->GetID(BlockNode); - const auto BlockBeginID = BlockIROp->Begin.ID(); - const auto BlockLastID = BlockIROp->Last.ID(); + const auto BlockNodeID = IR->GetID(BlockNode); + const auto BlockBeginID = BlockIROp->Begin.ID(); + const auto BlockLastID = BlockIROp->Last.ID(); - auto& BlockInterferenceVector = LocalBlockInterferences.try_emplace(BlockNodeID).first->second; - BlockInterferenceVector.reserve(BlockLastID.Value - BlockBeginID.Value); + auto& BlockInterferenceVector = LocalBlockInterferences.try_emplace(BlockNodeID).first->second; + BlockInterferenceVector.reserve(BlockLastID.Value - BlockBeginID.Value); - for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) { - const auto Node = IR->GetID(CodeNode); - LiveRange& NodeLiveRange = LiveRanges[Node.Value]; + for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) { + const auto Node = IR->GetID(CodeNode); + LiveRange& NodeLiveRange = LiveRanges[Node.Value]; - if (NodeLiveRange.Begin >= BlockBeginID && - NodeLiveRange.End <= BlockLastID) { - // If the live 
range of this node is FULLY inside of the block - // Then add it to the block specific interference list - BlockInterferenceVector.emplace_back(Node); - } - else { - // If the live range is not fully inside the block then add it to the global interference list - GlobalBlockInterferences.emplace_back(Node); - } + if (NodeLiveRange.Begin >= BlockBeginID && NodeLiveRange.End <= BlockLastID) { + // If the live range of this node is FULLY inside of the block + // Then add it to the block specific interference list + BlockInterferenceVector.emplace_back(Node); + } else { + // If the live range is not fully inside the block then add it to the global interference list + GlobalBlockInterferences.emplace_back(Node); } } } +} - void ConstrainedRAPass::CalculateBlockNodeInterference(FEXCore::IR::IRListView *IR) { - #if 0 +void ConstrainedRAPass::CalculateBlockNodeInterference(FEXCore::IR::IRListView* IR) { +#if 0 const auto AddInterference = [&](IR::NodeID Node1, IR::NodeID Node2) { RegisterNode *Node = &Graph->Nodes[Node1.Value]; Node->Interference.Set(Node2); @@ -839,640 +822,625 @@ namespace { Interferences.clear(); } } - #endif - } +#endif +} - void ConstrainedRAPass::CalculateNodeInterference(FEXCore::IR::IRListView *IR) { - const auto AddInterference = [this](IR::NodeID Node1, IR::NodeID Node2) { - RegisterNode *Node = &Graph->Nodes[Node1.Value]; - Node->Interferences.Append(Node2); - }; +void ConstrainedRAPass::CalculateNodeInterference(FEXCore::IR::IRListView* IR) { + const auto AddInterference = [this](IR::NodeID Node1, IR::NodeID Node2) { + RegisterNode* Node = &Graph->Nodes[Node1.Value]; + Node->Interferences.Append(Node2); + }; - const uint32_t NodeCount = IR->GetSSACount(); + const uint32_t NodeCount = IR->GetSSACount(); - // Now that we have all the live ranges calculated we need to add them to our interference graph + // Now that we have all the live ranges calculated we need to add them to our interference graph - const auto GetClass = [](PhysicalRegister 
PhyReg) { - if (PhyReg.Class == IR::GPRPairClass.Val) - return IR::GPRClass.Val; - else - return (uint32_t)PhyReg.Class; - }; + const auto GetClass = [](PhysicalRegister PhyReg) { + if (PhyReg.Class == IR::GPRPairClass.Val) { + return IR::GPRClass.Val; + } else { + return (uint32_t)PhyReg.Class; + } + }; - // SpanStart/SpanEnd assume SSA id will fit in 24bits - LOGMAN_THROW_AA_FMT(NodeCount <= 0xff'ffff, "Block too large for Spans"); + // SpanStart/SpanEnd assume SSA id will fit in 24bits + LOGMAN_THROW_AA_FMT(NodeCount <= 0xff'ffff, "Block too large for Spans"); - SpanStart.resize(NodeCount); - SpanEnd.resize(NodeCount); - for (uint32_t i = 0; i < NodeCount; ++i) { - const auto& NodeLiveRange = LiveRanges[i]; + SpanStart.resize(NodeCount); + SpanEnd.resize(NodeCount); + for (uint32_t i = 0; i < NodeCount; ++i) { + const auto& NodeLiveRange = LiveRanges[i]; - if (NodeLiveRange.Begin.Value != UINT32_MAX) { - LOGMAN_THROW_A_FMT(NodeLiveRange.Begin < NodeLiveRange.End , "Span must Begin before Ending"); + if (NodeLiveRange.Begin.Value != UINT32_MAX) { + LOGMAN_THROW_A_FMT(NodeLiveRange.Begin < NodeLiveRange.End, "Span must Begin before Ending"); - const auto Class = GetClass(Graph->AllocData->Map[i]); - SpanStart[NodeLiveRange.Begin.Value].Append(InfoMake(i, Class)); - SpanEnd[NodeLiveRange.End.Value] .Append(InfoMake(i, Class)); - } + const auto Class = GetClass(Graph->AllocData->Map[i]); + SpanStart[NodeLiveRange.Begin.Value].Append(InfoMake(i, Class)); + SpanEnd[NodeLiveRange.End.Value].Append(InfoMake(i, Class)); } + } - BucketList<32, uint32_t> Active; - for (size_t OpNodeId = 0; OpNodeId < IR->GetSSACount(); OpNodeId++) { - // Expire end intervals first - SpanEnd[OpNodeId].Iterate([&](uint32_t EdgeInfo) { - Active.Erase(InfoIDClass(EdgeInfo)); - }); - - // Add starting invervals - SpanStart[OpNodeId].Iterate([&](uint32_t EdgeInfo) { - // Starts here - Active.Iterate([&](uint32_t ActiveInfo) { - if (InfoClass(ActiveInfo) == InfoClass(EdgeInfo)) { - 
AddInterference(InfoID(ActiveInfo), InfoID(EdgeInfo)); - AddInterference(InfoID(EdgeInfo), InfoID(ActiveInfo)); - } - }); - Active.Append(EdgeInfo); + BucketList<32, uint32_t> Active; + for (size_t OpNodeId = 0; OpNodeId < IR->GetSSACount(); OpNodeId++) { + // Expire end intervals first + SpanEnd[OpNodeId].Iterate([&](uint32_t EdgeInfo) { Active.Erase(InfoIDClass(EdgeInfo)); }); + + // Add starting invervals + SpanStart[OpNodeId].Iterate([&](uint32_t EdgeInfo) { + // Starts here + Active.Iterate([&](uint32_t ActiveInfo) { + if (InfoClass(ActiveInfo) == InfoClass(EdgeInfo)) { + AddInterference(InfoID(ActiveInfo), InfoID(EdgeInfo)); + AddInterference(InfoID(EdgeInfo), InfoID(ActiveInfo)); + } }); - } - - LOGMAN_THROW_AA_FMT(Active.Items[0] == 0, "Interference bug"); - SpanStart.clear(); - SpanEnd.clear(); + Active.Append(EdgeInfo); + }); } - void ConstrainedRAPass::AllocateVirtualRegisters() { - for (uint32_t i = 0; i < Graph->NodeCount; ++i) { - RegisterNode *CurrentNode = &Graph->Nodes[i]; - auto &CurrentRegAndClass = Graph->AllocData->Map[i]; - if (CurrentRegAndClass == PhysicalRegister::Invalid()) - continue; - - auto LiveRange = &LiveRanges[i]; + LOGMAN_THROW_AA_FMT(Active.Items[0] == 0, "Interference bug"); + SpanStart.clear(); + SpanEnd.clear(); +} - FEXCore::IR::RegisterClassType RegClass = FEXCore::IR::RegisterClassType{CurrentRegAndClass.Class}; - auto RegAndClass = PhysicalRegister::Invalid(); - RegisterClass *RAClass = &Graph->Set.Classes[RegClass]; +void ConstrainedRAPass::AllocateVirtualRegisters() { + for (uint32_t i = 0; i < Graph->NodeCount; ++i) { + RegisterNode* CurrentNode = &Graph->Nodes[i]; + auto& CurrentRegAndClass = Graph->AllocData->Map[i]; + if (CurrentRegAndClass == PhysicalRegister::Invalid()) { + continue; + } - if (!LiveRange->PrefferedRegister.IsInvalid()) { - RegAndClass = LiveRange->PrefferedRegister; - } else { - uint32_t RegisterConflicts = 0; - CurrentNode->Interferences.Iterate([&](const IR::NodeID InterferenceNode) { - 
RegisterConflicts |= GetConflicts(Graph, Graph->AllocData->Map[InterferenceNode.Value], {RegClass}); - }); + auto LiveRange = &LiveRanges[i]; - RegisterConflicts = (~RegisterConflicts) & RAClass->CountMask; + FEXCore::IR::RegisterClassType RegClass = FEXCore::IR::RegisterClassType {CurrentRegAndClass.Class}; + auto RegAndClass = PhysicalRegister::Invalid(); + RegisterClass* RAClass = &Graph->Set.Classes[RegClass]; - int Reg = FindFirstSetBit(RegisterConflicts); - if (Reg != 0) { - RegAndClass = PhysicalRegister({RegClass}, Reg-1); - } - } + if (!LiveRange->PrefferedRegister.IsInvalid()) { + RegAndClass = LiveRange->PrefferedRegister; + } else { + uint32_t RegisterConflicts = 0; + CurrentNode->Interferences.Iterate([&](const IR::NodeID InterferenceNode) { + RegisterConflicts |= GetConflicts(Graph, Graph->AllocData->Map[InterferenceNode.Value], {RegClass}); + }); - // If we failed to find a virtual register then use INVALID_REG and mark allocation as failed - if (RegAndClass.IsInvalid()) { - RegAndClass = IR::PhysicalRegister(RegClass, INVALID_REG); - HadFullRA = false; - SpillPointId = IR::NodeID{i}; + RegisterConflicts = (~RegisterConflicts) & RAClass->CountMask; - CurrentRegAndClass = RegAndClass; - // Must spill and restart - return; + int Reg = FindFirstSetBit(RegisterConflicts); + if (Reg != 0) { + RegAndClass = PhysicalRegister({RegClass}, Reg - 1); } + } + + // If we failed to find a virtual register then use INVALID_REG and mark allocation as failed + if (RegAndClass.IsInvalid()) { + RegAndClass = IR::PhysicalRegister(RegClass, INVALID_REG); + HadFullRA = false; + SpillPointId = IR::NodeID {i}; CurrentRegAndClass = RegAndClass; + // Must spill and restart + return; } + + CurrentRegAndClass = RegAndClass; } +} - FEXCore::IR::AllNodesIterator ConstrainedRAPass::FindFirstUse(FEXCore::IR::IREmitter *IREmit, FEXCore::IR::OrderedNode* Node, FEXCore::IR::AllNodesIterator Begin, FEXCore::IR::AllNodesIterator End) { - using namespace FEXCore::IR; - const auto 
SearchID = IREmit->ViewIR().GetID(Node); +FEXCore::IR::AllNodesIterator ConstrainedRAPass::FindFirstUse(FEXCore::IR::IREmitter* IREmit, FEXCore::IR::OrderedNode* Node, + FEXCore::IR::AllNodesIterator Begin, FEXCore::IR::AllNodesIterator End) { + using namespace FEXCore::IR; + const auto SearchID = IREmit->ViewIR().GetID(Node); - while(1) { - auto [RealNode, IROp] = Begin(); + while (1) { + auto [RealNode, IROp] = Begin(); - const uint8_t NumArgs = FEXCore::IR::GetRAArgs(IROp->Op); - for (uint8_t i = 0; i < NumArgs; ++i) { - const auto ArgNode = IROp->Args[i].ID(); - if (ArgNode == SearchID) { - return Begin; - } - } - - // CodeLast is inclusive. So we still need to dump the CodeLast op as well - if (Begin == End) { - break; + const uint8_t NumArgs = FEXCore::IR::GetRAArgs(IROp->Op); + for (uint8_t i = 0; i < NumArgs; ++i) { + const auto ArgNode = IROp->Args[i].ID(); + if (ArgNode == SearchID) { + return Begin; } + } - ++Begin; + // CodeLast is inclusive. So we still need to dump the CodeLast op as well + if (Begin == End) { + break; } - return AllNodesIterator::Invalid(); + ++Begin; } - FEXCore::IR::AllNodesIterator ConstrainedRAPass::FindLastUseBefore(FEXCore::IR::IREmitter *IREmit, FEXCore::IR::OrderedNode* Node, FEXCore::IR::AllNodesIterator Begin, FEXCore::IR::AllNodesIterator End) { - auto CurrentIR = IREmit->ViewIR(); - const auto SearchID = CurrentIR.GetID(Node); + return AllNodesIterator::Invalid(); +} - while (1) { - using namespace FEXCore::IR; - auto [RealNode, IROp] = End(); +FEXCore::IR::AllNodesIterator ConstrainedRAPass::FindLastUseBefore(FEXCore::IR::IREmitter* IREmit, FEXCore::IR::OrderedNode* Node, + FEXCore::IR::AllNodesIterator Begin, FEXCore::IR::AllNodesIterator End) { + auto CurrentIR = IREmit->ViewIR(); + const auto SearchID = CurrentIR.GetID(Node); - if (Node == RealNode) { - // We walked back all the way to the definition of the IR op - return End; - } + while (1) { + using namespace FEXCore::IR; + auto [RealNode, IROp] = End(); - const 
uint8_t NumArgs = FEXCore::IR::GetRAArgs(IROp->Op); - for (uint8_t i = 0; i < NumArgs; ++i) { - const auto ArgNode = IROp->Args[i].ID(); - if (ArgNode == SearchID) { - return End; - } - } + if (Node == RealNode) { + // We walked back all the way to the definition of the IR op + return End; + } - // CodeLast is inclusive. So we still need to dump the CodeLast op as well - if (Begin == End) { - break; + const uint8_t NumArgs = FEXCore::IR::GetRAArgs(IROp->Op); + for (uint8_t i = 0; i < NumArgs; ++i) { + const auto ArgNode = IROp->Args[i].ID(); + if (ArgNode == SearchID) { + return End; } + } - --End; + // CodeLast is inclusive. So we still need to dump the CodeLast op as well + if (Begin == End) { + break; } - return FEXCore::IR::AllNodesIterator::Invalid(); + --End; } - std::optional ConstrainedRAPass::FindNodeToSpill(IREmitter *IREmit, - RegisterNode *RegisterNode, - IR::NodeID CurrentLocation, - LiveRange const *OpLiveRange, - int32_t RematCost) { - auto IR = IREmit->ViewIR(); - - IR::NodeID InterferenceIdToSpill{}; - uint32_t InterferenceFarthestNextUse = 0; - - IR::OrderedNodeWrapper NodeOpBegin = IR::OrderedNodeWrapper::WrapOffset(CurrentLocation.Value * sizeof(IR::OrderedNode)); - IR::OrderedNodeWrapper NodeOpEnd = IR::OrderedNodeWrapper::WrapOffset(OpLiveRange->End.Value * sizeof(IR::OrderedNode)); - auto NodeOpBeginIter = IR.at(NodeOpBegin); - auto NodeOpEndIter = IR.at(NodeOpEnd); - - // Couldn't find register to spill - // Be more aggressive - if (InterferenceIdToSpill.IsInvalid()) { - RegisterNode->Interferences.Iterate([&](IR::NodeID InterferenceNode) { - auto *InterferenceLiveRange = &LiveRanges[InterferenceNode.Value]; - if (InterferenceLiveRange->RematCost == -1 || - (RematCost != -1 && InterferenceLiveRange->RematCost != RematCost)) { - return; - } + return FEXCore::IR::AllNodesIterator::Invalid(); +} - //if ((RegisterNode->Head.RegAndClass>>32) != (InterferenceNode->Head.RegAndClass>>32)) - // return; - - // If this node's live range fully 
encompasses the live range of the interference node - // then spilling that interference node will not lower RA - // | Our Node | Interference | - // | ========================================== | - // | 0 - Assign | | - // | 1 | Assign | - // | 2 | | - // | 3 | Last Use | - // | 4 | | - // | 5 - Last Use | | - // | Range - (0, 5] | (1, 3] | - if (OpLiveRange->Begin <= InterferenceLiveRange->Begin && - OpLiveRange->End >= InterferenceLiveRange->End) { - return; - } +std::optional ConstrainedRAPass::FindNodeToSpill(IREmitter* IREmit, RegisterNode* RegisterNode, IR::NodeID CurrentLocation, + const LiveRange* OpLiveRange, int32_t RematCost) { + auto IR = IREmit->ViewIR(); - auto [InterferenceOrderedNode, _] = IR.at(InterferenceNode)(); - auto InterferenceNodeOpBeginIter = IR.at(InterferenceLiveRange->Begin); - auto InterferenceNodeOpEndIter = IR.at(InterferenceLiveRange->End); - - // If the nodes live range is entirely encompassed by the interference node's range - // then spilling that range will /potentially/ lower RA - // Will only lower register pressure if the interference node does NOT have a use inside of - // this live range's use - // | Our Node | Interference | - // | ========================================== | - // | 0 | Assign | - // | 1 - Assign | (No Use) | - // | 2 | (No Use) | - // | 3 - Last Use | (No Use) | - // | 4 | | - // | 5 | Last Use | - // | Range - (1, 3] | (0, 5] | - if (CurrentLocation > InterferenceLiveRange->Begin && - OpLiveRange->End < InterferenceLiveRange->End) { - - // This will only save register pressure if the interference node - // does NOT have a use inside of this this node's live range - // Search only inside the source node's live range to see if there is a use - auto FirstUseLocation = FindFirstUse(IREmit, InterferenceOrderedNode, NodeOpBeginIter, NodeOpEndIter); - if (FirstUseLocation == IR::NodeIterator::Invalid()) { - // Looks like there isn't a usage of this interference node inside our node's live range - // This means 
it is safe to spill this node and it'll result in in lower RA - // Proper calculation of cost to spill would be to calculate the two distances from - // (Node->Begin - InterferencePrevUse) + (InterferenceNextUse - Node->End) - // This would ensure something will spill earlier if its previous use and next use are farther away - auto InterferenceNodeNextUse = FindFirstUse(IREmit, InterferenceOrderedNode, NodeOpBeginIter, InterferenceNodeOpEndIter); - auto InterferenceNodePrevUse = FindLastUseBefore(IREmit, InterferenceOrderedNode, InterferenceNodeOpBeginIter, NodeOpBeginIter); - LOGMAN_THROW_A_FMT(InterferenceNodeNextUse != IR::NodeIterator::Invalid(), "Couldn't find next usage of op"); - // If there is no use of the interference op prior to our op then it only has initial definition - if (InterferenceNodePrevUse == IR::NodeIterator::Invalid()) { - InterferenceNodePrevUse = InterferenceNodeOpBeginIter; - } + IR::NodeID InterferenceIdToSpill {}; + uint32_t InterferenceFarthestNextUse = 0; - const auto NextUseDistance = InterferenceNodeNextUse.ID().Value - CurrentLocation.Value; - if (NextUseDistance >= InterferenceFarthestNextUse) { - InterferenceIdToSpill = InterferenceNode; - InterferenceFarthestNextUse = NextUseDistance; - } + IR::OrderedNodeWrapper NodeOpBegin = IR::OrderedNodeWrapper::WrapOffset(CurrentLocation.Value * sizeof(IR::OrderedNode)); + IR::OrderedNodeWrapper NodeOpEnd = IR::OrderedNodeWrapper::WrapOffset(OpLiveRange->End.Value * sizeof(IR::OrderedNode)); + auto NodeOpBeginIter = IR.at(NodeOpBegin); + auto NodeOpEndIter = IR.at(NodeOpEnd); + + // Couldn't find register to spill + // Be more aggressive + if (InterferenceIdToSpill.IsInvalid()) { + RegisterNode->Interferences.Iterate([&](IR::NodeID InterferenceNode) { + auto* InterferenceLiveRange = &LiveRanges[InterferenceNode.Value]; + if (InterferenceLiveRange->RematCost == -1 || (RematCost != -1 && InterferenceLiveRange->RematCost != RematCost)) { + return; + } + + // if 
((RegisterNode->Head.RegAndClass>>32) != (InterferenceNode->Head.RegAndClass>>32)) + // return; + + // If this node's live range fully encompasses the live range of the interference node + // then spilling that interference node will not lower RA + // | Our Node | Interference | + // | ========================================== | + // | 0 - Assign | | + // | 1 | Assign | + // | 2 | | + // | 3 | Last Use | + // | 4 | | + // | 5 - Last Use | | + // | Range - (0, 5] | (1, 3] | + if (OpLiveRange->Begin <= InterferenceLiveRange->Begin && OpLiveRange->End >= InterferenceLiveRange->End) { + return; + } + + auto [InterferenceOrderedNode, _] = IR.at(InterferenceNode)(); + auto InterferenceNodeOpBeginIter = IR.at(InterferenceLiveRange->Begin); + auto InterferenceNodeOpEndIter = IR.at(InterferenceLiveRange->End); + + // If the nodes live range is entirely encompassed by the interference node's range + // then spilling that range will /potentially/ lower RA + // Will only lower register pressure if the interference node does NOT have a use inside of + // this live range's use + // | Our Node | Interference | + // | ========================================== | + // | 0 | Assign | + // | 1 - Assign | (No Use) | + // | 2 | (No Use) | + // | 3 - Last Use | (No Use) | + // | 4 | | + // | 5 | Last Use | + // | Range - (1, 3] | (0, 5] | + if (CurrentLocation > InterferenceLiveRange->Begin && OpLiveRange->End < InterferenceLiveRange->End) { + + // This will only save register pressure if the interference node + // does NOT have a use inside of this this node's live range + // Search only inside the source node's live range to see if there is a use + auto FirstUseLocation = FindFirstUse(IREmit, InterferenceOrderedNode, NodeOpBeginIter, NodeOpEndIter); + if (FirstUseLocation == IR::NodeIterator::Invalid()) { + // Looks like there isn't a usage of this interference node inside our node's live range + // This means it is safe to spill this node and it'll result in in lower RA + // Proper 
calculation of cost to spill would be to calculate the two distances from + // (Node->Begin - InterferencePrevUse) + (InterferenceNextUse - Node->End) + // This would ensure something will spill earlier if its previous use and next use are farther away + auto InterferenceNodeNextUse = FindFirstUse(IREmit, InterferenceOrderedNode, NodeOpBeginIter, InterferenceNodeOpEndIter); + auto InterferenceNodePrevUse = FindLastUseBefore(IREmit, InterferenceOrderedNode, InterferenceNodeOpBeginIter, NodeOpBeginIter); + LOGMAN_THROW_A_FMT(InterferenceNodeNextUse != IR::NodeIterator::Invalid(), "Couldn't find next usage of op"); + // If there is no use of the interference op prior to our op then it only has initial definition + if (InterferenceNodePrevUse == IR::NodeIterator::Invalid()) { + InterferenceNodePrevUse = InterferenceNodeOpBeginIter; + } + + const auto NextUseDistance = InterferenceNodeNextUse.ID().Value - CurrentLocation.Value; + if (NextUseDistance >= InterferenceFarthestNextUse) { + InterferenceIdToSpill = InterferenceNode; + InterferenceFarthestNextUse = NextUseDistance; } } - }); - } + } + }); + } - if (InterferenceIdToSpill.IsInvalid()) { - RegisterNode->Interferences.Iterate([&](IR::NodeID InterferenceNode) { - auto *InterferenceLiveRange = &LiveRanges[InterferenceNode.Value]; - if (InterferenceLiveRange->RematCost == -1 || - (RematCost != -1 && InterferenceLiveRange->RematCost != RematCost)) { - return; - } + if (InterferenceIdToSpill.IsInvalid()) { + RegisterNode->Interferences.Iterate([&](IR::NodeID InterferenceNode) { + auto* InterferenceLiveRange = &LiveRanges[InterferenceNode.Value]; + if (InterferenceLiveRange->RematCost == -1 || (RematCost != -1 && InterferenceLiveRange->RematCost != RematCost)) { + return; + } - // If this node's live range fully encompasses the live range of the interference node - // then spilling that interference node will not lower RA - // | Our Node | Interference | - // | ========================================== | - // | 0 - 
Assign | | - // | 1 | Assign | - // | 2 | | - // | 3 | Last Use | - // | 4 | | - // | 5 - Last Use | | - // | Range - (0, 5] | (1, 3] | - if (OpLiveRange->Begin <= InterferenceLiveRange->Begin && - OpLiveRange->End >= InterferenceLiveRange->End) { - return; - } + // If this node's live range fully encompasses the live range of the interference node + // then spilling that interference node will not lower RA + // | Our Node | Interference | + // | ========================================== | + // | 0 - Assign | | + // | 1 | Assign | + // | 2 | | + // | 3 | Last Use | + // | 4 | | + // | 5 - Last Use | | + // | Range - (0, 5] | (1, 3] | + if (OpLiveRange->Begin <= InterferenceLiveRange->Begin && OpLiveRange->End >= InterferenceLiveRange->End) { + return; + } - auto [InterferenceOrderedNode, _] = IR.at(InterferenceNode)(); - auto InterferenceNodeOpEndIter = IR.at(InterferenceLiveRange->End); - - bool Found{}; - - // If the node's live range intersects the interference node - // but the interference node only overlaps the beginning of our live range - // then spilling the register will lower register pressure if there is not - // a use of the interference register at the same node as assignment - // (So we can spill just before current node assignment) - // | Our Node | Interference | - // | ========================================== | - // | 0 | Assign | - // | 1 - Assign | (No Use) | - // | 2 | (No Use) | - // | 3 | Last Use | - // | 4 | | - // | 5 - Last Use | | - // | Range - (1, 5] | (0, 3] | - if (!Found && - CurrentLocation > InterferenceLiveRange->Begin && - OpLiveRange->End > InterferenceLiveRange->End) { - auto FirstUseLocation = FindFirstUse(IREmit, InterferenceOrderedNode, NodeOpBeginIter, NodeOpBeginIter); - - if (FirstUseLocation == IR::NodeIterator::Invalid()) { - // This means that the assignment of our register doesn't use this interference node - // So we are safe to spill this interference node before assignment of our current node - const auto 
InterferenceNodeNextUse = FindFirstUse(IREmit, InterferenceOrderedNode, NodeOpBeginIter, InterferenceNodeOpEndIter); - const auto NextUseDistance = InterferenceNodeNextUse.ID().Value - CurrentLocation.Value; - if (NextUseDistance >= InterferenceFarthestNextUse) { - Found = true; - - InterferenceIdToSpill = InterferenceNode; - InterferenceFarthestNextUse = NextUseDistance; - } + auto [InterferenceOrderedNode, _] = IR.at(InterferenceNode)(); + auto InterferenceNodeOpEndIter = IR.at(InterferenceLiveRange->End); + + bool Found {}; + + // If the node's live range intersects the interference node + // but the interference node only overlaps the beginning of our live range + // then spilling the register will lower register pressure if there is not + // a use of the interference register at the same node as assignment + // (So we can spill just before current node assignment) + // | Our Node | Interference | + // | ========================================== | + // | 0 | Assign | + // | 1 - Assign | (No Use) | + // | 2 | (No Use) | + // | 3 | Last Use | + // | 4 | | + // | 5 - Last Use | | + // | Range - (1, 5] | (0, 3] | + if (!Found && CurrentLocation > InterferenceLiveRange->Begin && OpLiveRange->End > InterferenceLiveRange->End) { + auto FirstUseLocation = FindFirstUse(IREmit, InterferenceOrderedNode, NodeOpBeginIter, NodeOpBeginIter); + + if (FirstUseLocation == IR::NodeIterator::Invalid()) { + // This means that the assignment of our register doesn't use this interference node + // So we are safe to spill this interference node before assignment of our current node + const auto InterferenceNodeNextUse = FindFirstUse(IREmit, InterferenceOrderedNode, NodeOpBeginIter, InterferenceNodeOpEndIter); + const auto NextUseDistance = InterferenceNodeNextUse.ID().Value - CurrentLocation.Value; + if (NextUseDistance >= InterferenceFarthestNextUse) { + Found = true; + + InterferenceIdToSpill = InterferenceNode; + InterferenceFarthestNextUse = NextUseDistance; } } + } - // If the 
node's live range intersects the interference node - // but the interference node only overlaps the end of our live range - // then spilling the register will lower register pressure if there is - // not a use of the interference register at the same node as the other node's - // last use - // | Our Node | Interference | - // | ========================================== | - // | 0 - Assign | | - // | 1 | | - // | 2 | Assign | - // | 3 - Last Use | (No Use) | - // | 4 | (No Use) | - // | 5 | Last Use | - // | Range - (1, 3] | (2, 5] | - - // XXX: This route has a bug in it so it is purposely disabled for now - if (false && !Found && - CurrentLocation <= InterferenceLiveRange->Begin && - OpLiveRange->End <= InterferenceLiveRange->End) { - auto FirstUseLocation = FindFirstUse(IREmit, InterferenceOrderedNode, NodeOpEndIter, NodeOpEndIter); - - if (FirstUseLocation == IR::NodeIterator::Invalid()) { - // This means that the assignment of our the interference register doesn't overlap - // with the final usage of our register, we can spill it and reduce usage - const auto InterferenceNodeNextUse = FindFirstUse(IREmit, InterferenceOrderedNode, NodeOpBeginIter, InterferenceNodeOpEndIter); - const auto NextUseDistance = InterferenceNodeNextUse.ID().Value - CurrentLocation.Value; - if (NextUseDistance >= InterferenceFarthestNextUse) { - Found = true; - - InterferenceIdToSpill = InterferenceNode; - InterferenceFarthestNextUse = NextUseDistance; - } + // If the node's live range intersects the interference node + // but the interference node only overlaps the end of our live range + // then spilling the register will lower register pressure if there is + // not a use of the interference register at the same node as the other node's + // last use + // | Our Node | Interference | + // | ========================================== | + // | 0 - Assign | | + // | 1 | | + // | 2 | Assign | + // | 3 - Last Use | (No Use) | + // | 4 | (No Use) | + // | 5 | Last Use | + // | Range - (1, 
3] | (2, 5] | + + // XXX: This route has a bug in it so it is purposely disabled for now + if (false && !Found && CurrentLocation <= InterferenceLiveRange->Begin && OpLiveRange->End <= InterferenceLiveRange->End) { + auto FirstUseLocation = FindFirstUse(IREmit, InterferenceOrderedNode, NodeOpEndIter, NodeOpEndIter); + + if (FirstUseLocation == IR::NodeIterator::Invalid()) { + // This means that the assignment of our the interference register doesn't overlap + // with the final usage of our register, we can spill it and reduce usage + const auto InterferenceNodeNextUse = FindFirstUse(IREmit, InterferenceOrderedNode, NodeOpBeginIter, InterferenceNodeOpEndIter); + const auto NextUseDistance = InterferenceNodeNextUse.ID().Value - CurrentLocation.Value; + if (NextUseDistance >= InterferenceFarthestNextUse) { + Found = true; + + InterferenceIdToSpill = InterferenceNode; + InterferenceFarthestNextUse = NextUseDistance; } } - }); - } + } + }); + } - // If we are looking for a specific node then we can safely return not found - if (RematCost != -1 && InterferenceIdToSpill.IsInvalid()) { - return std::nullopt; - } + // If we are looking for a specific node then we can safely return not found + if (RematCost != -1 && InterferenceIdToSpill.IsInvalid()) { + return std::nullopt; + } - // Heuristics failed to spill ? - if (InterferenceIdToSpill.IsInvalid()) { - // Panic spill: Spill any value not used by the current op - fextl::set CurrentNodes; + // Heuristics failed to spill ? 
+ if (InterferenceIdToSpill.IsInvalid()) { + // Panic spill: Spill any value not used by the current op + fextl::set CurrentNodes; - // Get all used nodes for current IR op - { - auto CurrentNode = IR.GetNode(NodeOpBegin); - auto IROp = CurrentNode->Op(IR.GetData()); + // Get all used nodes for current IR op + { + auto CurrentNode = IR.GetNode(NodeOpBegin); + auto IROp = CurrentNode->Op(IR.GetData()); - CurrentNodes.insert(NodeOpBegin.ID()); + CurrentNodes.insert(NodeOpBegin.ID()); - for (int i = 0; i < IR::GetRAArgs(IROp->Op); i++) { - CurrentNodes.insert(IROp->Args[i].ID()); - } + for (int i = 0; i < IR::GetRAArgs(IROp->Op); i++) { + CurrentNodes.insert(IROp->Args[i].ID()); } + } - RegisterNode->Interferences.Find([&](IR::NodeID InterferenceNode) { - auto *InterferenceLiveRange = &LiveRanges[InterferenceNode.Value]; - if (InterferenceLiveRange->RematCost == -1 || - (RematCost != -1 && InterferenceLiveRange->RematCost != RematCost)) { - return false; - } - - if (!CurrentNodes.contains(InterferenceNode)) { - InterferenceIdToSpill = InterferenceNode; - LogMan::Msg::DFmt("Panic spilling %{}, Live Range[{}, {})", InterferenceIdToSpill, InterferenceLiveRange->Begin, InterferenceLiveRange->End); - return true; - } + RegisterNode->Interferences.Find([&](IR::NodeID InterferenceNode) { + auto* InterferenceLiveRange = &LiveRanges[InterferenceNode.Value]; + if (InterferenceLiveRange->RematCost == -1 || (RematCost != -1 && InterferenceLiveRange->RematCost != RematCost)) { return false; - }); - } + } - if (InterferenceIdToSpill.IsInvalid()) { - int j = 0; - LogMan::Msg::DFmt("node %{}, was dumped in to virtual reg {}. 
Live Range[{}, {})", - CurrentLocation, -1, - OpLiveRange->Begin, OpLiveRange->End); + if (!CurrentNodes.contains(InterferenceNode)) { + InterferenceIdToSpill = InterferenceNode; + LogMan::Msg::DFmt("Panic spilling %{}, Live Range[{}, {})", InterferenceIdToSpill, InterferenceLiveRange->Begin, + InterferenceLiveRange->End); + return true; + } + return false; + }); + } - RegisterNode->Interferences.Iterate([&](IR::NodeID InterferenceNode) { - auto *InterferenceLiveRange = &LiveRanges[InterferenceNode.Value]; + if (InterferenceIdToSpill.IsInvalid()) { + int j = 0; + LogMan::Msg::DFmt("node %{}, was dumped in to virtual reg {}. Live Range[{}, {})", CurrentLocation, -1, OpLiveRange->Begin, OpLiveRange->End); - LogMan::Msg::DFmt("\tInt{}: %{} Remat: {} [{}, {})", j++, InterferenceNode, InterferenceLiveRange->RematCost, InterferenceLiveRange->Begin, InterferenceLiveRange->End); - }); - } - LOGMAN_THROW_A_FMT(InterferenceIdToSpill.IsValid(), "Couldn't find Node to spill"); + RegisterNode->Interferences.Iterate([&](IR::NodeID InterferenceNode) { + auto* InterferenceLiveRange = &LiveRanges[InterferenceNode.Value]; - return InterferenceIdToSpill; + LogMan::Msg::DFmt("\tInt{}: %{} Remat: {} [{}, {})", j++, InterferenceNode, InterferenceLiveRange->RematCost, + InterferenceLiveRange->Begin, InterferenceLiveRange->End); + }); } + LOGMAN_THROW_A_FMT(InterferenceIdToSpill.IsValid(), "Couldn't find Node to spill"); - uint32_t ConstrainedRAPass::FindSpillSlot(IR::NodeID Node, FEXCore::IR::RegisterClassType RegisterClass) { - RegisterNode& CurrentNode = Graph->Nodes[Node.Value]; - const auto& NodeLiveRange = LiveRanges[Node.Value]; + return InterferenceIdToSpill; +} - if (ReuseSpillSlots) { - for (uint32_t i = 0; i < Graph->SpillStack.size(); ++i) { - SpillStackUnit& SpillUnit = Graph->SpillStack[i]; +uint32_t ConstrainedRAPass::FindSpillSlot(IR::NodeID Node, FEXCore::IR::RegisterClassType RegisterClass) { + RegisterNode& CurrentNode = Graph->Nodes[Node.Value]; + const auto& 
NodeLiveRange = LiveRanges[Node.Value]; - if (NodeLiveRange.Begin <= SpillUnit.SpillRange.End && - SpillUnit.SpillRange.Begin <= NodeLiveRange.End) { - SpillUnit.SpillRange.Begin = std::min(SpillUnit.SpillRange.Begin, NodeLiveRange.Begin); - SpillUnit.SpillRange.End = std::max(SpillUnit.SpillRange.End, NodeLiveRange.End); - CurrentNode.Head.SpillSlot = i; - return i; - } + if (ReuseSpillSlots) { + for (uint32_t i = 0; i < Graph->SpillStack.size(); ++i) { + SpillStackUnit& SpillUnit = Graph->SpillStack[i]; + + if (NodeLiveRange.Begin <= SpillUnit.SpillRange.End && SpillUnit.SpillRange.Begin <= NodeLiveRange.End) { + SpillUnit.SpillRange.Begin = std::min(SpillUnit.SpillRange.Begin, NodeLiveRange.Begin); + SpillUnit.SpillRange.End = std::max(SpillUnit.SpillRange.End, NodeLiveRange.End); + CurrentNode.Head.SpillSlot = i; + return i; } } - - // Couldn't find a spill slot so just make a new one - auto StackItem = Graph->SpillStack.emplace_back(SpillStackUnit{Node, RegisterClass}); - StackItem.SpillRange.Begin = NodeLiveRange.Begin; - StackItem.SpillRange.End = NodeLiveRange.End; - CurrentNode.Head.SpillSlot = SpillSlotCount; - SpillSlotCount++; - return CurrentNode.Head.SpillSlot; } - void ConstrainedRAPass::SpillOne(FEXCore::IR::IREmitter *IREmit) { - using namespace FEXCore; + // Couldn't find a spill slot so just make a new one + auto StackItem = Graph->SpillStack.emplace_back(SpillStackUnit {Node, RegisterClass}); + StackItem.SpillRange.Begin = NodeLiveRange.Begin; + StackItem.SpillRange.End = NodeLiveRange.End; + CurrentNode.Head.SpillSlot = SpillSlotCount; + SpillSlotCount++; + return CurrentNode.Head.SpillSlot; +} - auto IR = IREmit->ViewIR(); - auto LastCursor = IREmit->GetWriteCursor(); - auto [CodeNode, IROp] = IR.at(SpillPointId)(); - - LOGMAN_THROW_AA_FMT(GetHasDest(IROp->Op), "Can't spill with no dest"); - - const auto Node = IR.GetID(CodeNode); - RegisterNode *CurrentNode = &Graph->Nodes[Node.Value]; - auto &CurrentRegAndClass = 
Graph->AllocData->Map[Node.Value]; - LiveRange *OpLiveRange = &LiveRanges[Node.Value]; - - // If this node is allocated above the number of physical registers - // we have then we need to search the interference list and spill the one - // that is cheapest - const bool NeedsToSpill = CurrentRegAndClass.Reg == INVALID_REG; - - if (NeedsToSpill) { - bool Spilled = false; - - // First let's just check for constants that we can just rematerialize instead of spilling - if (const auto InterferenceNode = FindNodeToSpill(IREmit, CurrentNode, Node, OpLiveRange, 1)) { - // We want to end the live range of this value here and continue it on first use - auto [ConstantNode, _] = IR.at(*InterferenceNode)(); - auto ConstantIROp = IR.GetOp(ConstantNode); - - // First op post Spill - auto NextIter = IR.at(CodeNode); - auto FirstUseLocation = FindFirstUse(IREmit, ConstantNode, NextIter, NodeIterator::Invalid()); - - LOGMAN_THROW_A_FMT(FirstUseLocation != IR::NodeIterator::Invalid(), - "At %{} Spilling Op %{} but Failure to find op use", - Node, *InterferenceNode); - - if (FirstUseLocation != IR::NodeIterator::Invalid()) { - --FirstUseLocation; - auto [FirstUseOrderedNode, _] = FirstUseLocation(); - IREmit->SetWriteCursor(FirstUseOrderedNode); - auto FilledConstant = IREmit->_Constant(ConstantIROp->Constant); - IREmit->ReplaceUsesWithAfter(ConstantNode, FilledConstant, FirstUseLocation); - Spilled = true; - } +void ConstrainedRAPass::SpillOne(FEXCore::IR::IREmitter* IREmit) { + using namespace FEXCore; + + auto IR = IREmit->ViewIR(); + auto LastCursor = IREmit->GetWriteCursor(); + auto [CodeNode, IROp] = IR.at(SpillPointId)(); + + LOGMAN_THROW_AA_FMT(GetHasDest(IROp->Op), "Can't spill with no dest"); + + const auto Node = IR.GetID(CodeNode); + RegisterNode* CurrentNode = &Graph->Nodes[Node.Value]; + auto& CurrentRegAndClass = Graph->AllocData->Map[Node.Value]; + LiveRange* OpLiveRange = &LiveRanges[Node.Value]; + + // If this node is allocated above the number of physical registers + 
// we have then we need to search the interference list and spill the one + // that is cheapest + const bool NeedsToSpill = CurrentRegAndClass.Reg == INVALID_REG; + + if (NeedsToSpill) { + bool Spilled = false; + + // First let's just check for constants that we can just rematerialize instead of spilling + if (const auto InterferenceNode = FindNodeToSpill(IREmit, CurrentNode, Node, OpLiveRange, 1)) { + // We want to end the live range of this value here and continue it on first use + auto [ConstantNode, _] = IR.at(*InterferenceNode)(); + auto ConstantIROp = IR.GetOp(ConstantNode); + + // First op post Spill + auto NextIter = IR.at(CodeNode); + auto FirstUseLocation = FindFirstUse(IREmit, ConstantNode, NextIter, NodeIterator::Invalid()); + + LOGMAN_THROW_A_FMT(FirstUseLocation != IR::NodeIterator::Invalid(), "At %{} Spilling Op %{} but Failure to find op use", Node, + *InterferenceNode); + + if (FirstUseLocation != IR::NodeIterator::Invalid()) { + --FirstUseLocation; + auto [FirstUseOrderedNode, _] = FirstUseLocation(); + IREmit->SetWriteCursor(FirstUseOrderedNode); + auto FilledConstant = IREmit->_Constant(ConstantIROp->Constant); + IREmit->ReplaceUsesWithAfter(ConstantNode, FilledConstant, FirstUseLocation); + Spilled = true; } + } - // If we didn't remat a constant then we need to do some real spilling - if (!Spilled) { - if (const auto InterferenceNode = FindNodeToSpill(IREmit, CurrentNode, Node, OpLiveRange)) { - const auto InterferenceRegClass = IR::RegisterClassType{Graph->AllocData->Map[InterferenceNode->Value].Class}; - const uint32_t SpillSlot = FindSpillSlot(*InterferenceNode, InterferenceRegClass); + // If we didn't remat a constant then we need to do some real spilling + if (!Spilled) { + if (const auto InterferenceNode = FindNodeToSpill(IREmit, CurrentNode, Node, OpLiveRange)) { + const auto InterferenceRegClass = IR::RegisterClassType {Graph->AllocData->Map[InterferenceNode->Value].Class}; + const uint32_t SpillSlot = FindSpillSlot(*InterferenceNode, 
InterferenceRegClass); #if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED - LOGMAN_THROW_A_FMT(SpillSlot != UINT32_MAX, "Interference Node doesn't have a spill slot!"); - LOGMAN_THROW_A_FMT(InterferenceRegClass != UINT32_MAX, "Interference node never assigned a register class?"); + LOGMAN_THROW_A_FMT(SpillSlot != UINT32_MAX, "Interference Node doesn't have a spill slot!"); + LOGMAN_THROW_A_FMT(InterferenceRegClass != UINT32_MAX, "Interference node never assigned a register class?"); #endif - // This is the op that we need to dump - auto [InterferenceOrderedNode, InterferenceIROp] = IR.at(*InterferenceNode)(); + // This is the op that we need to dump + auto [InterferenceOrderedNode, InterferenceIROp] = IR.at(*InterferenceNode)(); - // This will find the last use of this definition - // Walks from CodeBegin -> BlockBegin to find the last Use - // Which this is walking backwards to find the first use - auto LastUseIterator = FindLastUseBefore(IREmit, InterferenceOrderedNode, NodeIterator::Invalid(), IR.at(CodeNode)); - if (LastUseIterator != AllNodesIterator::Invalid()) { - auto [LastUseNode, LastUseIROp] = LastUseIterator(); + // This will find the last use of this definition + // Walks from CodeBegin -> BlockBegin to find the last Use + // Which this is walking backwards to find the first use + auto LastUseIterator = FindLastUseBefore(IREmit, InterferenceOrderedNode, NodeIterator::Invalid(), IR.at(CodeNode)); + if (LastUseIterator != AllNodesIterator::Invalid()) { + auto [LastUseNode, LastUseIROp] = LastUseIterator(); - // Set the write cursor to point of last usage - IREmit->SetWriteCursor(LastUseNode); - } else { - // There is no last use -- use the definition as last use - IREmit->SetWriteCursor(InterferenceOrderedNode); - } + // Set the write cursor to point of last usage + IREmit->SetWriteCursor(LastUseNode); + } else { + // There is no last use -- use the definition as last use + IREmit->SetWriteCursor(InterferenceOrderedNode); + } - // Actually spill the 
node now - auto SpillOp = IREmit->_SpillRegister(InterferenceOrderedNode, SpillSlot, InterferenceRegClass); - SpillOp.first->Header.Size = InterferenceIROp->Size; - SpillOp.first->Header.ElementSize = InterferenceIROp->ElementSize; - - { - // Search from the point of spilling to find the first use - // Set the write cursor to the first location found and fill at that point - auto FirstIter = IR.at(SpillOp.Node); - // Just past the spill - ++FirstIter; - auto FirstUseLocation = FindFirstUse(IREmit, InterferenceOrderedNode, FirstIter, NodeIterator::Invalid()); - - LOGMAN_THROW_A_FMT(FirstUseLocation != NodeIterator::Invalid(), - "At %{} Spilling Op %{} but Failure to find op use", - Node, *InterferenceNode); - - if (FirstUseLocation != IR::NodeIterator::Invalid()) { - // We want to fill just before the first use - --FirstUseLocation; - auto [FirstUseOrderedNode, _] = FirstUseLocation(); - - IREmit->SetWriteCursor(FirstUseOrderedNode); - - auto FilledInterference = IREmit->_FillRegister(InterferenceOrderedNode, SpillSlot, InterferenceRegClass); - FilledInterference.first->Header.Size = InterferenceIROp->Size; - FilledInterference.first->Header.ElementSize = InterferenceIROp->ElementSize; - IREmit->ReplaceUsesWithAfter(InterferenceOrderedNode, - FilledInterference, - FilledInterference); - } + // Actually spill the node now + auto SpillOp = IREmit->_SpillRegister(InterferenceOrderedNode, SpillSlot, InterferenceRegClass); + SpillOp.first->Header.Size = InterferenceIROp->Size; + SpillOp.first->Header.ElementSize = InterferenceIROp->ElementSize; + + { + // Search from the point of spilling to find the first use + // Set the write cursor to the first location found and fill at that point + auto FirstIter = IR.at(SpillOp.Node); + // Just past the spill + ++FirstIter; + auto FirstUseLocation = FindFirstUse(IREmit, InterferenceOrderedNode, FirstIter, NodeIterator::Invalid()); + + LOGMAN_THROW_A_FMT(FirstUseLocation != NodeIterator::Invalid(), "At %{} Spilling Op %{} but 
Failure to find op use", Node, + *InterferenceNode); + + if (FirstUseLocation != IR::NodeIterator::Invalid()) { + // We want to fill just before the first use + --FirstUseLocation; + auto [FirstUseOrderedNode, _] = FirstUseLocation(); + + IREmit->SetWriteCursor(FirstUseOrderedNode); + + auto FilledInterference = IREmit->_FillRegister(InterferenceOrderedNode, SpillSlot, InterferenceRegClass); + FilledInterference.first->Header.Size = InterferenceIROp->Size; + FilledInterference.first->Header.ElementSize = InterferenceIROp->ElementSize; + IREmit->ReplaceUsesWithAfter(InterferenceOrderedNode, FilledInterference, FilledInterference); } } - IREmit->SetWriteCursor(LastCursor); } + IREmit->SetWriteCursor(LastCursor); } } +} - bool ConstrainedRAPass::RunAllocateVirtualRegisters(FEXCore::IR::IREmitter *IREmit) { - using namespace FEXCore; - bool Changed = false; - - GlobalBlockInterferences.clear(); - LocalBlockInterferences.clear(); +bool ConstrainedRAPass::RunAllocateVirtualRegisters(FEXCore::IR::IREmitter* IREmit) { + using namespace FEXCore; + bool Changed = false; - auto IR = IREmit->ViewIR(); + GlobalBlockInterferences.clear(); + LocalBlockInterferences.clear(); - uint32_t SSACount = IR.GetSSACount(); + auto IR = IREmit->ViewIR(); - ResetRegisterGraph(Graph, SSACount); - FindNodeClasses(Graph, &IR); - CalculateLiveRange(&IR); - OptimizeStaticRegisters(&IR); + uint32_t SSACount = IR.GetSSACount(); - // Linear forward scan based interference calculation is faster for smaller blocks - // Smarter block based interference calculation is faster for larger blocks - /*if (SSACount >= 2048) { - CalculateBlockInterferences(&IR); - CalculateBlockNodeInterference(&IR); - } - else*/ { - CalculateNodeInterference(&IR); - } - AllocateVirtualRegisters(); + ResetRegisterGraph(Graph, SSACount); + FindNodeClasses(Graph, &IR); + CalculateLiveRange(&IR); + OptimizeStaticRegisters(&IR); - return Changed; + // Linear forward scan based interference calculation is faster for smaller blocks + 
// Smarter block based interference calculation is faster for larger blocks + /*if (SSACount >= 2048) { + CalculateBlockInterferences(&IR); + CalculateBlockNodeInterference(&IR); } + else*/ + { + CalculateNodeInterference(&IR); + } + AllocateVirtualRegisters(); + + return Changed; +} - void ConstrainedRAPass::CalculatePredecessors(FEXCore::IR::IRListView *IR) { - Graph->BlockPredecessors.clear(); +void ConstrainedRAPass::CalculatePredecessors(FEXCore::IR::IRListView* IR) { + Graph->BlockPredecessors.clear(); - for (auto [BlockNode, BlockIROp] : IR->GetBlocks()) { - auto CodeBlock = BlockIROp->C(); + for (auto [BlockNode, BlockIROp] : IR->GetBlocks()) { + auto CodeBlock = BlockIROp->C(); - auto IROp = IR->GetNode(IR->GetNode(CodeBlock->Last)->Header.Previous)->Op(IR->GetData()); - if (IROp->Op == OP_JUMP) { - auto Op = IROp->C(); - Graph->BlockPredecessors[Op->TargetBlock.ID()].insert(IR->GetID(BlockNode)); - } else if (IROp->Op == OP_CONDJUMP) { - auto Op = IROp->C(); - Graph->BlockPredecessors[Op->TrueBlock.ID()].insert(IR->GetID(BlockNode)); - Graph->BlockPredecessors[Op->FalseBlock.ID()].insert(IR->GetID(BlockNode)); - } + auto IROp = IR->GetNode(IR->GetNode(CodeBlock->Last)->Header.Previous)->Op(IR->GetData()); + if (IROp->Op == OP_JUMP) { + auto Op = IROp->C(); + Graph->BlockPredecessors[Op->TargetBlock.ID()].insert(IR->GetID(BlockNode)); + } else if (IROp->Op == OP_CONDJUMP) { + auto Op = IROp->C(); + Graph->BlockPredecessors[Op->TrueBlock.ID()].insert(IR->GetID(BlockNode)); + Graph->BlockPredecessors[Op->FalseBlock.ID()].insert(IR->GetID(BlockNode)); } } +} - bool ConstrainedRAPass::Run(IREmitter *IREmit) { - FEXCORE_PROFILE_SCOPED("PassManager::RA"); - bool Changed = false; - - auto IR = IREmit->ViewIR(); +bool ConstrainedRAPass::Run(IREmitter* IREmit) { + FEXCORE_PROFILE_SCOPED("PassManager::RA"); + bool Changed = false; - SpillSlotCount = 0; - Graph->SpillStack.clear(); + auto IR = IREmit->ViewIR(); - CalculatePredecessors(&IR); + SpillSlotCount = 0; + 
Graph->SpillStack.clear(); - while (1) { - HadFullRA = true; + CalculatePredecessors(&IR); - // Virtual allocation pass runs the compaction pass per run - Changed |= RunAllocateVirtualRegisters(IREmit); + while (1) { + HadFullRA = true; - if (HadFullRA) { - break; - } + // Virtual allocation pass runs the compaction pass per run + Changed |= RunAllocateVirtualRegisters(IREmit); - SpillOne(IREmit); - Changed = true; - // We need to rerun compaction after spilling - CompactionPass->Run(IREmit); + if (HadFullRA) { + break; } - Graph->AllocData->SpillSlotCount = Graph->SpillStack.size(); - - return Changed; + SpillOne(IREmit); + Changed = true; + // We need to rerun compaction after spilling + CompactionPass->Run(IREmit); } - fextl::unique_ptr CreateRegisterAllocationPass(FEXCore::IR::Pass* CompactionPass, bool SupportsAVX) { - return fextl::make_unique(CompactionPass, SupportsAVX); - } + Graph->AllocData->SpillSlotCount = Graph->SpillStack.size(); + + return Changed; +} + +fextl::unique_ptr CreateRegisterAllocationPass(FEXCore::IR::Pass* CompactionPass, bool SupportsAVX) { + return fextl::make_unique(CompactionPass, SupportsAVX); } +} // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp b/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp index 39f83e8fe0..f6f1cab183 100644 --- a/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp @@ -25,19 +25,19 @@ desc: Sanity Checking #include namespace { - struct BlockInfo { - fextl::vector Predecessors; - fextl::vector Successors; - }; -} +struct BlockInfo { + fextl::vector Predecessors; + fextl::vector Successors; +}; +} // namespace namespace FEXCore::IR::Validation { class ValueDominanceValidation final : public FEXCore::IR::Pass { public: - bool Run(IREmitter *IREmit) override; + bool Run(IREmitter* IREmit) override; }; -bool ValueDominanceValidation::Run(IREmitter *IREmit) { +bool 
ValueDominanceValidation::Run(IREmitter* IREmit) { FEXCORE_PROFILE_SCOPED("PassManager::ValueDominanceValidation"); bool HadError = false; @@ -48,43 +48,43 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) { for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) { - BlockInfo *CurrentBlock = &OffsetToBlockMap.try_emplace(CurrentIR.GetID(BlockNode)).first->second; + BlockInfo* CurrentBlock = &OffsetToBlockMap.try_emplace(CurrentIR.GetID(BlockNode)).first->second; for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) { switch (IROp->Op) { - case IR::OP_CONDJUMP: { - auto Op = IROp->CW(); + case IR::OP_CONDJUMP: { + auto Op = IROp->CW(); - OrderedNode *TrueTargetNode = CurrentIR.GetNode(Op->TrueBlock); - OrderedNode *FalseTargetNode = CurrentIR.GetNode(Op->FalseBlock); + OrderedNode* TrueTargetNode = CurrentIR.GetNode(Op->TrueBlock); + OrderedNode* FalseTargetNode = CurrentIR.GetNode(Op->FalseBlock); - CurrentBlock->Successors.emplace_back(TrueTargetNode); - CurrentBlock->Successors.emplace_back(FalseTargetNode); - - { - auto Block = &OffsetToBlockMap.try_emplace(Op->TrueBlock.ID()).first->second; - Block->Predecessors.emplace_back(BlockNode); - } + CurrentBlock->Successors.emplace_back(TrueTargetNode); + CurrentBlock->Successors.emplace_back(FalseTargetNode); - { - auto Block = &OffsetToBlockMap.try_emplace(Op->FalseBlock.ID()).first->second; - Block->Predecessors.emplace_back(BlockNode); - } + { + auto Block = &OffsetToBlockMap.try_emplace(Op->TrueBlock.ID()).first->second; + Block->Predecessors.emplace_back(BlockNode); + } - break; + { + auto Block = &OffsetToBlockMap.try_emplace(Op->FalseBlock.ID()).first->second; + Block->Predecessors.emplace_back(BlockNode); } - case IR::OP_JUMP: { - auto Op = IROp->CW(); - OrderedNode *TargetNode = CurrentIR.GetNode(Op->Header.Args[0]); - CurrentBlock->Successors.emplace_back(TargetNode); - - { - auto Block = OffsetToBlockMap.try_emplace(Op->Header.Args[0].ID()).first; - 
Block->second.Predecessors.emplace_back(BlockNode); - } - break; + + break; + } + case IR::OP_JUMP: { + auto Op = IROp->CW(); + OrderedNode* TargetNode = CurrentIR.GetNode(Op->Header.Args[0]); + CurrentBlock->Successors.emplace_back(TargetNode); + + { + auto Block = OffsetToBlockMap.try_emplace(Op->Header.Args[0].ID()).first; + Block->second.Predecessors.emplace_back(BlockNode); } - default: break; + break; + } + default: break; } } } @@ -97,14 +97,17 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) { const uint8_t NumArgs = IR::GetRAArgs(IROp->Op); for (uint32_t i = 0; i < NumArgs; ++i) { - if (IROp->Args[i].IsInvalid()) continue; - if (CurrentIR.GetOp(IROp->Args[i])->Op == OP_IRHEADER) continue; + if (IROp->Args[i].IsInvalid()) { + continue; + } + if (CurrentIR.GetOp(IROp->Args[i])->Op == OP_IRHEADER) { + continue; + } OrderedNodeWrapper Arg = IROp->Args[i]; // We must ensure domininance of all SSA arguments - if (Arg.ID() >= BlockIROp->Begin.ID() && - Arg.ID() < BlockIROp->Last.ID()) { + if (Arg.ID() >= BlockIROp->Begin.ID() && Arg.ID() < BlockIROp->Last.ID()) { // If the SSA argument is defined INSIDE this block // then it must only be declared prior to this instruction // Eg: Valid @@ -122,8 +125,7 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) { HadError |= true; Errors << "Inst %" << CodeID << ": Arg[" << i << "] %" << Arg.ID() << " definition does not dominate this use!" << std::endl; } - } - else if (Arg.ID() < BlockIROp->Begin.ID()) { + } else if (Arg.ID() < BlockIROp->Begin.ID()) { // If the SSA argument is defined BEFORE this block // then THIS block needs to be dominated by the flow of blocks up until this point @@ -149,13 +151,13 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) { // ... 
// We need to walk the predecessors to see if the value comes from there - fextl::set Predecessors { BlockNode }; + fextl::set Predecessors {BlockNode}; // Recursively gather all predecessors of BlockNode for (auto NodeIt = Predecessors.begin(); NodeIt != Predecessors.end();) { auto PredBlock = &OffsetToBlockMap.try_emplace(CurrentIR.GetID(*NodeIt)).first->second; ++NodeIt; - for (auto *Pred : PredBlock->Predecessors) { + for (auto* Pred : PredBlock->Predecessors) { if (Predecessors.insert(Pred).second) { // New blocks added, so repeat from the beginning to pull in their predecessors NodeIt = Predecessors.begin(); @@ -168,8 +170,7 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) { for (auto* Pred : Predecessors) { auto PredIROp = CurrentIR.GetOp(Pred); - if (Arg.ID() >= PredIROp->Begin.ID() && - Arg.ID() < PredIROp->Last.ID()) { + if (Arg.ID() >= PredIROp->Begin.ID() && Arg.ID() < PredIROp->Last.ID()) { FoundPredDefine = true; break; } @@ -178,10 +179,10 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) { if (!FoundPredDefine) { HadError |= true; - Errors << "Inst %" << CodeID << ": Arg[" << i << "] %" << Arg.ID() << " definition does not dominate this use! But was defined before this block!" << std::endl; + Errors << "Inst %" << CodeID << ": Arg[" << i << "] %" << Arg.ID() + << " definition does not dominate this use! But was defined before this block!" 
<< std::endl; } - } - else if (Arg.ID() > BlockIROp->Last.ID()) { + } else if (Arg.ID() > BlockIROp->Last.ID()) { // If this SSA argument is defined AFTER this block then it is just completely broken // Eg: Invalid // CodeBlock_1: @@ -212,4 +213,4 @@ fextl::unique_ptr CreateValueDominanceValidation() { return fextl::make_unique(); } -} +} // namespace FEXCore::IR::Validation diff --git a/FEXCore/Source/Utils/Allocator.cpp b/FEXCore/Source/Utils/Allocator.cpp index b4a4d55388..a46ae8f7ce 100644 --- a/FEXCore/Source/Utils/Allocator.cpp +++ b/FEXCore/Source/Utils/Allocator.cpp @@ -26,332 +26,327 @@ #include extern "C" { - typedef void* (*mmap_hook_type)( - void *addr, size_t length, int prot, int flags, - int fd, off_t offset); - typedef int (*munmap_hook_type)(void *addr, size_t length); +typedef void* (*mmap_hook_type)(void* addr, size_t length, int prot, int flags, int fd, off_t offset); +typedef int (*munmap_hook_type)(void* addr, size_t length); #ifdef ENABLE_JEMALLOC - extern mmap_hook_type je___mmap_hook; - extern munmap_hook_type je___munmap_hook; +extern mmap_hook_type je___mmap_hook; +extern munmap_hook_type je___munmap_hook; #endif } namespace fextl::pmr { - static fextl::pmr::default_resource FEXDefaultResource; - std::pmr::memory_resource* get_default_resource() { - return &FEXDefaultResource; - } +static fextl::pmr::default_resource FEXDefaultResource; +std::pmr::memory_resource* get_default_resource() { + return &FEXDefaultResource; } +} // namespace fextl::pmr #ifndef _WIN32 namespace FEXCore::Allocator { - MMAP_Hook mmap {::mmap}; - MUNMAP_Hook munmap {::munmap}; +MMAP_Hook mmap {::mmap}; +MUNMAP_Hook munmap {::munmap}; - uint64_t HostVASize{}; +uint64_t HostVASize {}; - using GLIBC_MALLOC_Hook = void*(*)(size_t, const void *caller); - using GLIBC_REALLOC_Hook = void*(*)(void*, size_t, const void *caller); - using GLIBC_FREE_Hook = void(*)(void*, const void *caller); +using GLIBC_MALLOC_Hook = void* (*)(size_t, const void* caller); +using 
GLIBC_REALLOC_Hook = void* (*)(void*, size_t, const void* caller); +using GLIBC_FREE_Hook = void (*)(void*, const void* caller); - fextl::unique_ptr Alloc64{}; +fextl::unique_ptr Alloc64 {}; - void *FEX_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) { - void *Result = Alloc64->Mmap(addr, length, prot, flags, fd, offset); - if (Result >= (void*)-4096) { - errno = -(uint64_t)Result; - return (void*)-1; - } - return Result; +void* FEX_mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) { + void* Result = Alloc64->Mmap(addr, length, prot, flags, fd, offset); + if (Result >= (void*)-4096) { + errno = -(uint64_t)Result; + return (void*)-1; } - int FEX_munmap(void *addr, size_t length) { - int Result = Alloc64->Munmap(addr, length); + return Result; +} +int FEX_munmap(void* addr, size_t length) { + int Result = Alloc64->Munmap(addr, length); - if (Result != 0) { - errno = -Result; - return -1; - } - return Result; + if (Result != 0) { + errno = -Result; + return -1; } + return Result; +} - // This function disables glibc's ability to allocate memory through the `sbrk` interface. - // This is run early in the lifecycle of FEX in order to make sure no 64-bit pointers can make it to the guest 32-bit application. - // - // How this works is that this allocates a single page at the current sbrk pointer (aligned upward to page size). This makes it - // so that when the sbrk syscall is used to allocate more memory, it fails with an ENOMEM since it runs in to the allocated guard page. - // - // glibc notices the sbrk failure and falls back to regular mmap based allocations when this occurs. Ensuring that memory can still be allocated. - void *DisableSBRKAllocations() { - void* INVALID_PTR = reinterpret_cast(~0ULL); - // Get the starting sbrk pointer. - void *StartingSBRK = sbrk(0); - if (StartingSBRK == INVALID_PTR) { - // If sbrk is already returning invalid pointers then nothing to do here. 
- return INVALID_PTR; - } +// This function disables glibc's ability to allocate memory through the `sbrk` interface. +// This is run early in the lifecycle of FEX in order to make sure no 64-bit pointers can make it to the guest 32-bit application. +// +// How this works is that this allocates a single page at the current sbrk pointer (aligned upward to page size). This makes it +// so that when the sbrk syscall is used to allocate more memory, it fails with an ENOMEM since it runs in to the allocated guard page. +// +// glibc notices the sbrk failure and falls back to regular mmap based allocations when this occurs. Ensuring that memory can still be allocated. +void* DisableSBRKAllocations() { + void* INVALID_PTR = reinterpret_cast(~0ULL); + // Get the starting sbrk pointer. + void* StartingSBRK = sbrk(0); + if (StartingSBRK == INVALID_PTR) { + // If sbrk is already returning invalid pointers then nothing to do here. + return INVALID_PTR; + } - // Now allocate the next page after the sbrk address to ensure it can't grow. - // In most cases at the start of `main` this will already be page aligned, which means subsequent `sbrk` - // calls won't allocate any memory through that. - void* AlignedBRK = reinterpret_cast(FEXCore::AlignUp(reinterpret_cast(StartingSBRK), FHU::FEX_PAGE_SIZE)); - void *AfterBRK = mmap(AlignedBRK, FHU::FEX_PAGE_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE | MAP_NORESERVE, -1, 0); - if (AfterBRK == INVALID_PTR) { - // Couldn't allocate the page after the aligned brk? This should never happen. - // FEXCore::LogMan isn't configured yet so we just need to print the message. - fextl::fmt::print("Couldn't allocate page after SBRK.\n"); - FEX_TRAP_EXECUTION; - return INVALID_PTR; - } + // Now allocate the next page after the sbrk address to ensure it can't grow. + // In most cases at the start of `main` this will already be page aligned, which means subsequent `sbrk` + // calls won't allocate any memory through that. 
+ void* AlignedBRK = reinterpret_cast(FEXCore::AlignUp(reinterpret_cast(StartingSBRK), FHU::FEX_PAGE_SIZE)); + void* AfterBRK = mmap(AlignedBRK, FHU::FEX_PAGE_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE | MAP_NORESERVE, -1, 0); + if (AfterBRK == INVALID_PTR) { + // Couldn't allocate the page after the aligned brk? This should never happen. + // FEXCore::LogMan isn't configured yet so we just need to print the message. + fextl::fmt::print("Couldn't allocate page after SBRK.\n"); + FEX_TRAP_EXECUTION; + return INVALID_PTR; + } - // Now that the page after sbrk is allocated, FEX needs to consume the remaining sbrk space. - // This will be anywhere from [0, 4096) bytes. - // Start allocating from 1024 byte increments just to make any steps a bit faster. - intptr_t IncrementAmount = 1024; - for (; IncrementAmount != 0; IncrementAmount >>= 1) { - while (sbrk(IncrementAmount) != INVALID_PTR); - } - return AlignedBRK; + // Now that the page after sbrk is allocated, FEX needs to consume the remaining sbrk space. + // This will be anywhere from [0, 4096) bytes. + // Start allocating from 1024 byte increments just to make any steps a bit faster. 
+ intptr_t IncrementAmount = 1024; + for (; IncrementAmount != 0; IncrementAmount >>= 1) { + while (sbrk(IncrementAmount) != INVALID_PTR) + ; } + return AlignedBRK; +} - void ReenableSBRKAllocations(void* Ptr) { - const void* INVALID_PTR = reinterpret_cast(~0ULL); - if (Ptr != INVALID_PTR) { - munmap(Ptr, FHU::FEX_PAGE_SIZE); - } +void ReenableSBRKAllocations(void* Ptr) { + const void* INVALID_PTR = reinterpret_cast(~0ULL); + if (Ptr != INVALID_PTR) { + munmap(Ptr, FHU::FEX_PAGE_SIZE); } +} #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - void SetupHooks() { - Alloc64 = Alloc::OSAllocator::Create64BitAllocator(); +void SetupHooks() { + Alloc64 = Alloc::OSAllocator::Create64BitAllocator(); #ifdef ENABLE_JEMALLOC - je___mmap_hook = FEX_mmap; - je___munmap_hook = FEX_munmap; + je___mmap_hook = FEX_mmap; + je___munmap_hook = FEX_munmap; #endif - FEXCore::Allocator::mmap = FEX_mmap; - FEXCore::Allocator::munmap = FEX_munmap; - } + FEXCore::Allocator::mmap = FEX_mmap; + FEXCore::Allocator::munmap = FEX_munmap; +} - void ClearHooks() { +void ClearHooks() { #ifdef ENABLE_JEMALLOC - je___mmap_hook = ::mmap; - je___munmap_hook = ::munmap; + je___mmap_hook = ::mmap; + je___munmap_hook = ::munmap; #endif - FEXCore::Allocator::mmap = ::mmap; - FEXCore::Allocator::munmap = ::munmap; + FEXCore::Allocator::mmap = ::mmap; + FEXCore::Allocator::munmap = ::munmap; - // XXX: This is currently a leak. - // We can't work around this yet until static initializers that allocate memory are completely removed from our codebase - // Luckily we only remove this on process shutdown, so the kernel will do the cleanup for us - Alloc64.release(); - } + // XXX: This is currently a leak. 
+ // We can't work around this yet until static initializers that allocate memory are completely removed from our codebase + // Luckily we only remove this on process shutdown, so the kernel will do the cleanup for us + Alloc64.release(); +} #pragma GCC diagnostic pop - FEX_DEFAULT_VISIBILITY size_t DetermineVASize() { - if (HostVASize) { - return HostVASize; - } - - static constexpr std::array TLBSizes = { - 57, - 52, - 48, - 47, - 42, - 39, - 36, - }; +FEX_DEFAULT_VISIBILITY size_t DetermineVASize() { + if (HostVASize) { + return HostVASize; + } - for (auto Bits : TLBSizes) { - uintptr_t Size = 1ULL << Bits; - // Just try allocating - // We can't actually determine VA size on ARM safely - auto Find = [](uintptr_t Size) -> bool { - for (int i = 0; i < 64; ++i) { - // Try grabbing a some of the top pages of the range - // x86 allocates some high pages in the top end - void *Ptr = ::mmap(reinterpret_cast(Size - FHU::FEX_PAGE_SIZE * i), FHU::FEX_PAGE_SIZE, PROT_NONE, MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (Ptr != (void*)~0ULL) { - ::munmap(Ptr, FHU::FEX_PAGE_SIZE); - if (Ptr == (void*)(Size - FHU::FEX_PAGE_SIZE * i)) { - return true; - } + static constexpr std::array TLBSizes = { + 57, 52, 48, 47, 42, 39, 36, + }; + + for (auto Bits : TLBSizes) { + uintptr_t Size = 1ULL << Bits; + // Just try allocating + // We can't actually determine VA size on ARM safely + auto Find = [](uintptr_t Size) -> bool { + for (int i = 0; i < 64; ++i) { + // Try grabbing a some of the top pages of the range + // x86 allocates some high pages in the top end + void* Ptr = ::mmap(reinterpret_cast(Size - FHU::FEX_PAGE_SIZE * i), FHU::FEX_PAGE_SIZE, PROT_NONE, + MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (Ptr != (void*)~0ULL) { + ::munmap(Ptr, FHU::FEX_PAGE_SIZE); + if (Ptr == (void*)(Size - FHU::FEX_PAGE_SIZE * i)) { + return true; } } - return false; - }; - - if (Find(Size)) { - HostVASize = Bits; - return Bits; } - } + return false; + }; - 
LOGMAN_MSG_A_FMT("Couldn't determine host VA size"); - FEX_UNREACHABLE; + if (Find(Size)) { + HostVASize = Bits; + return Bits; + } } - #define STEAL_LOG(...) // fprintf(stderr, __VA_ARGS__) + LOGMAN_MSG_A_FMT("Couldn't determine host VA size"); + FEX_UNREACHABLE; +} - fextl::vector StealMemoryRegion(uintptr_t Begin, uintptr_t End) { - void * const StackLocation = alloca(0); - const uintptr_t StackLocation_u64 = reinterpret_cast(StackLocation); - fextl::vector Regions; +#define STEAL_LOG(...) // fprintf(stderr, __VA_ARGS__) - int MapsFD = open("/proc/self/maps", O_RDONLY); - LogMan::Throw::AFmt(MapsFD != -1, "Failed to open /proc/self/maps"); +fextl::vector StealMemoryRegion(uintptr_t Begin, uintptr_t End) { + void* const StackLocation = alloca(0); + const uintptr_t StackLocation_u64 = reinterpret_cast(StackLocation); + fextl::vector Regions; - enum {ParseBegin, ParseEnd, ScanEnd} State = ParseBegin; + int MapsFD = open("/proc/self/maps", O_RDONLY); + LogMan::Throw::AFmt(MapsFD != -1, "Failed to open /proc/self/maps"); - uintptr_t RegionBegin = 0; - uintptr_t RegionEnd = 0; + enum { ParseBegin, ParseEnd, ScanEnd } State = ParseBegin; - uintptr_t PreviousMapEnd = 0; + uintptr_t RegionBegin = 0; + uintptr_t RegionEnd = 0; - char Buffer[2048]; - const char *Cursor; - ssize_t Remaining = 0; + uintptr_t PreviousMapEnd = 0; - for(;;) { + char Buffer[2048]; + const char* Cursor; + ssize_t Remaining = 0; - if (Remaining == 0) { - do { - Remaining = read(MapsFD, Buffer, sizeof(Buffer)); - } while ( Remaining == -1 && errno == EAGAIN); + for (;;) { - Cursor = Buffer; - } + if (Remaining == 0) { + do { + Remaining = read(MapsFD, Buffer, sizeof(Buffer)); + } while (Remaining == -1 && errno == EAGAIN); - if (Remaining == 0 && State == ParseBegin) { - STEAL_LOG("[%d] EndOfFile; RegionBegin: %016lX RegionEnd: %016lX\n", __LINE__, RegionBegin, RegionEnd); + Cursor = Buffer; + } - const auto MapBegin = std::max(RegionEnd, Begin); - const auto MapEnd = End; + if (Remaining == 0 && 
State == ParseBegin) { + STEAL_LOG("[%d] EndOfFile; RegionBegin: %016lX RegionEnd: %016lX\n", __LINE__, RegionBegin, RegionEnd); - STEAL_LOG(" MapBegin: %016lX MapEnd: %016lX\n", MapBegin, MapEnd); + const auto MapBegin = std::max(RegionEnd, Begin); + const auto MapEnd = End; - if (MapEnd > MapBegin) { - STEAL_LOG(" Reserving\n"); + STEAL_LOG(" MapBegin: %016lX MapEnd: %016lX\n", MapBegin, MapEnd); - auto MapSize = MapEnd - MapBegin; - auto Alloc = mmap((void*)MapBegin, MapSize, PROT_NONE, MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED_NOREPLACE, -1, 0); + if (MapEnd > MapBegin) { + STEAL_LOG(" Reserving\n"); - LogMan::Throw::AFmt(Alloc != MAP_FAILED, "mmap({:x},{:x}) failed", MapBegin, MapSize); - LogMan::Throw::AFmt(Alloc == (void*)MapBegin, "mmap({},{:x}) returned {} instead of {:x}", Alloc, MapBegin); + auto MapSize = MapEnd - MapBegin; + auto Alloc = mmap((void*)MapBegin, MapSize, PROT_NONE, MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED_NOREPLACE, -1, 0); - Regions.push_back({(void*)MapBegin, MapSize}); - } + LogMan::Throw::AFmt(Alloc != MAP_FAILED, "mmap({:x},{:x}) failed", MapBegin, MapSize); + LogMan::Throw::AFmt(Alloc == (void*)MapBegin, "mmap({},{:x}) returned {} instead of {:x}", Alloc, MapBegin); - close(MapsFD); - return Regions; + Regions.push_back({(void*)MapBegin, MapSize}); } - LogMan::Throw::AFmt(Remaining > 0, "Failed to parse /proc/self/maps"); + close(MapsFD); + return Regions; + } - auto c = *Cursor++; - Remaining--; + LogMan::Throw::AFmt(Remaining > 0, "Failed to parse /proc/self/maps"); - if (State == ScanEnd) { - if (c == '\n') { - State = ParseBegin; - } - continue; - } + auto c = *Cursor++; + Remaining--; - if (State == ParseBegin) { - if (c == '-') { - STEAL_LOG("[%d] ParseBegin; RegionBegin: %016lX RegionEnd: %016lX\n", __LINE__, RegionBegin, RegionEnd); + if (State == ScanEnd) { + if (c == '\n') { + State = ParseBegin; + } + continue; + } - const auto MapBegin = std::max(RegionEnd, Begin); - const auto MapEnd = 
std::min(RegionBegin, End); + if (State == ParseBegin) { + if (c == '-') { + STEAL_LOG("[%d] ParseBegin; RegionBegin: %016lX RegionEnd: %016lX\n", __LINE__, RegionBegin, RegionEnd); - // Store the location we are going to map. - PreviousMapEnd = MapEnd; + const auto MapBegin = std::max(RegionEnd, Begin); + const auto MapEnd = std::min(RegionBegin, End); - STEAL_LOG(" MapBegin: %016lX MapEnd: %016lX\n", MapBegin, MapEnd); + // Store the location we are going to map. + PreviousMapEnd = MapEnd; - if (MapEnd > MapBegin) { - STEAL_LOG(" Reserving\n"); + STEAL_LOG(" MapBegin: %016lX MapEnd: %016lX\n", MapBegin, MapEnd); - auto MapSize = MapEnd - MapBegin; - auto Alloc = mmap((void*)MapBegin, MapSize, PROT_NONE, MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED_NOREPLACE, -1, 0); + if (MapEnd > MapBegin) { + STEAL_LOG(" Reserving\n"); - LogMan::Throw::AFmt(Alloc != MAP_FAILED, "mmap({:x},{:x}) failed", MapBegin, MapSize); - LogMan::Throw::AFmt(Alloc == (void*)MapBegin, "mmap({},{:x}) returned {} instead of {:x}", Alloc, MapBegin); + auto MapSize = MapEnd - MapBegin; + auto Alloc = mmap((void*)MapBegin, MapSize, PROT_NONE, MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED_NOREPLACE, -1, 0); - Regions.push_back({(void*)MapBegin, MapSize}); - } + LogMan::Throw::AFmt(Alloc != MAP_FAILED, "mmap({:x},{:x}) failed", MapBegin, MapSize); + LogMan::Throw::AFmt(Alloc == (void*)MapBegin, "mmap({},{:x}) returned {} instead of {:x}", Alloc, MapBegin); - RegionBegin = 0; - RegionEnd = 0; - State = ParseEnd; - continue; - } else { - LogMan::Throw::AFmt(std::isalpha(c) || std::isdigit(c), "Unexpected char '{}' in ParseBegin", c); - RegionBegin = (RegionBegin << 4) | (c <= '9' ? 
(c - '0') : (c - 'a' + 10)); + Regions.push_back({(void*)MapBegin, MapSize}); } + + RegionBegin = 0; + RegionEnd = 0; + State = ParseEnd; + continue; + } else { + LogMan::Throw::AFmt(std::isalpha(c) || std::isdigit(c), "Unexpected char '{}' in ParseBegin", c); + RegionBegin = (RegionBegin << 4) | (c <= '9' ? (c - '0') : (c - 'a' + 10)); } + } - if (State == ParseEnd) { - if (c == ' ') { - STEAL_LOG("[%d] ParseEnd; RegionBegin: %016lX RegionEnd: %016lX\n", __LINE__, RegionBegin, RegionEnd); + if (State == ParseEnd) { + if (c == ' ') { + STEAL_LOG("[%d] ParseEnd; RegionBegin: %016lX RegionEnd: %016lX\n", __LINE__, RegionBegin, RegionEnd); - State = ScanEnd; + State = ScanEnd; - // If the previous map's ending and the region we just parsed overlap the stack then we need to save the stack mapping. - // Otherwise we will have severely limited stack size which crashes quickly. - if (PreviousMapEnd <= StackLocation_u64 && RegionEnd > StackLocation_u64) { - auto BelowStackRegion = Regions.back(); - LOGMAN_THROW_AA_FMT(reinterpret_cast(BelowStackRegion.Ptr) + BelowStackRegion.Size == PreviousMapEnd, - "This needs to match"); + // If the previous map's ending and the region we just parsed overlap the stack then we need to save the stack mapping. + // Otherwise we will have severely limited stack size which crashes quickly. 
+ if (PreviousMapEnd <= StackLocation_u64 && RegionEnd > StackLocation_u64) { + auto BelowStackRegion = Regions.back(); + LOGMAN_THROW_AA_FMT(reinterpret_cast(BelowStackRegion.Ptr) + BelowStackRegion.Size == PreviousMapEnd, "This needs to " + "match"); - // Allocate the region under the stack as READ | WRITE so the stack can still grow - auto Alloc = mmap(BelowStackRegion.Ptr, BelowStackRegion.Size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED, -1, 0); + // Allocate the region under the stack as READ | WRITE so the stack can still grow + auto Alloc = mmap(BelowStackRegion.Ptr, BelowStackRegion.Size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED, -1, 0); - LogMan::Throw::AFmt(Alloc != MAP_FAILED, "mmap({:x},{:x}) failed", BelowStackRegion.Ptr, BelowStackRegion.Size); - LogMan::Throw::AFmt(Alloc == BelowStackRegion.Ptr, "mmap({},{:x}) returned {} instead of {:x}", Alloc, BelowStackRegion.Ptr); + LogMan::Throw::AFmt(Alloc != MAP_FAILED, "mmap({:x},{:x}) failed", BelowStackRegion.Ptr, BelowStackRegion.Size); + LogMan::Throw::AFmt(Alloc == BelowStackRegion.Ptr, "mmap({},{:x}) returned {} instead of {:x}", Alloc, BelowStackRegion.Ptr); - Regions.pop_back(); - } - continue; - } else { - LogMan::Throw::AFmt(std::isalpha(c) || std::isdigit(c), "Unexpected char '{}' in ParseEnd", c); - RegionEnd = (RegionEnd << 4) | (c <= '9' ? (c - '0') : (c - 'a' + 10)); + Regions.pop_back(); } + continue; + } else { + LogMan::Throw::AFmt(std::isalpha(c) || std::isdigit(c), "Unexpected char '{}' in ParseEnd", c); + RegionEnd = (RegionEnd << 4) | (c <= '9' ? 
(c - '0') : (c - 'a' + 10)); } } - - ERROR_AND_DIE_FMT("unreachable"); } - fextl::vector Steal48BitVA() { - size_t Bits = FEXCore::Allocator::DetermineVASize(); - if (Bits < 48) { - return {}; - } + ERROR_AND_DIE_FMT("unreachable"); +} - uintptr_t Begin48BitVA = 0x0'8000'0000'0000ULL; - uintptr_t End48BitVA = 0x1'0000'0000'0000ULL; - return StealMemoryRegion(Begin48BitVA, End48BitVA); +fextl::vector Steal48BitVA() { + size_t Bits = FEXCore::Allocator::DetermineVASize(); + if (Bits < 48) { + return {}; } - void ReclaimMemoryRegion(const fextl::vector &Regions) { - for (const auto &Region: Regions) { - ::munmap(Region.Ptr, Region.Size); - } + uintptr_t Begin48BitVA = 0x0'8000'0000'0000ULL; + uintptr_t End48BitVA = 0x1'0000'0000'0000ULL; + return StealMemoryRegion(Begin48BitVA, End48BitVA); +} + +void ReclaimMemoryRegion(const fextl::vector& Regions) { + for (const auto& Region : Regions) { + ::munmap(Region.Ptr, Region.Size); } +} - void LockBeforeFork(FEXCore::Core::InternalThreadState *Thread) { - if (Alloc64) { - Alloc64->LockBeforeFork(Thread); - } +void LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) { + if (Alloc64) { + Alloc64->LockBeforeFork(Thread); } +} - void UnlockAfterFork(FEXCore::Core::InternalThreadState *Thread, bool Child) { - if (Alloc64) { - Alloc64->UnlockAfterFork(Thread, Child); - } +void UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child) { + if (Alloc64) { + Alloc64->UnlockAfterFork(Thread, Child); } } +} // namespace FEXCore::Allocator #endif diff --git a/FEXCore/Source/Utils/Allocator/64BitAllocator.cpp b/FEXCore/Source/Utils/Allocator/64BitAllocator.cpp index 9c2d27133b..9d332ffa28 100644 --- a/FEXCore/Source/Utils/Allocator/64BitAllocator.cpp +++ b/FEXCore/Source/Utils/Allocator/64BitAllocator.cpp @@ -30,166 +30,166 @@ namespace Alloc::OSAllocator { - thread_local FEXCore::Core::InternalThreadState *TLSThread{}; +thread_local FEXCore::Core::InternalThreadState* TLSThread {}; - void 
RegisterTLSData(FEXCore::Core::InternalThreadState *Thread) { - TLSThread = Thread; +void RegisterTLSData(FEXCore::Core::InternalThreadState* Thread) { + TLSThread = Thread; +} + +void UninstallTLSData(FEXCore::Core::InternalThreadState* Thread) { + TLSThread = nullptr; +} + +class OSAllocator_64Bit final : public Alloc::HostAllocator { +public: + OSAllocator_64Bit(); + virtual ~OSAllocator_64Bit(); + void* AllocateSlab(size_t Size) override { + return nullptr; } + void DeallocateSlab(void* Ptr, size_t Size) override {} + + void* Mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) override; + int Munmap(void* addr, size_t length) override; - void UninstallTLSData(FEXCore::Core::InternalThreadState *Thread) { - TLSThread = nullptr; + void LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) override { + AllocationMutex.lock(); } - class OSAllocator_64Bit final : public Alloc::HostAllocator { - public: - OSAllocator_64Bit(); - virtual ~OSAllocator_64Bit(); - void *AllocateSlab(size_t Size) override { return nullptr; } - void DeallocateSlab(void *Ptr, size_t Size) override {} + void UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child) override { + if (Child) { + AllocationMutex.StealAndDropActiveLocks(); + } else { + AllocationMutex.unlock(); + } + } - void *Mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) override; - int Munmap(void *addr, size_t length) override; +private: + // Upper bound is the maximum virtual address space of the host processor + uintptr_t UPPER_BOUND = (1ULL << 57); - void LockBeforeFork(FEXCore::Core::InternalThreadState *Thread) override { - AllocationMutex.lock(); - } + // Lower bound is the starting of the range just past the lower 32bits + constexpr static uintptr_t LOWER_BOUND = 0x1'0000'0000ULL; - void UnlockAfterFork(FEXCore::Core::InternalThreadState *Thread, bool Child) override { - if (Child) { - AllocationMutex.StealAndDropActiveLocks(); - } - else { - 
AllocationMutex.unlock(); - } - } + uintptr_t UPPER_BOUND_PAGE = UPPER_BOUND / FHU::FEX_PAGE_SIZE; + constexpr static uintptr_t LOWER_BOUND_PAGE = LOWER_BOUND / FHU::FEX_PAGE_SIZE; - private: - // Upper bound is the maximum virtual address space of the host processor - uintptr_t UPPER_BOUND = (1ULL << 57); - - // Lower bound is the starting of the range just past the lower 32bits - constexpr static uintptr_t LOWER_BOUND = 0x1'0000'0000ULL; - - uintptr_t UPPER_BOUND_PAGE = UPPER_BOUND / FHU::FEX_PAGE_SIZE; - constexpr static uintptr_t LOWER_BOUND_PAGE = LOWER_BOUND / FHU::FEX_PAGE_SIZE; - - struct ReservedVMARegion { - uintptr_t Base; - // Could be number of pages if we want to pack this in to 12 bytes - uint64_t RegionSize; - }; - - bool MergeReservedRegionIfPossible(ReservedVMARegion *Region, uintptr_t NextPtr, uint64_t NextSize) { - constexpr uint64_t MaxReservedRegionSize = 64ULL * 1024 * 1024 * 1024; // 64GB - uintptr_t RegionEnd = Region->Base + Region->RegionSize; - uint64_t NewRegionSize = Region->RegionSize + NextSize; - if (RegionEnd == NextPtr && - NewRegionSize <= MaxReservedRegionSize) { - // Append the contiguous region - Region->RegionSize = NewRegionSize; - return true; - } - return false; - } + struct ReservedVMARegion { + uintptr_t Base; + // Could be number of pages if we want to pack this in to 12 bytes + uint64_t RegionSize; + }; - struct LiveVMARegion { - ReservedVMARegion *SlabInfo; - uint64_t FreeSpace{}; - uint64_t NumManagedPages{}; - uint32_t LastPageAllocation{}; - bool HadMunmap{}; - - // Align UsedPages so it pads to the next page. - // Necessary to take advantage of madvise zero page pooling. 
- using FlexBitElementType = uint64_t; - alignas(4096) FEXCore::FlexBitSet UsedPages; - - // This returns the size of the LiveVMARegion in addition to the flex set that tracks the used data - // The LiveVMARegion lives at the start of the VMA region which means on initialization we need to set that - // tracked ranged as used immediately - static size_t GetSizeWithFlexSet(size_t Size) { - // One element per page - - // 0x10'0000'0000 bytes - // 0x100'0000 Pages - // 1 bit per page for tracking means 0x20'0000 (Pages / 8) bytes of flex space - // Which is 2MB of tracking - uint64_t NumElements = (Size >> FHU::FEX_PAGE_SHIFT) * sizeof(FlexBitElementType); - return sizeof(LiveVMARegion) + FEXCore::FlexBitSet::Size(NumElements); - } + bool MergeReservedRegionIfPossible(ReservedVMARegion* Region, uintptr_t NextPtr, uint64_t NextSize) { + constexpr uint64_t MaxReservedRegionSize = 64ULL * 1024 * 1024 * 1024; // 64GB + uintptr_t RegionEnd = Region->Base + Region->RegionSize; + uint64_t NewRegionSize = Region->RegionSize + NextSize; + if (RegionEnd == NextPtr && NewRegionSize <= MaxReservedRegionSize) { + // Append the contiguous region + Region->RegionSize = NewRegionSize; + return true; + } + return false; + } - static void InitializeVMARegionUsed(LiveVMARegion *Region, size_t AdditionalSize) { - size_t SizeOfLiveRegion = FEXCore::AlignUp(LiveVMARegion::GetSizeWithFlexSet(Region->SlabInfo->RegionSize), FHU::FEX_PAGE_SIZE); - size_t SizePlusManagedData = SizeOfLiveRegion + AdditionalSize; + struct LiveVMARegion { + ReservedVMARegion* SlabInfo; + uint64_t FreeSpace {}; + uint64_t NumManagedPages {}; + uint32_t LastPageAllocation {}; + bool HadMunmap {}; + + // Align UsedPages so it pads to the next page. + // Necessary to take advantage of madvise zero page pooling. 
+ using FlexBitElementType = uint64_t; + alignas(4096) FEXCore::FlexBitSet UsedPages; + + // This returns the size of the LiveVMARegion in addition to the flex set that tracks the used data + // The LiveVMARegion lives at the start of the VMA region which means on initialization we need to set that + // tracked ranged as used immediately + static size_t GetSizeWithFlexSet(size_t Size) { + // One element per page + + // 0x10'0000'0000 bytes + // 0x100'0000 Pages + // 1 bit per page for tracking means 0x20'0000 (Pages / 8) bytes of flex space + // Which is 2MB of tracking + uint64_t NumElements = (Size >> FHU::FEX_PAGE_SHIFT) * sizeof(FlexBitElementType); + return sizeof(LiveVMARegion) + FEXCore::FlexBitSet::Size(NumElements); + } - Region->FreeSpace = Region->SlabInfo->RegionSize - SizePlusManagedData; + static void InitializeVMARegionUsed(LiveVMARegion* Region, size_t AdditionalSize) { + size_t SizeOfLiveRegion = FEXCore::AlignUp(LiveVMARegion::GetSizeWithFlexSet(Region->SlabInfo->RegionSize), FHU::FEX_PAGE_SIZE); + size_t SizePlusManagedData = SizeOfLiveRegion + AdditionalSize; - size_t NumManagedPages = SizePlusManagedData >> FHU::FEX_PAGE_SHIFT; - size_t ManagedSize = NumManagedPages << FHU::FEX_PAGE_SHIFT; + Region->FreeSpace = Region->SlabInfo->RegionSize - SizePlusManagedData; - // Use madvise to set the full tracking region to zero. - // This ensures unused pages are zero, while not having the backing pages consuming memory. - ::madvise(Region->UsedPages.Memory + ManagedSize, (Region->SlabInfo->RegionSize >> FHU::FEX_PAGE_SHIFT) - ManagedSize, MADV_DONTNEED); + size_t NumManagedPages = SizePlusManagedData >> FHU::FEX_PAGE_SHIFT; + size_t ManagedSize = NumManagedPages << FHU::FEX_PAGE_SHIFT; - // Use madvise to claim WILLNEED on the beginning pages for initial state tracking. - // Improves performance of the following MemClear by not doing a page level fault dance for data necessary to track >170TB of used pages. 
- ::madvise(Region->UsedPages.Memory, ManagedSize, MADV_WILLNEED); + // Use madvise to set the full tracking region to zero. + // This ensures unused pages are zero, while not having the backing pages consuming memory. + ::madvise(Region->UsedPages.Memory + ManagedSize, (Region->SlabInfo->RegionSize >> FHU::FEX_PAGE_SHIFT) - ManagedSize, MADV_DONTNEED); - // Set our reserved pages - Region->UsedPages.MemSet(NumManagedPages); - Region->LastPageAllocation = NumManagedPages; - Region->NumManagedPages = NumManagedPages; - } - }; + // Use madvise to claim WILLNEED on the beginning pages for initial state tracking. + // Improves performance of the following MemClear by not doing a page level fault dance for data necessary to track >170TB of used pages. + ::madvise(Region->UsedPages.Memory, ManagedSize, MADV_WILLNEED); - static_assert(sizeof(LiveVMARegion) == 4096, "Needs to be the size of a page"); + // Set our reserved pages + Region->UsedPages.MemSet(NumManagedPages); + Region->LastPageAllocation = NumManagedPages; + Region->NumManagedPages = NumManagedPages; + } + }; - static_assert(std::is_trivially_copyable::value, "Needs to be trivially copyable"); - static_assert(offsetof(LiveVMARegion, UsedPages) == sizeof(LiveVMARegion), "FlexBitSet needs to be at the end"); + static_assert(sizeof(LiveVMARegion) == 4096, "Needs to be the size of a page"); - using ReservedRegionListType = fex_pmr::list; - using LiveRegionListType = fex_pmr::list; - ReservedRegionListType *ReservedRegions{}; - LiveRegionListType *LiveRegions{}; + static_assert(std::is_trivially_copyable::value, "Needs to be trivially copyable"); + static_assert(offsetof(LiveVMARegion, UsedPages) == sizeof(LiveVMARegion), "FlexBitSet needs to be at the end"); - Alloc::ForwardOnlyIntrusiveArenaAllocator *ObjectAlloc{}; - FEXCore::ForkableUniqueMutex AllocationMutex; - void DetermineVASize(); + using ReservedRegionListType = fex_pmr::list; + using LiveRegionListType = fex_pmr::list; + ReservedRegionListType* 
ReservedRegions {}; + LiveRegionListType* LiveRegions {}; - LiveVMARegion *MakeRegionActive(ReservedRegionListType::iterator ReservedIterator, uint64_t UsedSize) { - ReservedVMARegion *ReservedRegion = *ReservedIterator; + Alloc::ForwardOnlyIntrusiveArenaAllocator* ObjectAlloc {}; + FEXCore::ForkableUniqueMutex AllocationMutex; + void DetermineVASize(); - ReservedRegions->erase(ReservedIterator); + LiveVMARegion* MakeRegionActive(ReservedRegionListType::iterator ReservedIterator, uint64_t UsedSize) { + ReservedVMARegion* ReservedRegion = *ReservedIterator; - // mprotect the new region we've allocated - size_t SizeOfLiveRegion = FEXCore::AlignUp(LiveVMARegion::GetSizeWithFlexSet(ReservedRegion->RegionSize), FHU::FEX_PAGE_SIZE); - size_t SizePlusManagedData = UsedSize + SizeOfLiveRegion; + ReservedRegions->erase(ReservedIterator); - [[maybe_unused]] auto Res = mprotect(reinterpret_cast(ReservedRegion->Base), SizePlusManagedData, PROT_READ | PROT_WRITE); + // mprotect the new region we've allocated + size_t SizeOfLiveRegion = FEXCore::AlignUp(LiveVMARegion::GetSizeWithFlexSet(ReservedRegion->RegionSize), FHU::FEX_PAGE_SIZE); + size_t SizePlusManagedData = UsedSize + SizeOfLiveRegion; - if (Res == -1) { - LOGMAN_MSG_A_FMT("Couldn't mprotect region: {} '{}' Likely occurs when running out of memory or Maximum VMAs", errno, strerror(errno)); - } + [[maybe_unused]] auto Res = mprotect(reinterpret_cast(ReservedRegion->Base), SizePlusManagedData, PROT_READ | PROT_WRITE); - LiveVMARegion *LiveRange = new (reinterpret_cast(ReservedRegion->Base)) LiveVMARegion(); + if (Res == -1) { + LOGMAN_MSG_A_FMT("Couldn't mprotect region: {} '{}' Likely occurs when running out of memory or Maximum VMAs", errno, strerror(errno)); + } - // Copy over the reserved data - LiveRange->SlabInfo = ReservedRegion; + LiveVMARegion* LiveRange = new (reinterpret_cast(ReservedRegion->Base)) LiveVMARegion(); - // Initialize VMA - LiveVMARegion::InitializeVMARegionUsed(LiveRange, UsedSize); + // Copy over 
the reserved data + LiveRange->SlabInfo = ReservedRegion; - // Add to our active tracked ranges - auto LiveIter = LiveRegions->emplace_back(LiveRange); - return LiveIter; - } + // Initialize VMA + LiveVMARegion::InitializeVMARegionUsed(LiveRange, UsedSize); - // 32-bit old kernel workarounds - fextl::vector Steal32BitIfOldKernel(); + // Add to our active tracked ranges + auto LiveIter = LiveRegions->emplace_back(LiveRange); + return LiveIter; + } - void AllocateMemoryRegions(fextl::vector const &Ranges); - LiveVMARegion *FindLiveRegionForAddress(uintptr_t Addr, uintptr_t AddrEnd); - }; + // 32-bit old kernel workarounds + fextl::vector Steal32BitIfOldKernel(); + + void AllocateMemoryRegions(const fextl::vector& Ranges); + LiveVMARegion* FindLiveRegionForAddress(uintptr_t Addr, uintptr_t AddrEnd); +}; void OSAllocator_64Bit::DetermineVASize() { size_t Bits = FEXCore::Allocator::DetermineVASize(); @@ -197,23 +197,22 @@ void OSAllocator_64Bit::DetermineVASize() { UPPER_BOUND = Size; - #if _M_X86_64 // Last page cannot be allocated on x86 - UPPER_BOUND -= FHU::FEX_PAGE_SIZE; - #endif +#if _M_X86_64 // Last page cannot be allocated on x86 + UPPER_BOUND -= FHU::FEX_PAGE_SIZE; +#endif UPPER_BOUND_PAGE = UPPER_BOUND / FHU::FEX_PAGE_SIZE; } -OSAllocator_64Bit::LiveVMARegion *OSAllocator_64Bit::FindLiveRegionForAddress(uintptr_t Addr, uintptr_t AddrEnd) { - LiveVMARegion *LiveRegion{}; +OSAllocator_64Bit::LiveVMARegion* OSAllocator_64Bit::FindLiveRegionForAddress(uintptr_t Addr, uintptr_t AddrEnd) { + LiveVMARegion* LiveRegion {}; // Check active slabs to see if we can fit this for (auto it = LiveRegions->begin(); it != LiveRegions->end(); ++it) { uintptr_t RegionBegin = (*it)->SlabInfo->Base; uintptr_t RegionEnd = RegionBegin + (*it)->SlabInfo->RegionSize; - if (Addr >= RegionBegin && - Addr < RegionEnd) { + if (Addr >= RegionBegin && Addr < RegionEnd) { LiveRegion = *it; // Leave our loop break; @@ -226,10 +225,9 @@ OSAllocator_64Bit::LiveVMARegion 
*OSAllocator_64Bit::FindLiveRegionForAddress(ui // Didn't have a slab that fit this range // Check our reserved regions to see if we have one that fits for (auto it = ReservedRegions->begin(); it != ReservedRegions->end(); ++it) { - ReservedVMARegion *ReservedRegion = *it; + ReservedVMARegion* ReservedRegion = *it; uintptr_t RegionEnd = ReservedRegion->Base + ReservedRegion->RegionSize; - if (Addr >= ReservedRegion->Base && - AddrEnd < RegionEnd) { + if (Addr >= ReservedRegion->Base && AddrEnd < RegionEnd) { // Found one, let's make it active LiveRegion = MakeRegionActive(it, 0); break; @@ -240,9 +238,8 @@ OSAllocator_64Bit::LiveVMARegion *OSAllocator_64Bit::FindLiveRegionForAddress(ui return LiveRegion; } -void *OSAllocator_64Bit::Mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) { - if (addr != 0 && - addr < reinterpret_cast(LOWER_BOUND)) { +void* OSAllocator_64Bit::Mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) { + if (addr != 0 && addr < reinterpret_cast(LOWER_BOUND)) { // If we are asked to allocate something outside of the 64-bit space // Then we need to just hand this to the OS return ::mmap(addr, length, prot, flags, fd, offset); @@ -255,8 +252,7 @@ void *OSAllocator_64Bit::Mmap(void *addr, size_t length, int prot, int flags, in } // If FD is provided then offset must also be page aligned - if (fd != -1 && - offset & ~FHU::FEX_PAGE_MASK) { + if (fd != -1 && offset & ~FHU::FEX_PAGE_MASK) { return reinterpret_cast(-EINVAL); } @@ -274,24 +270,22 @@ void *OSAllocator_64Bit::Mmap(void *addr, size_t length, int prot, int flags, in // This needs a mutex to be thread safe auto lk = FEXCore::GuardSignalDeferringSectionWithFallback(AllocationMutex, TLSThread); - uint64_t AllocatedOffset{}; - LiveVMARegion *LiveRegion{}; + uint64_t AllocatedOffset {}; + LiveVMARegion* LiveRegion {}; if (Fixed || Addr != 0) { LiveRegion = FindLiveRegionForAddress(Addr, AddrEnd); } - again: +again: - auto CheckIfRangeFits = 
[&AllocatedOffset](LiveVMARegion *Region, uint64_t length, int prot, int flags, int fd, off_t offset, uint64_t StartingPosition = 0) -> std::pair { - uint64_t AllocatedPage{~0ULL}; + auto CheckIfRangeFits = [&AllocatedOffset](LiveVMARegion* Region, uint64_t length, int prot, int flags, int fd, off_t offset, + uint64_t StartingPosition = 0) -> std::pair { + uint64_t AllocatedPage {~0ULL}; uint64_t NumberOfPages = length >> FHU::FEX_PAGE_SHIFT; if (Region->FreeSpace >= length) { - uint64_t LastAllocation = - StartingPosition ? - (StartingPosition - Region->SlabInfo->Base) >> FHU::FEX_PAGE_SHIFT - : Region->LastPageAllocation; + uint64_t LastAllocation = StartingPosition ? (StartingPosition - Region->SlabInfo->Base) >> FHU::FEX_PAGE_SHIFT : Region->LastPageAllocation; size_t RegionNumberOfPages = Region->SlabInfo->RegionSize >> FHU::FEX_PAGE_SHIFT; @@ -320,11 +314,7 @@ void *OSAllocator_64Bit::Mmap(void *addr, size_t length, int prot, int flags, in AllocatedOffset = Region->SlabInfo->Base + AllocatedPage * FHU::FEX_PAGE_SIZE; // We need to setup protections for this - void *MMapResult = ::mmap(reinterpret_cast(AllocatedOffset), - length, - prot, - (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED, - fd, offset); + void* MMapResult = ::mmap(reinterpret_cast(AllocatedOffset), length, prot, (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED, fd, offset); if (MMapResult == MAP_FAILED) { return std::make_pair(Region, reinterpret_cast(-errno)); @@ -345,19 +335,13 @@ void *OSAllocator_64Bit::Mmap(void *addr, size_t length, int prot, int flags, in if (Fits.first && Fits.second == reinterpret_cast(Addr)) { // We fit correctly AllocatedOffset = Addr; - } - else { + } else { // Intersected with something that already existed return reinterpret_cast(-EEXIST); } - } - else { + } else { // We need to mmap the file to this location - void *MMapResult = ::mmap(reinterpret_cast(Addr), - length, - prot, - (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED, - fd, offset); + void* MMapResult = 
::mmap(reinterpret_cast(Addr), length, prot, (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED, fd, offset); if (MMapResult == MAP_FAILED) { return reinterpret_cast(-errno); @@ -367,8 +351,7 @@ void *OSAllocator_64Bit::Mmap(void *addr, size_t length, int prot, int flags, in } // Fall through to live region tracking } - } - else { + } else { // Check our active slabs to see if we can fit the allocation // Slightly different than fixed since it doesn't need exact placement if (LiveRegion && Addr != 0) { @@ -378,8 +361,7 @@ void *OSAllocator_64Bit::Mmap(void *addr, size_t length, int prot, int flags, in if (Fits.first && Fits.second == reinterpret_cast(Addr)) { // We fit correctly AllocatedOffset = Addr; - } - else { + } else { // Couldn't fit // We can continue past this point still LiveRegion = nullptr; @@ -438,7 +420,7 @@ void *OSAllocator_64Bit::Mmap(void *addr, size_t length, int prot, int flags, in return reinterpret_cast(AllocatedOffset); } -int OSAllocator_64Bit::Munmap(void *addr, size_t length) { +int OSAllocator_64Bit::Munmap(void* addr, size_t length) { if (addr < reinterpret_cast(LOWER_BOUND)) { // If we are asked to allocate something outside of the 64-bit space // Then we need to just hand this to the OS @@ -471,11 +453,10 @@ int OSAllocator_64Bit::Munmap(void *addr, size_t length) { uintptr_t RegionBegin = (*it)->SlabInfo->Base; uintptr_t RegionEnd = RegionBegin + (*it)->SlabInfo->RegionSize; - if (RegionBegin <= PtrBegin && - RegionEnd > PtrEnd) { + if (RegionBegin <= PtrBegin && RegionEnd > PtrEnd) { // Live region fully encompasses slab range - uint64_t FreedPages{}; + uint64_t FreedPages {}; uint32_t SlabPageBegin = (PtrBegin - RegionBegin) >> FHU::FEX_PAGE_SHIFT; uint64_t PagesToFree = length >> FHU::FEX_PAGE_SHIFT; @@ -483,8 +464,7 @@ int OSAllocator_64Bit::Munmap(void *addr, size_t length) { FreedPages += (*it)->UsedPages.TestAndClear(SlabPageBegin + i) ? 
1 : 0; } - if (FreedPages != 0) - { + if (FreedPages != 0) { // If we were contiuous freeing then make sure to give back the physical address space // If the region was locked then madvise won't remove the physical backing // This woul be a bug in the frontend application @@ -512,16 +492,16 @@ int OSAllocator_64Bit::Munmap(void *addr, size_t length) { fextl::vector OSAllocator_64Bit::Steal32BitIfOldKernel() { // First calculate kernel version - struct utsname buf{}; + struct utsname buf {}; if (uname(&buf) == -1) { return {}; } - int32_t Major{}; - int32_t Minor{}; - int32_t Patch{}; - char Tmp{}; - fextl::istringstream ss{buf.release}; + int32_t Major {}; + int32_t Minor {}; + int32_t Patch {}; + char Tmp {}; + fextl::istringstream ss {buf.release}; ss >> Major; ss.read(&Tmp, 1); ss >> Minor; @@ -541,8 +521,8 @@ fextl::vector OSAllocator_64Bit::Steal32BitIfO return FEXCore::Allocator::StealMemoryRegion(LOWER_BOUND_32, UPPER_BOUND_32); } -void OSAllocator_64Bit::AllocateMemoryRegions(fextl::vector const &Ranges) { - for (auto [Ptr, AllocationSize]: Ranges) { +void OSAllocator_64Bit::AllocateMemoryRegions(const fextl::vector& Ranges) { + for (auto [Ptr, AllocationSize] : Ranges) { if (!ObjectAlloc) { auto MaxSize = std::min(size_t(64) * 1024 * 1024, AllocationSize); @@ -564,7 +544,7 @@ void OSAllocator_64Bit::AllocateMemoryRegions(fextl::vectornew_construct(); + ReservedVMARegion* Region = ObjectAlloc->new_construct(); Region->Base = reinterpret_cast(Ptr); Region->RegionSize = AllocationSize; ReservedRegions->emplace_back(Region); @@ -602,4 +582,4 @@ OSAllocator_64Bit::~OSAllocator_64Bit() { fextl::unique_ptr Create64BitAllocator() { return fextl::make_unique(); } -} +} // namespace Alloc::OSAllocator diff --git a/FEXCore/Source/Utils/AllocatorOverride.cpp b/FEXCore/Source/Utils/AllocatorOverride.cpp index 410af55429..63ed66eed3 100644 --- a/FEXCore/Source/Utils/AllocatorOverride.cpp +++ b/FEXCore/Source/Utils/AllocatorOverride.cpp @@ -11,147 +11,147 @@ #include 
extern "C" { - // The majority of FEX internal code should avoid using the glibc allocator. To ensure glibc allocations don't accidentally slip - // in, FEX overrides these glibc functions with faulting variants. - // - // A notable exception is thunks, which should still use glibc allocations and avoid using `fextl::` namespace. - // - // Other minor exceptions throughout FEX use the `YesIKnowImNotSupposedToUseTheGlibcAllocator` helper to temporarily disable faulting. +// The majority of FEX internal code should avoid using the glibc allocator. To ensure glibc allocations don't accidentally slip +// in, FEX overrides these glibc functions with faulting variants. +// +// A notable exception is thunks, which should still use glibc allocations and avoid using `fextl::` namespace. +// +// Other minor exceptions throughout FEX use the `YesIKnowImNotSupposedToUseTheGlibcAllocator` helper to temporarily disable faulting. #define GLIBC_ALIAS_FUNCTION(func) __attribute__((alias(#func), visibility("default"))) - extern void *__libc_calloc(size_t, size_t); - void *calloc(size_t, size_t) GLIBC_ALIAS_FUNCTION(fault_calloc); +extern void* __libc_calloc(size_t, size_t); +void* calloc(size_t, size_t) GLIBC_ALIAS_FUNCTION(fault_calloc); - extern void __libc_free(void*); - void free(void*) GLIBC_ALIAS_FUNCTION(fault_free); +extern void __libc_free(void*); +void free(void*) GLIBC_ALIAS_FUNCTION(fault_free); - extern void *__libc_malloc(size_t); - void *malloc(size_t) GLIBC_ALIAS_FUNCTION(fault_malloc); +extern void* __libc_malloc(size_t); +void* malloc(size_t) GLIBC_ALIAS_FUNCTION(fault_malloc); - extern void *__libc_memalign(size_t, size_t); - void *memalign(size_t, size_t) GLIBC_ALIAS_FUNCTION(fault_memalign); +extern void* __libc_memalign(size_t, size_t); +void* memalign(size_t, size_t) GLIBC_ALIAS_FUNCTION(fault_memalign); - extern void *__libc_realloc(void*, size_t); - void *realloc(void*, size_t) GLIBC_ALIAS_FUNCTION(fault_realloc); +extern void* __libc_realloc(void*, size_t); 
+void* realloc(void*, size_t) GLIBC_ALIAS_FUNCTION(fault_realloc); - extern void *__libc_valloc(size_t); - void *valloc(size_t) GLIBC_ALIAS_FUNCTION(fault_valloc); +extern void* __libc_valloc(size_t); +void* valloc(size_t) GLIBC_ALIAS_FUNCTION(fault_valloc); - extern int __posix_memalign(void **, size_t, size_t); - int posix_memalign(void **, size_t, size_t) GLIBC_ALIAS_FUNCTION(fault_posix_memalign); +extern int __posix_memalign(void**, size_t, size_t); +int posix_memalign(void**, size_t, size_t) GLIBC_ALIAS_FUNCTION(fault_posix_memalign); - extern size_t __malloc_usable_size(void*); - size_t malloc_usable_size(void*) GLIBC_ALIAS_FUNCTION(fault_malloc_usable_size); +extern size_t __malloc_usable_size(void*); +size_t malloc_usable_size(void*) GLIBC_ALIAS_FUNCTION(fault_malloc_usable_size); - // Reuse __libc_memalign - void *aligned_alloc(size_t, size_t) GLIBC_ALIAS_FUNCTION(fault_aligned_alloc); +// Reuse __libc_memalign +void* aligned_alloc(size_t, size_t) GLIBC_ALIAS_FUNCTION(fault_aligned_alloc); } namespace FEXCore::Allocator { - // Enable or disable allocation faulting globally. - static bool GlobalEvaluate{}; - - // Enable or disable allocation faulting per-thread. - static thread_local uint64_t SkipEvalForThread{}; - - // Internal memory allocation hooks to allow non-faulting allocations through. - auto calloc_ptr = __libc_calloc; - auto free_ptr = __libc_free; - auto malloc_ptr = __libc_malloc; - auto memalign_ptr = __libc_memalign; - auto realloc_ptr = __libc_realloc; - auto valloc_ptr = __libc_valloc; - auto posix_memalign_ptr = ::posix_memalign; - auto malloc_usable_size_ptr = ::malloc_usable_size; - auto aligned_alloc_ptr = __libc_memalign; - - // Constructor for per-thread allocation faulting check. - YesIKnowImNotSupposedToUseTheGlibcAllocator::YesIKnowImNotSupposedToUseTheGlibcAllocator() { - ++SkipEvalForThread; - } +// Enable or disable allocation faulting globally. 
+static bool GlobalEvaluate {}; + +// Enable or disable allocation faulting per-thread. +static thread_local uint64_t SkipEvalForThread {}; + +// Internal memory allocation hooks to allow non-faulting allocations through. +auto calloc_ptr = __libc_calloc; +auto free_ptr = __libc_free; +auto malloc_ptr = __libc_malloc; +auto memalign_ptr = __libc_memalign; +auto realloc_ptr = __libc_realloc; +auto valloc_ptr = __libc_valloc; +auto posix_memalign_ptr = ::posix_memalign; +auto malloc_usable_size_ptr = ::malloc_usable_size; +auto aligned_alloc_ptr = __libc_memalign; + +// Constructor for per-thread allocation faulting check. +YesIKnowImNotSupposedToUseTheGlibcAllocator::YesIKnowImNotSupposedToUseTheGlibcAllocator() { + ++SkipEvalForThread; +} - // Destructor for per-thread allocation faulting check. - YesIKnowImNotSupposedToUseTheGlibcAllocator::~YesIKnowImNotSupposedToUseTheGlibcAllocator() { - --SkipEvalForThread; - } +// Destructor for per-thread allocation faulting check. +YesIKnowImNotSupposedToUseTheGlibcAllocator::~YesIKnowImNotSupposedToUseTheGlibcAllocator() { + --SkipEvalForThread; +} - // Hard disabling of per-thread allocation fault checking. - // No coming back from this, used on thread destruction. - FEX_DEFAULT_VISIBILITY void YesIKnowImNotSupposedToUseTheGlibcAllocator::HardDisable() { - // Just set it to half of its maximum value so it never wraps back around. - SkipEvalForThread = std::numeric_limits::max() / 2; - } +// Hard disabling of per-thread allocation fault checking. +// No coming back from this, used on thread destruction. +FEX_DEFAULT_VISIBILITY void YesIKnowImNotSupposedToUseTheGlibcAllocator::HardDisable() { + // Just set it to half of its maximum value so it never wraps back around. + SkipEvalForThread = std::numeric_limits::max() / 2; +} - // Enable global fault checking. - void SetupFaultEvaluate() { - GlobalEvaluate = true; - } +// Enable global fault checking. 
+void SetupFaultEvaluate() { + GlobalEvaluate = true; +} - // Disable global fault checking. - void ClearFaultEvaluate() { - GlobalEvaluate = false; +// Disable global fault checking. +void ClearFaultEvaluate() { + GlobalEvaluate = false; +} + +// Evaluate if a glibc hooked allocation should fault. +void EvaluateReturnAddress(void* Return) { + if (!GlobalEvaluate) { + // Fault evaluation disabled globally. + return; } - // Evaluate if a glibc hooked allocation should fault. - void EvaluateReturnAddress(void* Return) { - if (!GlobalEvaluate) { - // Fault evaluation disabled globally. - return; - } - - if (SkipEvalForThread) { - // Fault evaluation currently disabled for this thread. - return; - } - - // We don't know where we are when allocating. Make sure to be safe and generate the string on the stack. - // Print an error message to let a developer know that an allocation faulted. - char Tmp[512]; - auto Res = fmt::format_to_n(Tmp, 512, "Allocation from 0x{:x}\n", reinterpret_cast(Return)); - Tmp[Res.size] = 0; - write(STDERR_FILENO, Tmp, Res.size); - - // Trap the execution to stop FEX in its tracks. - FEX_TRAP_EXECUTION; + if (SkipEvalForThread) { + // Fault evaluation currently disabled for this thread. + return; } + + // We don't know where we are when allocating. Make sure to be safe and generate the string on the stack. + // Print an error message to let a developer know that an allocation faulted. + char Tmp[512]; + auto Res = fmt::format_to_n(Tmp, 512, "Allocation from 0x{:x}\n", reinterpret_cast(Return)); + Tmp[Res.size] = 0; + write(STDERR_FILENO, Tmp, Res.size); + + // Trap the execution to stop FEX in its tracks. + FEX_TRAP_EXECUTION; } +} // namespace FEXCore::Allocator extern "C" { - // These are the glibc allocator override symbols. - // These will override the glibc allocators and then check if the allocation should fault. 
- void *fault_calloc(size_t n, size_t size) { - FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr (__builtin_return_address (0))); - return FEXCore::Allocator::calloc_ptr(n, size); - } - void fault_free(void* ptr) { - FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr (__builtin_return_address (0))); - FEXCore::Allocator::free_ptr(ptr); - } - void *fault_malloc(size_t size) { - FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr (__builtin_return_address (0))); - return FEXCore::Allocator::malloc_ptr(size); - } - void *fault_memalign(size_t align, size_t s) { - FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr (__builtin_return_address (0))); - return FEXCore::Allocator::memalign_ptr(align, s); - } - void *fault_realloc(void* ptr, size_t size) { - FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr (__builtin_return_address (0))); - return FEXCore::Allocator::realloc_ptr(ptr, size); - } - void *fault_valloc(size_t size) { - FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr (__builtin_return_address (0))); - return FEXCore::Allocator::valloc_ptr(size); - } - int fault_posix_memalign(void ** r, size_t a, size_t s) { - FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr (__builtin_return_address (0))); - return FEXCore::Allocator::posix_memalign_ptr(r, a, s); - } - size_t fault_malloc_usable_size(void *ptr) { - FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr (__builtin_return_address (0))); - return FEXCore::Allocator::malloc_usable_size_ptr(ptr); - } - void *fault_aligned_alloc(size_t a, size_t s) { - FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr (__builtin_return_address (0))); - return FEXCore::Allocator::aligned_alloc_ptr(a, s); - } +// These are the glibc allocator override symbols. 
+// These will override the glibc allocators and then check if the allocation should fault. +void* fault_calloc(size_t n, size_t size) { + FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0))); + return FEXCore::Allocator::calloc_ptr(n, size); +} +void fault_free(void* ptr) { + FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0))); + FEXCore::Allocator::free_ptr(ptr); +} +void* fault_malloc(size_t size) { + FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0))); + return FEXCore::Allocator::malloc_ptr(size); +} +void* fault_memalign(size_t align, size_t s) { + FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0))); + return FEXCore::Allocator::memalign_ptr(align, s); +} +void* fault_realloc(void* ptr, size_t size) { + FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0))); + return FEXCore::Allocator::realloc_ptr(ptr, size); +} +void* fault_valloc(size_t size) { + FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0))); + return FEXCore::Allocator::valloc_ptr(size); +} +int fault_posix_memalign(void** r, size_t a, size_t s) { + FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0))); + return FEXCore::Allocator::posix_memalign_ptr(r, a, s); +} +size_t fault_malloc_usable_size(void* ptr) { + FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0))); + return FEXCore::Allocator::malloc_usable_size_ptr(ptr); +} +void* fault_aligned_alloc(size_t a, size_t s) { + FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0))); + return FEXCore::Allocator::aligned_alloc_ptr(a, s); +} } diff --git a/FEXCore/Source/Utils/ArchHelpers/Arm64.cpp 
b/FEXCore/Source/Utils/ArchHelpers/Arm64.cpp index 4e08e36470..a07eaa0ca3 100644 --- a/FEXCore/Source/Utils/ArchHelpers/Arm64.cpp +++ b/FEXCore/Source/Utils/ArchHelpers/Arm64.cpp @@ -15,9 +15,9 @@ namespace FEXCore::ArchHelpers::Arm64 { FEXCORE_TELEMETRY_STATIC_INIT(SplitLock, TYPE_HAS_SPLIT_LOCKS); FEXCORE_TELEMETRY_STATIC_INIT(SplitLock16B, TYPE_16BYTE_SPLIT); -FEXCORE_TELEMETRY_STATIC_INIT(Cas16Tear, TYPE_CAS_16BIT_TEAR); -FEXCORE_TELEMETRY_STATIC_INIT(Cas32Tear, TYPE_CAS_32BIT_TEAR); -FEXCORE_TELEMETRY_STATIC_INIT(Cas64Tear, TYPE_CAS_64BIT_TEAR); +FEXCORE_TELEMETRY_STATIC_INIT(Cas16Tear, TYPE_CAS_16BIT_TEAR); +FEXCORE_TELEMETRY_STATIC_INIT(Cas32Tear, TYPE_CAS_32BIT_TEAR); +FEXCORE_TELEMETRY_STATIC_INIT(Cas64Tear, TYPE_CAS_64BIT_TEAR); FEXCORE_TELEMETRY_STATIC_INIT(Cas128Tear, TYPE_CAS_128BIT_TEAR); static void ClearICache(void* Begin, std::size_t Length) { @@ -25,20 +25,19 @@ static void ClearICache(void* Begin, std::size_t Length) { } static __uint128_t LoadAcquire128(uint64_t Addr) { - __uint128_t Result{}; + __uint128_t Result {}; uint64_t Lower; uint64_t Upper; // This specifically avoids using std::atomic<__uint128_t> // std::atomic helper does a ldaxp + stxp pair that crashes when the page is only mapped readable __asm volatile( -R"( + R"( ldaxp %[ResultLower], %[ResultUpper], [%[Addr]]; clrex; )" - : [ResultLower] "=r" (Lower) - , [ResultUpper] "=r" (Upper) - : [Addr] "r" (Addr) - : "memory"); + : [ResultLower] "=r"(Lower), [ResultUpper] "=r"(Upper) + : [Addr] "r"(Addr) + : "memory"); Result = Upper; Result <<= 64; Result |= Lower; @@ -46,32 +45,32 @@ R"( } static uint64_t LoadAcquire64(uint64_t Addr) { - std::atomic *Atom = reinterpret_cast*>(Addr); + std::atomic* Atom = reinterpret_cast*>(Addr); return Atom->load(std::memory_order_acquire); } -static bool StoreCAS64(uint64_t &Expected, uint64_t Val, uint64_t Addr) { - std::atomic *Atom = reinterpret_cast*>(Addr); +static bool StoreCAS64(uint64_t& Expected, uint64_t Val, uint64_t Addr) { + std::atomic* 
Atom = reinterpret_cast*>(Addr); return Atom->compare_exchange_strong(Expected, Val); } static uint32_t LoadAcquire32(uint64_t Addr) { - std::atomic *Atom = reinterpret_cast*>(Addr); + std::atomic* Atom = reinterpret_cast*>(Addr); return Atom->load(std::memory_order_acquire); } -static bool StoreCAS32(uint32_t &Expected, uint32_t Val, uint64_t Addr) { - std::atomic *Atom = reinterpret_cast*>(Addr); +static bool StoreCAS32(uint32_t& Expected, uint32_t Val, uint64_t Addr) { + std::atomic* Atom = reinterpret_cast*>(Addr); return Atom->compare_exchange_strong(Expected, Val); } static uint8_t LoadAcquire8(uint64_t Addr) { - std::atomic *Atom = reinterpret_cast*>(Addr); + std::atomic* Atom = reinterpret_cast*>(Addr); return Atom->load(std::memory_order_acquire); } -static bool StoreCAS8(uint8_t &Expected, uint8_t Val, uint64_t Addr) { - std::atomic *Atom = reinterpret_cast*>(Addr); +static bool StoreCAS8(uint8_t& Expected, uint8_t Val, uint64_t Addr) { + std::atomic* Atom = reinterpret_cast*>(Addr); return Atom->compare_exchange_strong(Expected, Val); } @@ -81,8 +80,8 @@ uint16_t DoLoad16(uint64_t Addr) { // Address crosses over 16byte or 64byte threshold // Needs two loads uint64_t AddrUpper = Addr + 1; - uint8_t ActualUpper{}; - uint8_t ActualLower{}; + uint8_t ActualUpper {}; + uint8_t ActualLower {}; // Careful ordering here ActualUpper = LoadAcquire8(AddrUpper); ActualLower = LoadAcquire8(Addr); @@ -91,8 +90,7 @@ uint16_t DoLoad16(uint64_t Addr) { Result <<= 8; Result |= ActualLower; return Result; - } - else { + } else { AlignmentMask = 0b111; if ((Addr & AlignmentMask) == 7) { // Crosses 8byte boundary @@ -106,8 +104,7 @@ uint16_t DoLoad16(uint64_t Addr) { // Zexts the result uint16_t Result = TmpResult >> (Alignment * 8); return Result; - } - else { + } else { AlignmentMask = 0b11; if ((Addr & AlignmentMask) == 3) { // Crosses 4byte boundary @@ -115,21 +112,20 @@ uint16_t DoLoad16(uint64_t Addr) { uint64_t Alignment = Addr & AlignmentMask; Addr &= ~AlignmentMask; 
- std::atomic *Atomic = reinterpret_cast*>(Addr); + std::atomic* Atomic = reinterpret_cast*>(Addr); uint64_t TmpResult = Atomic->load(); // Zexts the result uint16_t Result = TmpResult >> (Alignment * 8); return Result; - } - else { + } else { // Fits within 4byte boundary // Only needs 32bit Load // Only alignment offset will be 1 here uint64_t Alignment = Addr & AlignmentMask; Addr &= ~AlignmentMask; - std::atomic *Atomic = reinterpret_cast*>(Addr); + std::atomic* Atomic = reinterpret_cast*>(Addr); uint32_t TmpResult = Atomic->load(); // Zexts the result @@ -158,8 +154,7 @@ uint32_t DoLoad32(uint64_t Addr) { Result <<= 32; Result |= ActualLower; return Result >> (Alignment * 8); - } - else { + } else { AlignmentMask = 0b111; if ((Addr & AlignmentMask) >= 5) { // Crosses 8byte boundary @@ -171,15 +166,14 @@ uint32_t DoLoad32(uint64_t Addr) { __uint128_t TmpResult = LoadAcquire128(Addr); return TmpResult >> (Alignment * 8); - } - else { + } else { // Fits within 8byte boundary // Only needs 64bit CAS // Alignments can be [1,5) uint64_t Alignment = Addr & AlignmentMask; Addr &= ~AlignmentMask; - std::atomic *Atomic = reinterpret_cast*>(Addr); + std::atomic* Atomic = reinterpret_cast*>(Addr); uint64_t TmpResult = Atomic->load(); return TmpResult >> (Alignment * 8); @@ -196,8 +190,8 @@ uint64_t DoLoad64(uint64_t Addr) { // Crosses a 16byte boundary // Needs two 8 byte loads - uint64_t ActualUpper{}; - uint64_t ActualLower{}; + uint64_t ActualUpper {}; + uint64_t ActualLower {}; // Careful ordering here ActualUpper = LoadAcquire64(AddrUpper); ActualLower = LoadAcquire64(Addr); @@ -206,8 +200,7 @@ uint64_t DoLoad64(uint64_t Addr) { Result <<= 64; Result |= ActualLower; return Result >> (Alignment * 8); - } - else { + } else { // Fits within a 16byte region uint64_t Alignment = Addr & AlignmentMask; Addr &= ~AlignmentMask; @@ -234,17 +227,18 @@ std::pair DoLoad128(uint64_t Addr) { } Bytes; }; - AlignedData *Data = reinterpret_cast(alloca(sizeof(AlignedData))); + 
AlignedData* Data = reinterpret_cast(alloca(sizeof(AlignedData))); Data->Large.Upper = LoadAcquire128(AddrUpper); Data->Large.Lower = LoadAcquire128(Addr); - uint64_t ResultLower{}, ResultUpper{}; + uint64_t ResultLower {}, ResultUpper {}; memcpy(&ResultLower, &Data->Bytes.Data[Alignment], sizeof(uint64_t)); memcpy(&ResultUpper, &Data->Bytes.Data[Alignment + sizeof(uint64_t)], sizeof(uint64_t)); return {ResultLower, ResultUpper}; } -static bool RunCASPAL(uint64_t *GPRs, uint32_t Size, uint32_t DesiredReg1, uint32_t DesiredReg2, uint32_t ExpectedReg1, uint32_t ExpectedReg2, uint32_t AddressReg) { +static bool RunCASPAL(uint64_t* GPRs, uint32_t Size, uint32_t DesiredReg1, uint32_t DesiredReg2, uint32_t ExpectedReg1, + uint32_t ExpectedReg2, uint32_t AddressReg) { if (Size == 0) { // 32bit uint64_t Addr = GPRs[AddressReg]; @@ -280,8 +274,8 @@ static bool RunCASPAL(uint64_t *GPRs, uint32_t Size, uint32_t DesiredReg1, uint3 __uint128_t Mask = ~0ULL; Mask <<= Alignment * 8; __uint128_t NegMask = ~Mask; - __uint128_t TmpExpected{}; - __uint128_t TmpDesired{}; + __uint128_t TmpExpected {}; + __uint128_t TmpDesired {}; __uint128_t Desired = DesiredUpper; Desired <<= 32; @@ -319,8 +313,7 @@ static bool RunCASPAL(uint64_t *GPRs, uint32_t Size, uint32_t DesiredReg1, uint3 if (StoreCAS64(TmpExpectedLower, TmpDesiredLower, Addr)) { // Stored successfully return true; - } - else { + } else { // CAS managed to tear, we can't really solve this // Continue down the path to let the guest know values weren't expected FEXCORE_TELEMETRY_SET(Cas128Tear, 1); @@ -330,8 +323,7 @@ static bool RunCASPAL(uint64_t *GPRs, uint32_t Size, uint32_t DesiredReg1, uint3 TmpExpected = TmpExpectedUpper; TmpExpected <<= 64; TmpExpected |= TmpExpectedLower; - } - else { + } else { // Mismatch up front TmpExpected = TmpActual; } @@ -363,18 +355,17 @@ static bool RunCASPAL(uint64_t *GPRs, uint32_t Size, uint32_t DesiredReg1, uint3 GPRs[ExpectedReg2] = FailedResult >> 32; return true; } - } - else { + } else 
{ // Fits within a 16byte region uint64_t Alignment = Addr & 0b1111; Addr &= ~0b1111ULL; - std::atomic<__uint128_t> *Atomic128 = reinterpret_cast*>(Addr); + std::atomic<__uint128_t>* Atomic128 = reinterpret_cast*>(Addr); __uint128_t Mask = ~0ULL; Mask <<= Alignment * 8; __uint128_t NegMask = ~Mask; - __uint128_t TmpExpected{}; - __uint128_t TmpDesired{}; + __uint128_t TmpExpected {}; + __uint128_t TmpDesired {}; __uint128_t Desired = (uint64_t)DesiredUpper << 32 | DesiredLower; Desired <<= Alignment * 8; @@ -398,8 +389,7 @@ static bool RunCASPAL(uint64_t *GPRs, uint32_t Size, uint32_t DesiredReg1, uint3 if (CASResult) { // Successful, so we are done return true; - } - else { + } else { // Not successful // Now we need to check the results to see if we need to try again __uint128_t FailedResultOurBits = TmpExpected & Mask; @@ -424,7 +414,7 @@ static bool RunCASPAL(uint64_t *GPRs, uint32_t Size, uint32_t DesiredReg1, uint3 return false; } -bool HandleCASPAL(uint32_t Instr, uint64_t *GPRs) { +bool HandleCASPAL(uint32_t Instr, uint64_t* GPRs) { uint32_t Size = (Instr >> 30) & 1; uint32_t DesiredReg1 = Instr & 0b11111; @@ -436,7 +426,7 @@ bool HandleCASPAL(uint32_t Instr, uint64_t *GPRs) { return RunCASPAL(GPRs, Size, DesiredReg1, DesiredReg2, ExpectedReg1, ExpectedReg2, AddressReg); } -uint64_t HandleCASPAL_ARMv8(uint32_t Instr, uintptr_t ProgramCounter, uint64_t *GPRs) { +uint64_t HandleCASPAL_ARMv8(uint32_t Instr, uintptr_t ProgramCounter, uint64_t* GPRs) { // caspair // [1] ldaxp(TMP2.W(), TMP3.W(), MemOperand(MemSrc)); <-- DataReg & AddrReg // [2] cmp(TMP2.W(), Expected.first.W()); <-- ExpectedReg1 @@ -451,20 +441,20 @@ uint64_t HandleCASPAL_ARMv8(uint32_t Instr, uintptr_t ProgramCounter, uint64_t * // [11] mov(Dst.second.W(), TMP3.W()); // [12] clrex(); - uint32_t *PC = (uint32_t*)ProgramCounter; + uint32_t* PC = (uint32_t*)ProgramCounter; uint32_t Size = (Instr >> 30) & 1; uint32_t AddrReg = (Instr >> 5) & 0x1F; uint32_t DataReg = Instr & 0x1F; uint32_t DataReg2 
= (Instr >> 10) & 0x1F; - uint32_t ExpectedReg1{}; - uint32_t ExpectedReg2{}; + uint32_t ExpectedReg1 {}; + uint32_t ExpectedReg2 {}; - uint32_t DesiredReg1{}; - uint32_t DesiredReg2{}; + uint32_t DesiredReg1 {}; + uint32_t DesiredReg2 {}; - if(Size == 1) { + if (Size == 1) { // 64-bit pair happens on paranoid vector loads // [1] ldaxp(TMP1, TMP2, MemSrc); // [2] clrex(); @@ -475,8 +465,7 @@ uint64_t HandleCASPAL_ARMv8(uint32_t Instr, uintptr_t ProgramCounter, uint64_t * // [3] cbnz(TMP3, &B); // < Overwritten with DMB if (DataReg == 31) { - } - else { + } else { uint32_t NextInstr = PC[1]; if ((NextInstr & ArchHelpers::Arm64::CLREX_MASK) == ArchHelpers::Arm64::CLREX_INST) { uint64_t Addr = GPRs[AddrReg]; @@ -497,25 +486,25 @@ uint64_t HandleCASPAL_ARMv8(uint32_t Instr, uintptr_t ProgramCounter, uint64_t * return 0; } - //Only 32-bit pairs - for(int i = 1; i < 10; i++) { + // Only 32-bit pairs + for (int i = 1; i < 10; i++) { uint32_t NextInstr = PC[i]; if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::CMP_INST || (NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::CMP_SHIFT_INST) { - ExpectedReg1 = GetRmReg(NextInstr); + ExpectedReg1 = GetRmReg(NextInstr); } else if ((NextInstr & ArchHelpers::Arm64::CCMP_MASK) == ArchHelpers::Arm64::CCMP_INST) { - ExpectedReg2 = GetRmReg(NextInstr); + ExpectedReg2 = GetRmReg(NextInstr); } else if ((NextInstr & ArchHelpers::Arm64::STLXP_MASK) == ArchHelpers::Arm64::STLXP_INST) { - DesiredReg1 = (NextInstr & 0x1F); - DesiredReg2 = (NextInstr >> 10) & 0x1F; + DesiredReg1 = (NextInstr & 0x1F); + DesiredReg2 = (NextInstr >> 10) & 0x1F; } } - //mov expected into the temp registers used by JIT + // mov expected into the temp registers used by JIT GPRs[DataReg] = GPRs[ExpectedReg1]; GPRs[DataReg2] = GPRs[ExpectedReg2]; - if(RunCASPAL(GPRs, Size, DesiredReg1, DesiredReg2, DataReg, DataReg2, AddrReg)) { + if (RunCASPAL(GPRs, Size, DesiredReg1, DesiredReg2, DataReg, DataReg2, AddrReg)) { return 9 * 
sizeof(uint32_t); // skip to mov + clrex } else { return 0; @@ -523,12 +512,12 @@ uint64_t HandleCASPAL_ARMv8(uint32_t Instr, uintptr_t ProgramCounter, uint64_t * } static bool HandleAtomicVectorStore(uint32_t Instr, uintptr_t ProgramCounter) { - uint32_t *PC = (uint32_t*)ProgramCounter; + uint32_t* PC = (uint32_t*)ProgramCounter; uint32_t Size = (Instr >> 30) & 1; uint32_t DataReg = Instr & 0x1F; - if(Size == 1) { + if (Size == 1) { // 64-bit pair happens on paranoid vector stores // [0] ldaxp(xzr, TMP3, MemSrc); // <- Can hit SIGBUS. Overwritten with DMB // [1] stlxp(TMP3, TMP1, TMP2, MemSrc); // <- Can also hit SIGBUS @@ -538,12 +527,7 @@ static bool HandleAtomicVectorStore(uint32_t Instr, uintptr_t ProgramCounter) { uint32_t AddrReg = (NextInstr >> 5) & 0x1F; DataReg = NextInstr & 0x1F; uint32_t DataReg2 = (NextInstr >> 10) & 0x1F; - uint32_t STP = - (0b10 << 30) | - (0b101001000000000 << 15) | - (DataReg2 << 10) | - (AddrReg << 5) | - DataReg; + uint32_t STP = (0b10 << 30) | (0b101001000000000 << 15) | (DataReg2 << 10) | (AddrReg << 5) | DataReg; PC[0] = DMB; PC[1] = STP; @@ -557,19 +541,14 @@ static bool HandleAtomicVectorStore(uint32_t Instr, uintptr_t ProgramCounter) { return false; } -template +template using CASExpectedFn = T (*)(T Src, T Expected); -template +template using CASDesiredFn = T (*)(T Src, T Desired); template -static -uint16_t DoCAS16( - uint16_t DesiredSrc, - uint16_t ExpectedSrc, - uint64_t Addr, - CASExpectedFn ExpectedFunction, - CASDesiredFn DesiredFunction) { +static uint16_t DoCAS16(uint16_t DesiredSrc, uint16_t ExpectedSrc, uint64_t Addr, CASExpectedFn ExpectedFunction, + CASDesiredFn DesiredFunction) { if ((Addr & 63) == 63) { FEXCORE_TELEMETRY_SET(SplitLock, 1); @@ -585,8 +564,8 @@ uint16_t DoCAS16( uint64_t AddrUpper = Addr + 1; while (1) { - uint8_t ActualUpper{}; - uint8_t ActualLower{}; + uint8_t ActualUpper {}; + uint8_t ActualLower {}; // Careful ordering here ActualUpper = LoadAcquire8(AddrUpper); ActualLower = 
LoadAcquire8(Addr); @@ -604,14 +583,12 @@ uint16_t DoCAS16( uint8_t ExpectedUpper = Expected >> 8; bool Tear = false; - if (ActualUpper == ExpectedUpper && - ActualLower == ExpectedLower) { + if (ActualUpper == ExpectedUpper && ActualLower == ExpectedLower) { if (StoreCAS8(ExpectedUpper, DesiredUpper, AddrUpper)) { if (StoreCAS8(ExpectedLower, DesiredLower, Addr)) { // Stored successfully return Expected; - } - else { + } else { // CAS managed to tear, we can't really solve this // Continue down the path to let the guest know values weren't expected Tear = true; @@ -634,19 +611,16 @@ uint16_t DoCAS16( // If we are retrying and tearing then we can't do anything here // XXX: Resolve with TME return FailedResult; - } - else { + } else { // We can retry safely } - } - else { + } else { // Without Retry (CAS) then we have failed regardless of tear // CAS failed but handled successfully return FailedResult; } } - } - else { + } else { AlignmentMask = 0b111; if ((Addr & AlignmentMask) == 7) { // Crosses 8byte boundary @@ -654,13 +628,13 @@ uint16_t DoCAS16( // Fits within a 16byte region uint64_t Alignment = Addr & 0b1111; Addr &= ~0b1111ULL; - std::atomic<__uint128_t> *Atomic128 = reinterpret_cast*>(Addr); + std::atomic<__uint128_t>* Atomic128 = reinterpret_cast*>(Addr); __uint128_t Mask = 0xFFFF; Mask <<= Alignment * 8; __uint128_t NegMask = ~Mask; - __uint128_t TmpExpected{}; - __uint128_t TmpDesired{}; + __uint128_t TmpExpected {}; + __uint128_t TmpDesired {}; while (1) { TmpExpected = Atomic128->load(); @@ -684,8 +658,7 @@ uint16_t DoCAS16( if (CASResult) { // Successful, so we are done return Expected >> (Alignment * 8); - } - else { + } else { if constexpr (Retry) { // If we failed but we have enabled retry then just retry without checking results // CAS can't retry but atomic memory ops need to retry until passing @@ -709,8 +682,7 @@ uint16_t DoCAS16( return FailedResult; } } - } - else { + } else { AlignmentMask = 0b11; if ((Addr & AlignmentMask) == 3) { // 
Crosses 4byte boundary @@ -723,10 +695,10 @@ uint16_t DoCAS16( uint64_t NegMask = ~Mask; - uint64_t TmpExpected{}; - uint64_t TmpDesired{}; + uint64_t TmpExpected {}; + uint64_t TmpDesired {}; - std::atomic *Atomic = reinterpret_cast*>(Addr); + std::atomic* Atomic = reinterpret_cast*>(Addr); while (1) { TmpExpected = Atomic->load(); @@ -749,8 +721,7 @@ uint16_t DoCAS16( if (CASResult) { // Successful, so we are done return Expected >> (Alignment * 8); - } - else { + } else { if constexpr (Retry) { // If we failed but we have enabled retry then just retry without checking results // CAS can't retry but atomic memory ops need to retry until passing @@ -775,8 +746,7 @@ uint16_t DoCAS16( return FailedResult; } } - } - else { + } else { // Fits within 4byte boundary // Only needs 32bit CAS // Only alignment offset will be 1 here @@ -788,10 +758,10 @@ uint16_t DoCAS16( uint32_t NegMask = ~Mask; - uint32_t TmpExpected{}; - uint32_t TmpDesired{}; + uint32_t TmpExpected {}; + uint32_t TmpDesired {}; - std::atomic *Atomic = reinterpret_cast*>(Addr); + std::atomic* Atomic = reinterpret_cast*>(Addr); while (1) { TmpExpected = Atomic->load(); @@ -815,8 +785,7 @@ uint16_t DoCAS16( if (CASResult) { // Successful, so we are done return Expected >> (Alignment * 8); - } - else { + } else { if constexpr (Retry) { // If we failed but we have enabled retry then just retry without checking results // CAS can't retry but atomic memory ops need to retry until passing @@ -847,13 +816,8 @@ uint16_t DoCAS16( } template -static -uint32_t DoCAS32( - uint32_t DesiredSrc, - uint32_t ExpectedSrc, - uint64_t Addr, - CASExpectedFn ExpectedFunction, - CASDesiredFn DesiredFunction) { +static uint32_t DoCAS32(uint32_t DesiredSrc, uint32_t ExpectedSrc, uint64_t Addr, CASExpectedFn ExpectedFunction, + CASDesiredFn DesiredFunction) { if ((Addr & 63) > 60) { FEXCORE_TELEMETRY_SET(SplitLock, 1); @@ -904,8 +868,7 @@ uint32_t DoCAS32( if (StoreCAS32(TmpExpectedLower, TmpDesiredLower, Addr)) { // Stored 
successfully return Expected; - } - else { + } else { // CAS managed to tear, we can't really solve this // Continue down the path to let the guest know values weren't expected Tear = true; @@ -916,8 +879,7 @@ uint32_t DoCAS32( TmpExpected = TmpExpectedUpper; TmpExpected <<= 32; TmpExpected |= TmpExpectedLower; - } - else { + } else { // Mismatch up front TmpExpected = TmpActual; } @@ -942,19 +904,16 @@ uint32_t DoCAS32( // If we are retrying and tearing then we can't do anything here // XXX: Resolve with TME return FailedResult; - } - else { + } else { // We can retry safely } - } - else { + } else { // Without Retry (CAS) then we have failed regardless of tear // CAS failed but handled successfully return FailedResult; } } - } - else { + } else { AlignmentMask = 0b111; if ((Addr & AlignmentMask) >= 5) { // Crosses 8byte boundary @@ -962,13 +921,13 @@ uint32_t DoCAS32( // Fits within a 16byte region uint64_t Alignment = Addr & 0b1111; Addr &= ~0b1111ULL; - std::atomic<__uint128_t> *Atomic128 = reinterpret_cast*>(Addr); + std::atomic<__uint128_t>* Atomic128 = reinterpret_cast*>(Addr); __uint128_t Mask = ~0U; Mask <<= Alignment * 8; __uint128_t NegMask = ~Mask; - __uint128_t TmpExpected{}; - __uint128_t TmpDesired{}; + __uint128_t TmpExpected {}; + __uint128_t TmpDesired {}; while (1) { __uint128_t TmpActual = Atomic128->load(); @@ -990,8 +949,7 @@ uint32_t DoCAS32( if (CASResult) { // Stored successfully return Expected; - } - else { + } else { if constexpr (Retry) { // If we failed but we have enabled retry then just retry without checking results // CAS can't retry but atomic memory ops need to retry until passing @@ -1016,8 +974,7 @@ uint32_t DoCAS32( return FailedResult; } } - } - else { + } else { // Fits within 8byte boundary // Only needs 64bit CAS // Alignments can be [1,5) @@ -1029,10 +986,10 @@ uint32_t DoCAS32( uint64_t NegMask = ~Mask; - uint64_t TmpExpected{}; - uint64_t TmpDesired{}; + uint64_t TmpExpected {}; + uint64_t TmpDesired {}; - std::atomic 
*Atomic = reinterpret_cast*>(Addr); + std::atomic* Atomic = reinterpret_cast*>(Addr); while (1) { uint64_t TmpActual = Atomic->load(); @@ -1053,8 +1010,7 @@ uint32_t DoCAS32( if (CASResult) { // Stored successfully return Expected; - } - else { + } else { if constexpr (Retry) { // If we failed but we have enabled retry then just retry without checking results // CAS can't retry but atomic memory ops need to retry until passing @@ -1085,13 +1041,8 @@ uint32_t DoCAS32( } template -static -uint64_t DoCAS64( - uint64_t DesiredSrc, - uint64_t ExpectedSrc, - uint64_t Addr, - CASExpectedFn ExpectedFunction, - CASDesiredFn DesiredFunction) { +static uint64_t DoCAS64(uint64_t DesiredSrc, uint64_t ExpectedSrc, uint64_t Addr, CASExpectedFn ExpectedFunction, + CASDesiredFn DesiredFunction) { if ((Addr & 63) > 56) { FEXCORE_TELEMETRY_SET(SplitLock, 1); @@ -1111,8 +1062,8 @@ uint64_t DoCAS64( __uint128_t Mask = ~0ULL; Mask <<= Alignment * 8; __uint128_t NegMask = ~Mask; - __uint128_t TmpExpected{}; - __uint128_t TmpDesired{}; + __uint128_t TmpExpected {}; + __uint128_t TmpDesired {}; while (1) { __uint128_t LoadOrderUpper = LoadAcquire64(AddrUpper); @@ -1144,8 +1095,7 @@ uint64_t DoCAS64( if (StoreCAS64(TmpExpectedLower, TmpDesiredLower, Addr)) { // Stored successfully return Expected; - } - else { + } else { // CAS managed to tear, we can't really solve this // Continue down the path to let the guest know values weren't expected Tear = true; @@ -1156,8 +1106,7 @@ uint64_t DoCAS64( TmpExpected = TmpExpectedUpper; TmpExpected <<= 64; TmpExpected |= TmpExpectedLower; - } - else { + } else { // Mismatch up front TmpExpected = TmpActual; } @@ -1182,29 +1131,26 @@ uint64_t DoCAS64( // If we are retrying and tearing then we can't do anything here // XXX: Resolve with TME return FailedResult; - } - else { + } else { // We can retry safely } - } - else { + } else { // Without Retry (CAS) then we have failed regardless of tear // CAS failed but handled successfully return FailedResult; } 
} - } - else { + } else { // Fits within a 16byte region uint64_t Alignment = Addr & AlignmentMask; Addr &= ~AlignmentMask; - std::atomic<__uint128_t> *Atomic128 = reinterpret_cast*>(Addr); + std::atomic<__uint128_t>* Atomic128 = reinterpret_cast*>(Addr); __uint128_t Mask = ~0ULL; Mask <<= Alignment * 8; __uint128_t NegMask = ~Mask; - __uint128_t TmpExpected{}; - __uint128_t TmpDesired{}; + __uint128_t TmpExpected {}; + __uint128_t TmpDesired {}; while (1) { __uint128_t TmpActual = Atomic128->load(); @@ -1226,8 +1172,7 @@ uint64_t DoCAS64( if (CASResult) { // Stored successfully return Expected; - } - else { + } else { if constexpr (Retry) { // If we failed but we have enabled retry then just retry without checking results // CAS can't retry but atomic memory ops need to retry until passing @@ -1255,7 +1200,7 @@ uint64_t DoCAS64( } } -static bool RunCASAL(uint64_t *GPRs, uint32_t Size, uint32_t DesiredReg, uint32_t ExpectedReg, uint32_t AddressReg) { +static bool RunCASAL(uint64_t* GPRs, uint32_t Size, uint32_t DesiredReg, uint32_t ExpectedReg, uint32_t AddressReg) { uint64_t Addr = GPRs[AddressReg]; // Cross-cacheline CAS doesn't work on ARM @@ -1270,17 +1215,15 @@ static bool RunCASAL(uint64_t *GPRs, uint32_t Size, uint32_t DesiredReg, uint32_ // Only need to handle 16, 32, 64 if (Size == 2) { auto Res = DoCAS16( - GPRs[DesiredReg], - GPRs[ExpectedReg], - Addr, + GPRs[DesiredReg], GPRs[ExpectedReg], Addr, [](uint16_t, uint16_t Expected) -> uint16_t { - // Expected is just Expected - return Expected; - }, + // Expected is just Expected + return Expected; + }, [](uint16_t, uint16_t Desired) -> uint16_t { - // Desired is just Desired - return Desired; - }); + // Desired is just Desired + return Desired; + }); // Regardless of pass or fail // We set the result register if it isn't a zero register @@ -1288,20 +1231,17 @@ static bool RunCASAL(uint64_t *GPRs, uint32_t Size, uint32_t DesiredReg, uint32_ GPRs[ExpectedReg] = Res; } return true; - } - else if (Size == 4) { 
+ } else if (Size == 4) { auto Res = DoCAS32( - GPRs[DesiredReg], - GPRs[ExpectedReg], - Addr, + GPRs[DesiredReg], GPRs[ExpectedReg], Addr, [](uint32_t, uint32_t Expected) -> uint32_t { - // Expected is just Expected - return Expected; - }, + // Expected is just Expected + return Expected; + }, [](uint32_t, uint32_t Desired) -> uint32_t { - // Desired is just Desired - return Desired; - }); + // Desired is just Desired + return Desired; + }); // Regardless of pass or fail // We set the result register if it isn't a zero register @@ -1309,20 +1249,17 @@ static bool RunCASAL(uint64_t *GPRs, uint32_t Size, uint32_t DesiredReg, uint32_ GPRs[ExpectedReg] = Res; } return true; - } - else if (Size == 8) { + } else if (Size == 8) { auto Res = DoCAS64( - GPRs[DesiredReg], - GPRs[ExpectedReg], - Addr, + GPRs[DesiredReg], GPRs[ExpectedReg], Addr, [](uint64_t, uint64_t Expected) -> uint64_t { - // Expected is just Expected - return Expected; - }, + // Expected is just Expected + return Expected; + }, [](uint64_t, uint64_t Desired) -> uint64_t { - // Desired is just Desired - return Desired; - }); + // Desired is just Desired + return Desired; + }); // Regardless of pass or fail // We set the result register if it isn't a zero register @@ -1335,7 +1272,7 @@ static bool RunCASAL(uint64_t *GPRs, uint32_t Size, uint32_t DesiredReg, uint32_ return false; } -static bool HandleCASAL(uint64_t *GPRs, uint32_t Instr) { +static bool HandleCASAL(uint64_t* GPRs, uint32_t Instr) { uint32_t Size = 1 << (Instr >> 30); uint32_t DesiredReg = Instr & 0b11111; @@ -1344,7 +1281,7 @@ static bool HandleCASAL(uint64_t *GPRs, uint32_t Instr) { return RunCASAL(GPRs, Size, DesiredReg, ExpectedReg, AddressReg); } -static bool HandleAtomicMemOp(uint32_t Instr, uint64_t *GPRs) { +static bool HandleAtomicMemOp(uint32_t Instr, uint64_t* GPRs) { uint32_t Size = 1 << (Instr >> 30); uint32_t ResultReg = Instr & 0b11111; uint32_t SourceReg = (Instr >> 16) & 0b11111; @@ -1379,43 +1316,27 @@ static bool 
HandleAtomicMemOp(uint32_t Instr, uint64_t *GPRs) { return Desired; }; - CASDesiredFn DesiredFunction{}; + CASDesiredFn DesiredFunction {}; switch (Op) { - case ATOMIC_ADD_OP: - DesiredFunction = ADDDesired; - break; - case ATOMIC_CLR_OP: - DesiredFunction = CLRDesired; - break; - case ATOMIC_EOR_OP: - DesiredFunction = EORDesired; - break; - case ATOMIC_SET_OP: - DesiredFunction = SETDesired; - break; - case ATOMIC_SWAP_OP: - DesiredFunction = SWAPDesired; - break; - default: - LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", Op); - return false; + case ATOMIC_ADD_OP: DesiredFunction = ADDDesired; break; + case ATOMIC_CLR_OP: DesiredFunction = CLRDesired; break; + case ATOMIC_EOR_OP: DesiredFunction = EORDesired; break; + case ATOMIC_SET_OP: DesiredFunction = SETDesired; break; + case ATOMIC_SWAP_OP: DesiredFunction = SWAPDesired; break; + default: LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", Op); return false; } - auto Res = DoCAS16( - GPRs[SourceReg], - 0, // Unused - Addr, - NOPExpected, - DesiredFunction); + auto Res = DoCAS16(GPRs[SourceReg], + 0, // Unused + Addr, NOPExpected, DesiredFunction); // If we passed and our destination register is not zero // Then we need to update the result register with what was in memory if (ResultReg != 31) { GPRs[ResultReg] = Res; } return true; - } - else if (Size == 4) { + } else if (Size == 4) { auto NOPExpected = [](uint32_t SrcVal, uint32_t) -> uint32_t { return SrcVal; }; @@ -1440,43 +1361,27 @@ static bool HandleAtomicMemOp(uint32_t Instr, uint64_t *GPRs) { return Desired; }; - CASDesiredFn DesiredFunction{}; + CASDesiredFn DesiredFunction {}; switch (Op) { - case ATOMIC_ADD_OP: - DesiredFunction = ADDDesired; - break; - case ATOMIC_CLR_OP: - DesiredFunction = CLRDesired; - break; - case ATOMIC_EOR_OP: - DesiredFunction = EORDesired; - break; - case ATOMIC_SET_OP: - DesiredFunction = SETDesired; - break; - case ATOMIC_SWAP_OP: - DesiredFunction = SWAPDesired; - break; - default: - 
LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", Op); - return false; + case ATOMIC_ADD_OP: DesiredFunction = ADDDesired; break; + case ATOMIC_CLR_OP: DesiredFunction = CLRDesired; break; + case ATOMIC_EOR_OP: DesiredFunction = EORDesired; break; + case ATOMIC_SET_OP: DesiredFunction = SETDesired; break; + case ATOMIC_SWAP_OP: DesiredFunction = SWAPDesired; break; + default: LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", Op); return false; } - auto Res = DoCAS32( - GPRs[SourceReg], - 0, // Unused - Addr, - NOPExpected, - DesiredFunction); + auto Res = DoCAS32(GPRs[SourceReg], + 0, // Unused + Addr, NOPExpected, DesiredFunction); // If we passed and our destination register is not zero // Then we need to update the result register with what was in memory if (ResultReg != 31) { GPRs[ResultReg] = Res; } return true; - } - else if (Size == 8) { + } else if (Size == 8) { auto NOPExpected = [](uint64_t SrcVal, uint64_t) -> uint64_t { return SrcVal; }; @@ -1501,35 +1406,20 @@ static bool HandleAtomicMemOp(uint32_t Instr, uint64_t *GPRs) { return Desired; }; - CASDesiredFn DesiredFunction{}; + CASDesiredFn DesiredFunction {}; switch (Op) { - case ATOMIC_ADD_OP: - DesiredFunction = ADDDesired; - break; - case ATOMIC_CLR_OP: - DesiredFunction = CLRDesired; - break; - case ATOMIC_EOR_OP: - DesiredFunction = EORDesired; - break; - case ATOMIC_SET_OP: - DesiredFunction = SETDesired; - break; - case ATOMIC_SWAP_OP: - DesiredFunction = SWAPDesired; - break; - default: - LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", Op); - return false; + case ATOMIC_ADD_OP: DesiredFunction = ADDDesired; break; + case ATOMIC_CLR_OP: DesiredFunction = CLRDesired; break; + case ATOMIC_EOR_OP: DesiredFunction = EORDesired; break; + case ATOMIC_SET_OP: DesiredFunction = SETDesired; break; + case ATOMIC_SWAP_OP: DesiredFunction = SWAPDesired; break; + default: LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", Op); return false; } - 
auto Res = DoCAS64( - GPRs[SourceReg], - 0, // Unused - Addr, - NOPExpected, - DesiredFunction); + auto Res = DoCAS64(GPRs[SourceReg], + 0, // Unused + Addr, NOPExpected, DesiredFunction); // If we passed and our destination register is not zero // Then we need to update the result register with what was in memory if (ResultReg != 31) { @@ -1541,7 +1431,7 @@ static bool HandleAtomicMemOp(uint32_t Instr, uint64_t *GPRs) { return false; } -static bool HandleAtomicLoad(uint32_t Instr, uint64_t *GPRs, int64_t Offset) { +static bool HandleAtomicLoad(uint32_t Instr, uint64_t* GPRs, int64_t Offset) { uint32_t Size = 1 << (Instr >> 30); uint32_t ResultReg = Instr & 0b11111; @@ -1556,16 +1446,14 @@ static bool HandleAtomicLoad(uint32_t Instr, uint64_t *GPRs, int64_t Offset) { GPRs[ResultReg] = Res; } return true; - } - else if (Size == 4) { + } else if (Size == 4) { auto Res = DoLoad32(Addr); // We set the result register if it isn't a zero register if (ResultReg != 31) { GPRs[ResultReg] = Res; } return true; - } - else if (Size == 8) { + } else if (Size == 8) { auto Res = DoLoad64(Addr); // We set the result register if it isn't a zero register if (ResultReg != 31) { @@ -1577,7 +1465,7 @@ static bool HandleAtomicLoad(uint32_t Instr, uint64_t *GPRs, int64_t Offset) { return false; } -static bool HandleAtomicStore(uint32_t Instr, uint64_t *GPRs, int64_t Offset) { +static bool HandleAtomicStore(uint32_t Instr, uint64_t* GPRs, int64_t Offset) { uint32_t Size = 1 << (Instr >> 30); uint32_t DataReg = Instr & 0x1F; @@ -1592,51 +1480,48 @@ static bool HandleAtomicStore(uint32_t Instr, uint64_t *GPRs, int64_t Offset) { 0, // Unused Addr, [](uint16_t SrcVal, uint16_t) -> uint16_t { - // Expected is just src - return SrcVal; - }, + // Expected is just src + return SrcVal; + }, [](uint16_t, uint16_t Desired) -> uint16_t { - // Desired is just Desired - return Desired; - }); + // Desired is just Desired + return Desired; + }); return true; - } - else if (Size == 4) { + } else if (Size 
== 4) { DoCAS32( GPRs[DataReg], 0, // Unused Addr, [](uint32_t SrcVal, uint32_t) -> uint32_t { - // Expected is just src - return SrcVal; - }, + // Expected is just src + return SrcVal; + }, [](uint32_t, uint32_t Desired) -> uint32_t { - // Desired is just Desired - return Desired; - }); + // Desired is just Desired + return Desired; + }); return true; - } - else if (Size == 8) { + } else if (Size == 8) { DoCAS64( GPRs[DataReg], 0, // Unused Addr, [](uint64_t SrcVal, uint64_t) -> uint64_t { - // Expected is just src - return SrcVal; - }, + // Expected is just src + return SrcVal; + }, [](uint64_t, uint64_t Desired) -> uint64_t { - // Desired is just Desired - return Desired; - }); + // Desired is just Desired + return Desired; + }); return true; } return false; } -static uint64_t HandleCAS_NoAtomics(uintptr_t ProgramCounter, uint64_t *GPRs) -{ +static uint64_t HandleCAS_NoAtomics(uintptr_t ProgramCounter, uint64_t* GPRs) { // ARMv8.0 CAS // [1] ldaxrb(TMP2.W(), MemOperand(MemSrc)) // [2] cmp (TMP2.W(), Expected.W()) @@ -1648,40 +1533,39 @@ static uint64_t HandleCAS_NoAtomics(uintptr_t ProgramCounter, uint64_t *GPRs) // [8] mov (.., TMP2.W()); // [9] clrex - uint32_t *PC = (uint32_t*)ProgramCounter; + uint32_t* PC = (uint32_t*)ProgramCounter; uint32_t Instr = PC[0]; uint32_t Size = 1 << (Instr >> 30); uint32_t AddressReg = GetRnReg(Instr); - uint32_t ResultReg = GetRdReg(Instr); //TMP2 + uint32_t ResultReg = GetRdReg(Instr); // TMP2 uint32_t DesiredReg = 0; uint32_t ExpectedReg = 0; for (size_t i = 1; i < 6; ++i) { - uint32_t NextInstr = PC[i]; - if ((NextInstr & ArchHelpers::Arm64::STLXR_MASK) == ArchHelpers::Arm64::STLXR_INST) { - #if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED - // Just double check that the memory destination matches - const uint32_t StoreAddressReg = GetRnReg(NextInstr); - LOGMAN_THROW_A_FMT(StoreAddressReg == AddressReg, "StoreExclusive memory register didn't match the store exclusive register"); - #endif - DesiredReg = 
GetRdReg(NextInstr); - } - else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::CMP_INST || - (NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::CMP_SHIFT_INST) { - ExpectedReg = GetRmReg(NextInstr); - } + uint32_t NextInstr = PC[i]; + if ((NextInstr & ArchHelpers::Arm64::STLXR_MASK) == ArchHelpers::Arm64::STLXR_INST) { +#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED + // Just double check that the memory destination matches + const uint32_t StoreAddressReg = GetRnReg(NextInstr); + LOGMAN_THROW_A_FMT(StoreAddressReg == AddressReg, "StoreExclusive memory register didn't match the store exclusive register"); +#endif + DesiredReg = GetRdReg(NextInstr); + } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::CMP_INST || + (NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::CMP_SHIFT_INST) { + ExpectedReg = GetRmReg(NextInstr); + } } - //set up CASAL by doing mov(TMP2, Expected) + // set up CASAL by doing mov(TMP2, Expected) GPRs[ResultReg] = GPRs[ExpectedReg]; - if(RunCASAL(GPRs, Size, DesiredReg, ResultReg, AddressReg)) { - return 7 * sizeof(uint32_t); //jump to mov to allocated register + if (RunCASAL(GPRs, Size, DesiredReg, ResultReg, AddressReg)) { + return 7 * sizeof(uint32_t); // jump to mov to allocated register } else { return 0; } } -static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_t *GPRs) { - uint32_t *PC = (uint32_t*)ProgramCounter; +static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_t* GPRs) { + uint32_t* PC = (uint32_t*)ProgramCounter; uint32_t Instr = PC[0]; // Atomic Add @@ -1745,48 +1629,38 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_ (NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::ADD_SHIFT_INST) { AtomicOp = ExclusiveAtomicPairType::TYPE_ADD; DataSourceReg = GetRmReg(NextInstr); - } - else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == 
ArchHelpers::Arm64::SUB_INST || - (NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::SUB_SHIFT_INST) { + } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::SUB_INST || + (NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::SUB_SHIFT_INST) { uint32_t RnReg = GetRnReg(NextInstr); if (RnReg == REGISTER_MASK) { // Zero reg means neg AtomicOp = ExclusiveAtomicPairType::TYPE_NEG; - } - else { + } else { AtomicOp = ExclusiveAtomicPairType::TYPE_SUB; } DataSourceReg = GetRmReg(NextInstr); - } - else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::CMP_INST || - (NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::CMP_SHIFT_INST ) { - return HandleCAS_NoAtomics(ProgramCounter, GPRs); //ARMv8.0 CAS - } - else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::AND_INST) { + } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::CMP_INST || + (NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::CMP_SHIFT_INST) { + return HandleCAS_NoAtomics(ProgramCounter, GPRs); // ARMv8.0 CAS + } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::AND_INST) { AtomicOp = ExclusiveAtomicPairType::TYPE_AND; DataSourceReg = GetRmReg(NextInstr); - } - else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::BIC_INST) { + } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::BIC_INST) { AtomicOp = ExclusiveAtomicPairType::TYPE_BIC; DataSourceReg = GetRmReg(NextInstr); - } - else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::OR_INST) { + } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::OR_INST) { AtomicOp = ExclusiveAtomicPairType::TYPE_OR; DataSourceReg = GetRmReg(NextInstr); - } - else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::ORN_INST) { + } else if ((NextInstr & 
ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::ORN_INST) { AtomicOp = ExclusiveAtomicPairType::TYPE_ORN; DataSourceReg = GetRmReg(NextInstr); - } - else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::EOR_INST) { + } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::EOR_INST) { AtomicOp = ExclusiveAtomicPairType::TYPE_EOR; DataSourceReg = GetRmReg(NextInstr); - } - else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::EON_INST) { + } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::EON_INST) { AtomicOp = ExclusiveAtomicPairType::TYPE_EON; DataSourceReg = GetRmReg(NextInstr); - } - else if ((NextInstr & ArchHelpers::Arm64::STLXR_MASK) == ArchHelpers::Arm64::STLXR_INST) { + } else if ((NextInstr & ArchHelpers::Arm64::STLXR_MASK) == ArchHelpers::Arm64::STLXR_INST) { #if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED // Just double check that the memory destination matches const uint32_t StoreAddressReg = GetRnReg(NextInstr); @@ -1801,14 +1675,12 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_ // Source is directly in STLXR DataSourceReg = StoreResultReg; } - } - else if ((NextInstr & ArchHelpers::Arm64::CBNZ_MASK) == ArchHelpers::Arm64::CBNZ_INST) { + } else if ((NextInstr & ArchHelpers::Arm64::CBNZ_MASK) == ArchHelpers::Arm64::CBNZ_INST) { // Found the CBNZ, we want to skip to just after this instruction when done NumInstructionsToSkip = i + 1; // This is the last instruction we care about. 
Leave now break; - } - else { + } else { LogMan::Msg::AFmt("Unknown instruction 0x{:08x}", NextInstr); } } @@ -1863,159 +1735,79 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_ if (Size == 2) { using AtomicType = uint16_t; - CASDesiredFn DesiredFunction{}; + CASDesiredFn DesiredFunction {}; switch (AtomicOp) { - case ExclusiveAtomicPairType::TYPE_SWAP: - DesiredFunction = SWAPDesired; - break; - case ExclusiveAtomicPairType::TYPE_ADD: - DesiredFunction = ADDDesired; - break; - case ExclusiveAtomicPairType::TYPE_SUB: - DesiredFunction = SUBDesired; - break; - case ExclusiveAtomicPairType::TYPE_AND: - DesiredFunction = ANDDesired; - break; - case ExclusiveAtomicPairType::TYPE_BIC: - DesiredFunction = BICDesired; - break; - case ExclusiveAtomicPairType::TYPE_OR: - DesiredFunction = ORDesired; - break; - case ExclusiveAtomicPairType::TYPE_ORN: - DesiredFunction = ORNDesired; - break; - case ExclusiveAtomicPairType::TYPE_EOR: - DesiredFunction = EORDesired; - break; - case ExclusiveAtomicPairType::TYPE_EON: - DesiredFunction = EONDesired; - break; - case ExclusiveAtomicPairType::TYPE_NEG: - DesiredFunction = NEGDesired; - break; - default: - LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", - FEXCore::ToUnderlying(AtomicOp)); - return false; + case ExclusiveAtomicPairType::TYPE_SWAP: DesiredFunction = SWAPDesired; break; + case ExclusiveAtomicPairType::TYPE_ADD: DesiredFunction = ADDDesired; break; + case ExclusiveAtomicPairType::TYPE_SUB: DesiredFunction = SUBDesired; break; + case ExclusiveAtomicPairType::TYPE_AND: DesiredFunction = ANDDesired; break; + case ExclusiveAtomicPairType::TYPE_BIC: DesiredFunction = BICDesired; break; + case ExclusiveAtomicPairType::TYPE_OR: DesiredFunction = ORDesired; break; + case ExclusiveAtomicPairType::TYPE_ORN: DesiredFunction = ORNDesired; break; + case ExclusiveAtomicPairType::TYPE_EOR: DesiredFunction = EORDesired; break; + case ExclusiveAtomicPairType::TYPE_EON: DesiredFunction = 
EONDesired; break; + case ExclusiveAtomicPairType::TYPE_NEG: DesiredFunction = NEGDesired; break; + default: LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", FEXCore::ToUnderlying(AtomicOp)); return false; } - auto Res = DoCAS16( - GPRs[DataSourceReg], - 0, // Unused - Addr, - NOPExpected, - DesiredFunction); + auto Res = DoCAS16(GPRs[DataSourceReg], + 0, // Unused + Addr, NOPExpected, DesiredFunction); if (AtomicFetch && ResultReg != 31) { // On atomic fetch then we store the resulting value back in to the loadacquire destination register // We want the memory value BEFORE the ALU op GPRs[ResultReg] = Res; } - } - else if (Size == 4) { + } else if (Size == 4) { using AtomicType = uint32_t; - CASDesiredFn DesiredFunction{}; + CASDesiredFn DesiredFunction {}; switch (AtomicOp) { - case ExclusiveAtomicPairType::TYPE_SWAP: - DesiredFunction = SWAPDesired; - break; - case ExclusiveAtomicPairType::TYPE_ADD: - DesiredFunction = ADDDesired; - break; - case ExclusiveAtomicPairType::TYPE_SUB: - DesiredFunction = SUBDesired; - break; - case ExclusiveAtomicPairType::TYPE_AND: - DesiredFunction = ANDDesired; - break; - case ExclusiveAtomicPairType::TYPE_BIC: - DesiredFunction = BICDesired; - break; - case ExclusiveAtomicPairType::TYPE_OR: - DesiredFunction = ORDesired; - break; - case ExclusiveAtomicPairType::TYPE_ORN: - DesiredFunction = ORNDesired; - break; - case ExclusiveAtomicPairType::TYPE_EOR: - DesiredFunction = EORDesired; - break; - case ExclusiveAtomicPairType::TYPE_EON: - DesiredFunction = EONDesired; - break; - case ExclusiveAtomicPairType::TYPE_NEG: - DesiredFunction = NEGDesired; - break; - default: - LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", - FEXCore::ToUnderlying(AtomicOp)); - return false; + case ExclusiveAtomicPairType::TYPE_SWAP: DesiredFunction = SWAPDesired; break; + case ExclusiveAtomicPairType::TYPE_ADD: DesiredFunction = ADDDesired; break; + case ExclusiveAtomicPairType::TYPE_SUB: DesiredFunction = SUBDesired; 
break; + case ExclusiveAtomicPairType::TYPE_AND: DesiredFunction = ANDDesired; break; + case ExclusiveAtomicPairType::TYPE_BIC: DesiredFunction = BICDesired; break; + case ExclusiveAtomicPairType::TYPE_OR: DesiredFunction = ORDesired; break; + case ExclusiveAtomicPairType::TYPE_ORN: DesiredFunction = ORNDesired; break; + case ExclusiveAtomicPairType::TYPE_EOR: DesiredFunction = EORDesired; break; + case ExclusiveAtomicPairType::TYPE_EON: DesiredFunction = EONDesired; break; + case ExclusiveAtomicPairType::TYPE_NEG: DesiredFunction = NEGDesired; break; + default: LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", FEXCore::ToUnderlying(AtomicOp)); return false; } - auto Res = DoCAS32( - GPRs[DataSourceReg], - 0, // Unused - Addr, - NOPExpected, - DesiredFunction); + auto Res = DoCAS32(GPRs[DataSourceReg], + 0, // Unused + Addr, NOPExpected, DesiredFunction); if (AtomicFetch && ResultReg != 31) { // On atomic fetch then we store the resulting value back in to the loadacquire destination register // We want the memory value BEFORE the ALU op GPRs[ResultReg] = Res; } - } - else if (Size == 8) { + } else if (Size == 8) { using AtomicType = uint64_t; - CASDesiredFn DesiredFunction{}; + CASDesiredFn DesiredFunction {}; switch (AtomicOp) { - case ExclusiveAtomicPairType::TYPE_SWAP: - DesiredFunction = SWAPDesired; - break; - case ExclusiveAtomicPairType::TYPE_ADD: - DesiredFunction = ADDDesired; - break; - case ExclusiveAtomicPairType::TYPE_SUB: - DesiredFunction = SUBDesired; - break; - case ExclusiveAtomicPairType::TYPE_AND: - DesiredFunction = ANDDesired; - break; - case ExclusiveAtomicPairType::TYPE_BIC: - DesiredFunction = BICDesired; - break; - case ExclusiveAtomicPairType::TYPE_OR: - DesiredFunction = ORDesired; - break; - case ExclusiveAtomicPairType::TYPE_ORN: - DesiredFunction = ORNDesired; - break; - case ExclusiveAtomicPairType::TYPE_EOR: - DesiredFunction = EORDesired; - break; - case ExclusiveAtomicPairType::TYPE_EON: - DesiredFunction = 
EONDesired; - break; - case ExclusiveAtomicPairType::TYPE_NEG: - DesiredFunction = NEGDesired; - break; - default: - LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", - FEXCore::ToUnderlying(AtomicOp)); - return false; + case ExclusiveAtomicPairType::TYPE_SWAP: DesiredFunction = SWAPDesired; break; + case ExclusiveAtomicPairType::TYPE_ADD: DesiredFunction = ADDDesired; break; + case ExclusiveAtomicPairType::TYPE_SUB: DesiredFunction = SUBDesired; break; + case ExclusiveAtomicPairType::TYPE_AND: DesiredFunction = ANDDesired; break; + case ExclusiveAtomicPairType::TYPE_BIC: DesiredFunction = BICDesired; break; + case ExclusiveAtomicPairType::TYPE_OR: DesiredFunction = ORDesired; break; + case ExclusiveAtomicPairType::TYPE_ORN: DesiredFunction = ORNDesired; break; + case ExclusiveAtomicPairType::TYPE_EOR: DesiredFunction = EORDesired; break; + case ExclusiveAtomicPairType::TYPE_EON: DesiredFunction = EONDesired; break; + case ExclusiveAtomicPairType::TYPE_NEG: DesiredFunction = NEGDesired; break; + default: LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", FEXCore::ToUnderlying(AtomicOp)); return false; } - auto Res = DoCAS64( - GPRs[DataSourceReg], - 0, // Unused - Addr, - NOPExpected, - DesiredFunction); + auto Res = DoCAS64(GPRs[DataSourceReg], + 0, // Unused + Addr, NOPExpected, DesiredFunction); if (AtomicFetch && ResultReg != 31) { // On atomic fetch then we store the resulting value back in to the loadacquire destination register // We want the memory value BEFORE the ALU op @@ -2027,7 +1819,9 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_ return NumInstructionsToSkip * 4; } -[[nodiscard]] std::pair HandleUnalignedAccess(FEXCore::Core::InternalThreadState *Thread, bool ParanoidTSO, uintptr_t ProgramCounter, uint64_t *GPRs) { +[[nodiscard]] +std::pair +HandleUnalignedAccess(FEXCore::Core::InternalThreadState* Thread, bool ParanoidTSO, uintptr_t ProgramCounter, uint64_t* GPRs) { #ifdef _M_ARM_64 
constexpr bool is_arm64 = true; #else @@ -2037,7 +1831,7 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_ constexpr auto NotHandled = std::make_pair(false, 0); if constexpr (is_arm64) { - uint32_t *PC = (uint32_t*)ProgramCounter; + uint32_t* PC = (uint32_t*)ProgramCounter; uint32_t Instr = PC[0]; // 1 = 16bit @@ -2049,47 +1843,40 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_ // ParanoidTSO path doesn't modify any code. if (ParanoidTSO) [[unlikely]] { - if ((Instr & LDAXR_MASK) == LDAR_INST || // LDAR* + if ((Instr & LDAXR_MASK) == LDAR_INST || // LDAR* (Instr & LDAXR_MASK) == LDAPR_INST) { // LDAPR* if (ArchHelpers::Arm64::HandleAtomicLoad(Instr, GPRs, 0)) { // Skip this instruction now return std::make_pair(true, 4); - } - else { + } else { LogMan::Msg::EFmt("Unhandled JIT SIGBUS LDAR*: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]); return NotHandled; } - } - else if ( (Instr & LDAXR_MASK) == STLR_INST) { // STLR* + } else if ((Instr & LDAXR_MASK) == STLR_INST) { // STLR* if (ArchHelpers::Arm64::HandleAtomicStore(Instr, GPRs, 0)) { // Skip this instruction now return std::make_pair(true, 4); - } - else { + } else { LogMan::Msg::EFmt("Unhandled JIT SIGBUS STLR*: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]); return NotHandled; } - } - else if ((Instr & RCPC2_MASK) == LDAPUR_INST) { // LDAPUR* + } else if ((Instr & RCPC2_MASK) == LDAPUR_INST) { // LDAPUR* // Extract the 9-bit offset from the instruction int32_t Offset = static_cast(Instr) << 11 >> 23; if (ArchHelpers::Arm64::HandleAtomicLoad(Instr, GPRs, Offset)) { // Skip this instruction now return std::make_pair(true, 4); - } - else { + } else { LogMan::Msg::EFmt("Unhandled JIT SIGBUS LDAPUR*: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]); return NotHandled; } - } - else if ((Instr & RCPC2_MASK) == STLUR_INST) { // STLUR* + } else if ((Instr & RCPC2_MASK) == STLUR_INST) { // STLUR* // Extract the 
9-bit offset from the instruction int32_t Offset = static_cast(Instr) << 11 >> 23; if (ArchHelpers::Arm64::HandleAtomicStore(Instr, GPRs, Offset)) { // Skip this instruction now return std::make_pair(true, 4); - } - else { + } else { LogMan::Msg::EFmt("Unhandled JIT SIGBUS LDLUR*: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]); return NotHandled; } @@ -2098,14 +1885,14 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_ const auto Frame = Thread->CurrentFrame; const uint64_t BlockBegin = Frame->State.InlineJITBlockHeader; - auto InlineHeader = reinterpret_cast(BlockBegin); - auto InlineTail = reinterpret_cast(Frame->State.InlineJITBlockHeader + InlineHeader->OffsetToBlockTail); + auto InlineHeader = reinterpret_cast(BlockBegin); + auto InlineTail = reinterpret_cast(Frame->State.InlineJITBlockHeader + InlineHeader->OffsetToBlockTail); // Lock code mutex during any SIGBUS handling that potentially changes code. // Need to be careful to not read any code part-way through modification. FEXCore::Utils::SpinWaitLock::UniqueSpinMutex lk(&InlineTail->SpinLockFutex); - if ((Instr & LDAXR_MASK) == LDAR_INST || // LDAR* + if ((Instr & LDAXR_MASK) == LDAR_INST || // LDAR* (Instr & LDAXR_MASK) == LDAPR_INST) { // LDAPR* uint32_t LDR = 0b0011'1000'0111'1111'0110'1000'0000'0000; LDR |= Size << 30; @@ -2116,8 +1903,7 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_ ClearICache(&PC[-1], 16); // With the instruction modified, now execute again. 
return std::make_pair(true, 0); - } - else if ( (Instr & LDAXR_MASK) == STLR_INST) { // STLR* + } else if ((Instr & LDAXR_MASK) == STLR_INST) { // STLR* uint32_t STR = 0b0011'1000'0011'1111'0110'1000'0000'0000; STR |= Size << 30; STR |= AddrReg << 5; @@ -2127,8 +1913,7 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_ ClearICache(&PC[-1], 16); // Back up one instruction and have another go return std::make_pair(true, -4); - } - else if ((Instr & RCPC2_MASK) == LDAPUR_INST) { // LDAPUR* + } else if ((Instr & RCPC2_MASK) == LDAPUR_INST) { // LDAPUR* // Extract the 9-bit offset from the instruction uint32_t LDUR = 0b0011'1000'0100'0000'0000'0000'0000'0000; LDUR |= Size << 30; @@ -2140,84 +1925,70 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_ ClearICache(&PC[-1], 16); // With the instruction modified, now execute again. return std::make_pair(true, 0); - } - else if ((Instr & RCPC2_MASK) == STLUR_INST) { // STLUR* + } else if ((Instr & RCPC2_MASK) == STLUR_INST) { // STLUR* uint32_t STUR = 0b0011'1000'0000'0000'0000'0000'0000'0000; STUR |= Size << 30; STUR |= AddrReg << 5; STUR |= DataReg; STUR |= Instr & (0b1'1111'1111 << 9); - PC[-1] = DMB; // Back-patch the half-barrier. + PC[-1] = DMB; // Back-patch the half-barrier. PC[0] = STUR; ClearICache(&PC[-1], 16); // Back up one instruction and have another go return std::make_pair(true, -4); - } - else if ((Instr & ArchHelpers::Arm64::LDAXP_MASK) == ArchHelpers::Arm64::LDAXP_INST) { // LDAXP - //Should be compare and swap pair only. LDAXP not used elsewhere + } else if ((Instr & ArchHelpers::Arm64::LDAXP_MASK) == ArchHelpers::Arm64::LDAXP_INST) { // LDAXP + // Should be compare and swap pair only. 
LDAXP not used elsewhere uint64_t BytesToSkip = ArchHelpers::Arm64::HandleCASPAL_ARMv8(Instr, ProgramCounter, GPRs); if (BytesToSkip) { // Skip this instruction now return std::make_pair(true, BytesToSkip); - } - else { + } else { if (ArchHelpers::Arm64::HandleAtomicVectorStore(Instr, ProgramCounter)) { return std::make_pair(true, 0); - } - else { + } else { LogMan::Msg::EFmt("Unhandled JIT SIGBUS LDAXP: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]); return NotHandled; } } - } - else if ((Instr & ArchHelpers::Arm64::STLXP_MASK) == ArchHelpers::Arm64::STLXP_INST) { // STLXP - //Should not trigger - middle of an LDAXP/STAXP pair. + } else if ((Instr & ArchHelpers::Arm64::STLXP_MASK) == ArchHelpers::Arm64::STLXP_INST) { // STLXP + // Should not trigger - middle of an LDAXP/STAXP pair. LogMan::Msg::EFmt("Unhandled JIT SIGBUS STLXP: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]); return NotHandled; - } - else if ((Instr & ArchHelpers::Arm64::CASPAL_MASK) == ArchHelpers::Arm64::CASPAL_INST) { // CASPAL + } else if ((Instr & ArchHelpers::Arm64::CASPAL_MASK) == ArchHelpers::Arm64::CASPAL_INST) { // CASPAL if (ArchHelpers::Arm64::HandleCASPAL(Instr, GPRs)) { // Skip this instruction now return std::make_pair(true, 4); - } - else { + } else { LogMan::Msg::EFmt("Unhandled JIT SIGBUS CASPAL: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]); return NotHandled; } - } - else if ((Instr & ArchHelpers::Arm64::CASAL_MASK) == ArchHelpers::Arm64::CASAL_INST) { // CASAL + } else if ((Instr & ArchHelpers::Arm64::CASAL_MASK) == ArchHelpers::Arm64::CASAL_INST) { // CASAL if (ArchHelpers::Arm64::HandleCASAL(GPRs, Instr)) { // Skip this instruction now return std::make_pair(true, 4); - } - else { + } else { LogMan::Msg::EFmt("Unhandled JIT SIGBUS CASAL: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]); return NotHandled; } - } - else if ((Instr & ArchHelpers::Arm64::ATOMIC_MEM_MASK) == ArchHelpers::Arm64::ATOMIC_MEM_INST) { // Atomic memory 
op + } else if ((Instr & ArchHelpers::Arm64::ATOMIC_MEM_MASK) == ArchHelpers::Arm64::ATOMIC_MEM_INST) { // Atomic memory op if (ArchHelpers::Arm64::HandleAtomicMemOp(Instr, GPRs)) { // Skip this instruction now return std::make_pair(true, 4); - } - else { + } else { uint8_t Op = (PC[0] >> 12) & 0xF; LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}: PC: 0x{:x} Instruction: 0x{:08x}\n", Op, ProgramCounter, PC[0]); return NotHandled; } - } - else if ((Instr & ArchHelpers::Arm64::LDAXR_MASK) == ArchHelpers::Arm64::LDAXR_INST) { // LDAXR* + } else if ((Instr & ArchHelpers::Arm64::LDAXR_MASK) == ArchHelpers::Arm64::LDAXR_INST) { // LDAXR* uint64_t BytesToSkip = ArchHelpers::Arm64::HandleAtomicLoadstoreExclusive(ProgramCounter, GPRs); if (BytesToSkip) { // Skip this instruction now return std::make_pair(true, BytesToSkip); - } - else { + } else { LogMan::Msg::EFmt("Unhandled JIT SIGBUS LDAXR: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]); return NotHandled; } - } - else { + } else { LogMan::Msg::EFmt("Unhandled JIT SIGBUS: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]); return NotHandled; } @@ -2226,4 +1997,4 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_ } -} +} // namespace FEXCore::ArchHelpers::Arm64 diff --git a/FEXCore/Source/Utils/ArchHelpers/Arm64_stubs.cpp b/FEXCore/Source/Utils/ArchHelpers/Arm64_stubs.cpp index 434d378fe0..242754b731 100644 --- a/FEXCore/Source/Utils/ArchHelpers/Arm64_stubs.cpp +++ b/FEXCore/Source/Utils/ArchHelpers/Arm64_stubs.cpp @@ -11,22 +11,23 @@ namespace FEXCore::ArchHelpers::Arm64 { // Obvously such a configuration can't do the actual arm64-specific stuff -bool HandleCASPAL(void *_ucontext, void *_info, uint32_t Instr) { - ERROR_AND_DIE_FMT("HandleCASPAL Not Implemented"); +bool HandleCASPAL(void* _ucontext, void* _info, uint32_t Instr) { + ERROR_AND_DIE_FMT("HandleCASPAL Not Implemented"); } -bool HandleCASAL(void *_ucontext, void *_info, uint32_t Instr) { - 
ERROR_AND_DIE_FMT("HandleCASAL Not Implemented"); +bool HandleCASAL(void* _ucontext, void* _info, uint32_t Instr) { + ERROR_AND_DIE_FMT("HandleCASAL Not Implemented"); } -bool HandleAtomicMemOp(void *_ucontext, void *_info, uint32_t Instr) { - ERROR_AND_DIE_FMT("HandleAtomicMemOp Not Implemented"); +bool HandleAtomicMemOp(void* _ucontext, void* _info, uint32_t Instr) { + ERROR_AND_DIE_FMT("HandleAtomicMemOp Not Implemented"); } -std::pair HandleUnalignedAccess(FEXCore::Core::InternalThreadState *Thread, bool ParanoidTSO, uintptr_t ProgramCounter, uint64_t *GPRs) { +std::pair +HandleUnalignedAccess(FEXCore::Core::InternalThreadState* Thread, bool ParanoidTSO, uintptr_t ProgramCounter, uint64_t* GPRs) { ERROR_AND_DIE_FMT("HandleAtomicMemOp Not Implemented"); } #endif -} +} // namespace FEXCore::ArchHelpers::Arm64 diff --git a/FEXCore/Source/Utils/CPUInfo.cpp b/FEXCore/Source/Utils/CPUInfo.cpp index 07b9b07542..f0683e4ace 100644 --- a/FEXCore/Source/Utils/CPUInfo.cpp +++ b/FEXCore/Source/Utils/CPUInfo.cpp @@ -13,24 +13,24 @@ namespace FEXCore::CPUInfo { #ifndef _WIN32 - uint32_t CalculateNumberOfCPUs() { - char Tmp[PATH_MAX]; - size_t CPUs = 1; +uint32_t CalculateNumberOfCPUs() { + char Tmp[PATH_MAX]; + size_t CPUs = 1; - for (;; ++CPUs) { - auto Size = fmt::format_to_n(Tmp, sizeof(Tmp), "/sys/devices/system/cpu/cpu{}", CPUs); - Tmp[Size.size] = 0; - if (!FHU::Filesystem::Exists(Tmp)) { - break; - } + for (;; ++CPUs) { + auto Size = fmt::format_to_n(Tmp, sizeof(Tmp), "/sys/devices/system/cpu/cpu{}", CPUs); + Tmp[Size.size] = 0; + if (!FHU::Filesystem::Exists(Tmp)) { + break; } - - return CPUs; } + + return CPUs; +} #else - uint32_t CalculateNumberOfCPUs() { - // May not return correct number of cores if some are parked. - return std::thread::hardware_concurrency(); - } -#endif +uint32_t CalculateNumberOfCPUs() { + // May not return correct number of cores if some are parked. 
+ return std::thread::hardware_concurrency(); } +#endif +} // namespace FEXCore::CPUInfo diff --git a/FEXCore/Source/Utils/FileLoading.cpp b/FEXCore/Source/Utils/FileLoading.cpp index 71e6c83171..bd46a3396c 100644 --- a/FEXCore/Source/Utils/FileLoading.cpp +++ b/FEXCore/Source/Utils/FileLoading.cpp @@ -14,34 +14,32 @@ namespace FEXCore::FileLoading { #ifndef _WIN32 template -static bool LoadFileImpl(T &Data, const fextl::string &Filepath, size_t FixedSize) { +static bool LoadFileImpl(T& Data, const fextl::string& Filepath, size_t FixedSize) { int FD = open(Filepath.c_str(), O_RDONLY); if (FD == -1) { return false; } - size_t FileSize{}; + size_t FileSize {}; if (FixedSize == 0) { struct stat buf; if (fstat(FD, &buf) == 0) { FileSize = buf.st_size; } - } - else { + } else { FileSize = FixedSize; } ssize_t Read = -1; - bool LoadedFile{}; + bool LoadedFile {}; if (FileSize) { // File size is known upfront Data.resize(FileSize); Read = pread(FD, &Data.at(0), FileSize, 0); LoadedFile = Read == FileSize; - } - else { + } else { // The file is either empty or its size is unknown (e.g. procfs data). 
// Try reading in chunks instead ssize_t CurrentOffset = 0; @@ -68,7 +66,7 @@ static bool LoadFileImpl(T &Data, const fextl::string &Filepath, size_t FixedSiz return LoadedFile; } -ssize_t LoadFileToBuffer(const fextl::string &Filepath, std::span Buffer) { +ssize_t LoadFileToBuffer(const fextl::string& Filepath, std::span Buffer) { int FD = open(Filepath.c_str(), O_RDONLY); if (FD == -1) { @@ -82,7 +80,7 @@ ssize_t LoadFileToBuffer(const fextl::string &Filepath, std::span Buffer) #else template -static bool LoadFileImpl(T &Data, const fextl::string &Filepath, size_t FixedSize) { +static bool LoadFileImpl(T& Data, const fextl::string& Filepath, size_t FixedSize) { std::ifstream f(Filepath, std::ios::binary | std::ios::ate); if (f.fail()) { return false; @@ -94,19 +92,19 @@ static bool LoadFileImpl(T &Data, const fextl::string &Filepath, size_t FixedSiz return !f.fail(); } -ssize_t LoadFileToBuffer(const fextl::string &Filepath, std::span Buffer) { +ssize_t LoadFileToBuffer(const fextl::string& Filepath, std::span Buffer) { std::ifstream f(Filepath, std::ios::binary | std::ios::ate); return f.readsome(Buffer.data(), Buffer.size()); } #endif -bool LoadFile(fextl::vector &Data, const fextl::string &Filepath, size_t FixedSize) { +bool LoadFile(fextl::vector& Data, const fextl::string& Filepath, size_t FixedSize) { return LoadFileImpl(Data, Filepath, FixedSize); } -bool LoadFile(fextl::string &Data, const fextl::string &Filepath, size_t FixedSize) { +bool LoadFile(fextl::string& Data, const fextl::string& Filepath, size_t FixedSize) { return LoadFileImpl(Data, Filepath, FixedSize); } -} +} // namespace FEXCore::FileLoading diff --git a/FEXCore/Source/Utils/ForcedAssert.cpp b/FEXCore/Source/Utils/ForcedAssert.cpp index e9f38c0989..ef27f7182a 100644 --- a/FEXCore/Source/Utils/ForcedAssert.cpp +++ b/FEXCore/Source/Utils/ForcedAssert.cpp @@ -1,15 +1,12 @@ // SPDX-License-Identifier: MIT namespace FEXCore::Assert { - // This function can not be inlined - [[noreturn]] - 
__attribute__((noinline, naked)) - void ForcedAssert() { +// This function can not be inlined +[[noreturn]] +__attribute__((noinline, naked)) void ForcedAssert() { #ifdef _M_X86_64 - asm volatile("ud2"); + asm volatile("ud2"); #else - asm volatile("hlt #1"); + asm volatile("hlt #1"); #endif - } } - - +} // namespace FEXCore::Assert diff --git a/FEXCore/Source/Utils/LogManager.cpp b/FEXCore/Source/Utils/LogManager.cpp index 74697f5ffd..b0cfd75e22 100644 --- a/FEXCore/Source/Utils/LogManager.cpp +++ b/FEXCore/Source/Utils/LogManager.cpp @@ -17,33 +17,41 @@ tags: glue|log-manager namespace LogMan { namespace Throw { -fextl::vector Handlers; -void InstallHandler(ThrowHandler Handler) { Handlers.emplace_back(Handler); } -void UnInstallHandlers() { Handlers.clear(); } + fextl::vector Handlers; + void InstallHandler(ThrowHandler Handler) { + Handlers.emplace_back(Handler); + } + void UnInstallHandlers() { + Handlers.clear(); + } -void MFmt(const char *fmt, const fmt::format_args& args) { - auto msg = fextl::fmt::vformat(fmt, args); + void MFmt(const char* fmt, const fmt::format_args& args) { + auto msg = fextl::fmt::vformat(fmt, args); - for (auto& Handler : Handlers) { - Handler(msg.c_str()); - } + for (auto& Handler : Handlers) { + Handler(msg.c_str()); + } - FEX_TRAP_EXECUTION; -} + FEX_TRAP_EXECUTION; + } } // namespace Throw namespace Msg { -fextl::vector Handlers; -void InstallHandler(MsgHandler Handler) { Handlers.emplace_back(Handler); } -void UnInstallHandlers() { Handlers.clear(); } + fextl::vector Handlers; + void InstallHandler(MsgHandler Handler) { + Handlers.emplace_back(Handler); + } + void UnInstallHandlers() { + Handlers.clear(); + } -void MFmtImpl(DebugLevels level, const char* fmt, const fmt::format_args& args) { - const auto msg = fextl::fmt::vformat(fmt, args); + void MFmtImpl(DebugLevels level, const char* fmt, const fmt::format_args& args) { + const auto msg = fextl::fmt::vformat(fmt, args); - for (auto& Handler : Handlers) { - Handler(level, 
msg.c_str()); + for (auto& Handler : Handlers) { + Handler(level, msg.c_str()); + } } -} } // namespace Msg } // namespace LogMan diff --git a/FEXCore/Source/Utils/Profiler.cpp b/FEXCore/Source/Utils/Profiler.cpp index eae4976742..8fe5c04291 100644 --- a/FEXCore/Source/Utils/Profiler.cpp +++ b/FEXCore/Source/Utils/Profiler.cpp @@ -20,71 +20,70 @@ #ifdef ENABLE_FEXCORE_PROFILER #if FEXCORE_PROFILER_BACKEND == BACKEND_GPUVIS namespace FEXCore::Profiler { - ProfilerBlock::ProfilerBlock(std::string_view const Format) - : DurationBegin {GetTime()} - , Format {Format} { - } +ProfilerBlock::ProfilerBlock(const std::string_view Format) + : DurationBegin {GetTime()} + , Format {Format} {} - ProfilerBlock::~ProfilerBlock() { - auto Duration = GetTime() - DurationBegin; - TraceObject(Format, Duration); - } +ProfilerBlock::~ProfilerBlock() { + auto Duration = GetTime() - DurationBegin; + TraceObject(Format, Duration); } +} // namespace FEXCore::Profiler namespace GPUVis { - // ftrace FD for writing trace data. - // Needs to be a raw FD since we hold this open for the entire application execution. - static int TraceFD {-1}; - - // Need to search the paths to find the real trace path - static std::array TraceFSDirectories { - "/sys/kernel/tracing", - "/sys/kernel/debug/tracing", - }; - - static bool IsTraceFS(char const* Path) { - struct statfs stat; - if (statfs(Path, &stat)) { - return false; - } - return stat.f_type == TRACEFS_MAGIC; +// ftrace FD for writing trace data. +// Needs to be a raw FD since we hold this open for the entire application execution. 
+static int TraceFD {-1}; + +// Need to search the paths to find the real trace path +static std::array TraceFSDirectories { + "/sys/kernel/tracing", + "/sys/kernel/debug/tracing", +}; + +static bool IsTraceFS(const char* Path) { + struct statfs stat; + if (statfs(Path, &stat)) { + return false; } + return stat.f_type == TRACEFS_MAGIC; +} - void Init() { - for (auto Path : TraceFSDirectories) { - if (IsTraceFS(Path)) { - fextl::string FilePath = fextl::fmt::format("{}/trace_marker", Path); - TraceFD = open(FilePath.c_str(), O_WRONLY | O_CLOEXEC); - if (TraceFD != -1) { - // Opened TraceFD, early exit - break; - } +void Init() { + for (auto Path : TraceFSDirectories) { + if (IsTraceFS(Path)) { + fextl::string FilePath = fextl::fmt::format("{}/trace_marker", Path); + TraceFD = open(FilePath.c_str(), O_WRONLY | O_CLOEXEC); + if (TraceFD != -1) { + // Opened TraceFD, early exit + break; } } } +} - void Shutdown() { - if (TraceFD != -1) { - close(TraceFD); - TraceFD = -1; - } +void Shutdown() { + if (TraceFD != -1) { + close(TraceFD); + TraceFD = -1; } +} - void TraceObject(std::string_view const Format, uint64_t Duration) { - if (TraceFD != -1) { - // Print the duration as something that began negative duration ago - fextl::string Event = fextl::fmt::format("{} (lduration=-{})\n", Format, Duration); - write(TraceFD, Event.c_str(), Event.size()); - } +void TraceObject(const std::string_view Format, uint64_t Duration) { + if (TraceFD != -1) { + // Print the duration as something that began negative duration ago + fextl::string Event = fextl::fmt::format("{} (lduration=-{})\n", Format, Duration); + write(TraceFD, Event.c_str(), Event.size()); } +} - void TraceObject(std::string_view const Format) { - if (TraceFD != -1) { - fextl::string Event = fextl::fmt::format("{}\n", Format); - write(TraceFD, Format.data(), Format.size()); - } +void TraceObject(const std::string_view Format) { + if (TraceFD != -1) { + fextl::string Event = fextl::fmt::format("{}\n", Format); + 
write(TraceFD, Format.data(), Format.size()); } } +} // namespace GPUVis #else #error Unknown profiler backend #endif @@ -92,29 +91,28 @@ namespace GPUVis { namespace FEXCore::Profiler { #ifdef ENABLE_FEXCORE_PROFILER - void Init() { +void Init() { #if FEXCORE_PROFILER_BACKEND == BACKEND_GPUVIS - GPUVis::Init(); + GPUVis::Init(); #endif - } +} - void Shutdown() { +void Shutdown() { #if FEXCORE_PROFILER_BACKEND == BACKEND_GPUVIS - GPUVis::Shutdown(); + GPUVis::Shutdown(); #endif - } +} - void TraceObject(std::string_view const Format, uint64_t Duration) { +void TraceObject(const std::string_view Format, uint64_t Duration) { #if FEXCORE_PROFILER_BACKEND == BACKEND_GPUVIS - GPUVis::TraceObject(Format, Duration); + GPUVis::TraceObject(Format, Duration); #endif - } +} - void TraceObject(std::string_view const Format) { +void TraceObject(const std::string_view Format) { #if FEXCORE_PROFILER_BACKEND == BACKEND_GPUVIS - GPUVis::TraceObject(Format); -#endif - - } + GPUVis::TraceObject(Format); #endif } +#endif +} // namespace FEXCore::Profiler diff --git a/FEXCore/Source/Utils/SpinWaitLock.cpp b/FEXCore/Source/Utils/SpinWaitLock.cpp index b2f011adec..f6b2ff416a 100644 --- a/FEXCore/Source/Utils/SpinWaitLock.cpp +++ b/FEXCore/Source/Utils/SpinWaitLock.cpp @@ -2,26 +2,25 @@ namespace FEXCore::Utils::SpinWaitLock { #ifdef _M_ARM_64 - constexpr uint64_t NanosecondsInSecond = 1'000'000'000ULL; +constexpr uint64_t NanosecondsInSecond = 1'000'000'000ULL; - static uint32_t GetCycleCounterFrequency() { - uint64_t Result{}; - __asm("mrs %[Res], CNTFRQ_EL0" - : [Res] "=r" (Result)); - return Result; - } +static uint32_t GetCycleCounterFrequency() { + uint64_t Result {}; + __asm("mrs %[Res], CNTFRQ_EL0" : [Res] "=r"(Result)); + return Result; +} - static uint64_t CalculateCyclesPerNanosecond() { - // Snapdragon devices historically use a 19.2Mhz cycle counter frequency - // This means that the number of cycles per nanosecond ends up being 52.0833... 
- // - // ARMv8.6 and ARMv9.1 requires the cycle counter frequency to be 1Ghz. - // This means the number of cycles per nanosecond ends up being 1. - uint64_t CounterFrequency = GetCycleCounterFrequency(); - return NanosecondsInSecond / CounterFrequency; - } +static uint64_t CalculateCyclesPerNanosecond() { + // Snapdragon devices historically use a 19.2Mhz cycle counter frequency + // This means that the number of cycles per nanosecond ends up being 52.0833... + // + // ARMv8.6 and ARMv9.1 requires the cycle counter frequency to be 1Ghz. + // This means the number of cycles per nanosecond ends up being 1. + uint64_t CounterFrequency = GetCycleCounterFrequency(); + return NanosecondsInSecond / CounterFrequency; +} - uint32_t CycleCounterFrequency = GetCycleCounterFrequency(); - uint64_t CyclesPerNanosecond = CalculateCyclesPerNanosecond(); +uint32_t CycleCounterFrequency = GetCycleCounterFrequency(); +uint64_t CyclesPerNanosecond = CalculateCyclesPerNanosecond(); #endif -} +} // namespace FEXCore::Utils::SpinWaitLock diff --git a/FEXCore/Source/Utils/Telemetry.cpp b/FEXCore/Source/Utils/Telemetry.cpp index c234d390a8..3b6e892e77 100644 --- a/FEXCore/Source/Utils/Telemetry.cpp +++ b/FEXCore/Source/Utils/Telemetry.cpp @@ -14,76 +14,73 @@ namespace FEXCore::Telemetry { #ifndef FEX_DISABLE_TELEMETRY - static std::array TelemetryValues = {{ }}; - const std::array TelemetryNames { - "64byte Split Locks", - "16byte Split atomics", - "VEX instructions (AVX)", - "EVEX instructions (AVX512)", - "16bit CAS Tear", - "32bit CAS Tear", - "64bit CAS Tear", - "128bit CAS Tear", - "Crash mask", - "Write 32-bit Segment ES", - "Write 32-bit Segment SS", - "Write 32-bit Segment CS", - "Write 32-bit Segment DS", - "Uses 32-bit Segment ES", - "Uses 32-bit Segment SS", - "Uses 32-bit Segment CS", - "Uses 32-bit Segment DS", - }; +static std::array TelemetryValues = {{}}; +const std::array TelemetryNames { + "64byte Split Locks", + "16byte Split atomics", + "VEX instructions (AVX)", + 
"EVEX instructions (AVX512)", + "16bit CAS Tear", + "32bit CAS Tear", + "64bit CAS Tear", + "128bit CAS Tear", + "Crash mask", + "Write 32-bit Segment ES", + "Write 32-bit Segment SS", + "Write 32-bit Segment CS", + "Write 32-bit Segment DS", + "Uses 32-bit Segment ES", + "Uses 32-bit Segment SS", + "Uses 32-bit Segment CS", + "Uses 32-bit Segment DS", +}; - static bool Enabled {true}; - void Initialize() { - FEX_CONFIG_OPT(DisableTelemetry, DISABLETELEMETRY); - if (DisableTelemetry) { - Enabled = false; - return; - } +static bool Enabled {true}; +void Initialize() { + FEX_CONFIG_OPT(DisableTelemetry, DISABLETELEMETRY); + if (DisableTelemetry) { + Enabled = false; + return; + } - auto DataDirectory = Config::GetDataDirectory(); - DataDirectory += "Telemetry/"; + auto DataDirectory = Config::GetDataDirectory(); + DataDirectory += "Telemetry/"; - // Ensure the folder structure is created for our configuration - if (!FHU::Filesystem::Exists(DataDirectory) && - !FHU::Filesystem::CreateDirectories(DataDirectory)) { - LogMan::Msg::IFmt("Couldn't create telemetry Folder"); - } + // Ensure the folder structure is created for our configuration + if (!FHU::Filesystem::Exists(DataDirectory) && !FHU::Filesystem::CreateDirectories(DataDirectory)) { + LogMan::Msg::IFmt("Couldn't create telemetry Folder"); } +} - void Shutdown(fextl::string const &ApplicationName) { - if (!Enabled) { - return; - } +void Shutdown(const fextl::string& ApplicationName) { + if (!Enabled) { + return; + } - auto DataDirectory = Config::GetDataDirectory(); - DataDirectory += "Telemetry/" + ApplicationName + ".telem"; + auto DataDirectory = Config::GetDataDirectory(); + DataDirectory += "Telemetry/" + ApplicationName + ".telem"; - if (FHU::Filesystem::Exists(DataDirectory)) { - // If the file exists, retain a single backup - auto Backup = DataDirectory + ".1"; - FHU::Filesystem::CopyFile(DataDirectory, Backup, FHU::Filesystem::CopyOptions::OVERWRITE_EXISTING); - } + if 
(FHU::Filesystem::Exists(DataDirectory)) { + // If the file exists, retain a single backup + auto Backup = DataDirectory + ".1"; + FHU::Filesystem::CopyFile(DataDirectory, Backup, FHU::Filesystem::CopyOptions::OVERWRITE_EXISTING); + } - auto File = FEXCore::File::File(DataDirectory.c_str(), - FEXCore::File::FileModes::WRITE | - FEXCore::File::FileModes::CREATE | - FEXCore::File::FileModes::TRUNCATE); + auto File = FEXCore::File::File(DataDirectory.c_str(), + FEXCore::File::FileModes::WRITE | FEXCore::File::FileModes::CREATE | FEXCore::File::FileModes::TRUNCATE); - if (File.IsValid()) { - for (size_t i = 0; i < TelemetryType::TYPE_LAST; ++i) { - auto &Name = TelemetryNames.at(i); - auto &Data = TelemetryValues.at(i); - fextl::fmt::print(File, "{}: {}\n", Name, *Data); - } - File.Flush(); + if (File.IsValid()) { + for (size_t i = 0; i < TelemetryType::TYPE_LAST; ++i) { + auto& Name = TelemetryNames.at(i); + auto& Data = TelemetryValues.at(i); + fextl::fmt::print(File, "{}: {}\n", Name, *Data); } + File.Flush(); } +} - Value &GetTelemetryValue(TelemetryType Type) { - return TelemetryValues.at(Type); - } -#endif +Value& GetTelemetryValue(TelemetryType Type) { + return TelemetryValues.at(Type); } +#endif +} // namespace FEXCore::Telemetry diff --git a/FEXCore/Source/Utils/Threads.cpp b/FEXCore/Source/Utils/Threads.cpp index 1e826f84e3..828faf7285 100644 --- a/FEXCore/Source/Utils/Threads.cpp +++ b/FEXCore/Source/Utils/Threads.cpp @@ -18,47 +18,43 @@ #endif namespace FEXCore::Threads { - fextl::unique_ptr CreateThread_Default( - ThreadFunc Func, - void* Arg) { - ERROR_AND_DIE_FMT("Frontend didn't setup thread creation!"); - } +fextl::unique_ptr CreateThread_Default(ThreadFunc Func, void* Arg) { + ERROR_AND_DIE_FMT("Frontend didn't setup thread creation!"); +} - void CleanupAfterFork_Default() { - ERROR_AND_DIE_FMT("Frontend didn't setup thread creation!"); - } +void CleanupAfterFork_Default() { + ERROR_AND_DIE_FMT("Frontend didn't setup thread creation!"); +} - static 
FEXCore::Threads::Pointers Ptrs = { - .CreateThread = CreateThread_Default, - .CleanupAfterFork = CleanupAfterFork_Default, - }; +static FEXCore::Threads::Pointers Ptrs = { + .CreateThread = CreateThread_Default, + .CleanupAfterFork = CleanupAfterFork_Default, +}; - fextl::unique_ptr FEXCore::Threads::Thread::Create( - ThreadFunc Func, - void* Arg) { - return Ptrs.CreateThread(Func, Arg); - } +fextl::unique_ptr FEXCore::Threads::Thread::Create(ThreadFunc Func, void* Arg) { + return Ptrs.CreateThread(Func, Arg); +} - void FEXCore::Threads::Thread::CleanupAfterFork() { - return Ptrs.CleanupAfterFork(); - } +void FEXCore::Threads::Thread::CleanupAfterFork() { + return Ptrs.CleanupAfterFork(); +} - void FEXCore::Threads::Thread::SetInternalPointers(Pointers const &_Ptrs) { - memcpy(&Ptrs, &_Ptrs, sizeof(FEXCore::Threads::Pointers)); - } +void FEXCore::Threads::Thread::SetInternalPointers(const Pointers& _Ptrs) { + memcpy(&Ptrs, &_Ptrs, sizeof(FEXCore::Threads::Pointers)); +} - uint64_t SetSignalMask(uint64_t Mask) { +uint64_t SetSignalMask(uint64_t Mask) { #ifndef _WIN32 - ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &Mask, &Mask, 8); - return Mask; + ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &Mask, &Mask, 8); + return Mask; #endif - } +} - void SetThreadName(const char *name) { +void SetThreadName(const char* name) { #ifndef _WIN32 pthread_setname_np(pthread_self(), name); #else // TODO: #endif - } } +} // namespace FEXCore::Threads diff --git a/FEXCore/unittests/APITests/FutexSpinTest.cpp b/FEXCore/unittests/APITests/FutexSpinTest.cpp index 9871b318cc..7de98019f1 100644 --- a/FEXCore/unittests/APITests/FutexSpinTest.cpp +++ b/FEXCore/unittests/APITests/FutexSpinTest.cpp @@ -6,7 +6,7 @@ constexpr auto SleepAmount = std::chrono::milliseconds(250); TEST_CASE("FutexSpin-Timed-8bit") { - uint8_t Test{}; + uint8_t Test {}; auto now = std::chrono::high_resolution_clock::now(); FEXCore::Utils::SpinWaitLock::Wait(&Test, 1, SleepAmount); @@ -20,8 +20,8 @@ 
TEST_CASE("FutexSpin-Timed-8bit") { TEST_CASE("FutexSpin-Sleep-8bit") { constexpr auto SleepAmount = std::chrono::seconds(1); - uint8_t Test{}; - std::atomic ActualSpinLoop{}; + uint8_t Test {}; + std::atomic ActualSpinLoop {}; std::chrono::nanoseconds SleptAmount; std::thread t([&Test, &SleptAmount, &ActualSpinLoop]() { @@ -33,7 +33,8 @@ TEST_CASE("FutexSpin-Sleep-8bit") { }); // Wait until the second thread lets us know to stop waiting sleeping. - while(ActualSpinLoop.load() == 0); + while (ActualSpinLoop.load() == 0) + ; // sleep this thread for the sleep amount. std::this_thread::sleep_for(SleepAmount); @@ -49,7 +50,7 @@ TEST_CASE("FutexSpin-Sleep-8bit") { } TEST_CASE("FutexSpin-Timed-16bit") { - uint16_t Test{}; + uint16_t Test {}; auto now = std::chrono::high_resolution_clock::now(); FEXCore::Utils::SpinWaitLock::Wait(&Test, 1, SleepAmount); @@ -61,7 +62,7 @@ TEST_CASE("FutexSpin-Timed-16bit") { } TEST_CASE("FutexSpin-Timed-32bit") { - uint32_t Test{}; + uint32_t Test {}; auto now = std::chrono::high_resolution_clock::now(); FEXCore::Utils::SpinWaitLock::Wait(&Test, 1, SleepAmount); @@ -73,7 +74,7 @@ TEST_CASE("FutexSpin-Timed-32bit") { } TEST_CASE("FutexSpin-Timed-64bit") { - uint64_t Test{}; + uint64_t Test {}; auto now = std::chrono::high_resolution_clock::now(); FEXCore::Utils::SpinWaitLock::Wait(&Test, 1, SleepAmount); diff --git a/FEXCore/unittests/Emitter/ALU_Tests.cpp b/FEXCore/unittests/Emitter/ALU_Tests.cpp index a5d746db5b..a1a58458a2 100644 --- a/FEXCore/unittests/Emitter/ALU_Tests.cpp +++ b/FEXCore/unittests/Emitter/ALU_Tests.cpp @@ -226,7 +226,6 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: PC relative") { CHECK(DisassembleEncoding(0) == 0xb000081e); CHECK(DisassembleEncoding(1) == 0x910013de); } - } TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Add/subtract immediate") { TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, 0, false), "add w29, w28, #0x0 (0)"); @@ -371,9 +370,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Logical 
immediate") { TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, 1), "eor x29, x28, #0x1"); TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, -2), "eor x29, x28, #0xfffffffffffffffe"); - TEST_SINGLE(tst(Size::i32Bit, Reg::r28, 1), "tst w28, #0x1"); + TEST_SINGLE(tst(Size::i32Bit, Reg::r28, 1), "tst w28, #0x1"); TEST_SINGLE(tst(Size::i32Bit, Reg::r28, -2), "tst w28, #0xfffffffe"); - TEST_SINGLE(tst(Size::i64Bit, Reg::r28, 1), "tst x28, #0x1"); + TEST_SINGLE(tst(Size::i64Bit, Reg::r28, 1), "tst x28, #0x1"); TEST_SINGLE(tst(Size::i64Bit, Reg::r28, -2), "tst x28, #0xfffffffffffffffe"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Move wide immediate") { @@ -442,28 +441,28 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Bitfield") { TEST_SINGLE(asr(Size::i32Bit, Reg::r29, Reg::r28, 17), "asr w29, w28, #17"); TEST_SINGLE(asr(Size::i64Bit, Reg::r29, Reg::r28, 17), "asr x29, x28, #17"); - TEST_SINGLE(bfc(Size::i32Bit, Reg::r29, 4, 3), "bfc w29, #4, #3"); + TEST_SINGLE(bfc(Size::i32Bit, Reg::r29, 4, 3), "bfc w29, #4, #3"); TEST_SINGLE(bfc(Size::i32Bit, Reg::r29, 27, 3), "bfc w29, #27, #3"); - TEST_SINGLE(bfc(Size::i64Bit, Reg::r29, 4, 3), "bfc x29, #4, #3"); + TEST_SINGLE(bfc(Size::i64Bit, Reg::r29, 4, 3), "bfc x29, #4, #3"); TEST_SINGLE(bfc(Size::i64Bit, Reg::r29, 57, 3), "bfc x29, #57, #3"); - TEST_SINGLE(bfxil(Size::i32Bit, Reg::r29, Reg::r28, 4, 3), "bfxil w29, w28, #4, #3"); + TEST_SINGLE(bfxil(Size::i32Bit, Reg::r29, Reg::r28, 4, 3), "bfxil w29, w28, #4, #3"); TEST_SINGLE(bfxil(Size::i32Bit, Reg::r29, Reg::r28, 27, 3), "bfxil w29, w28, #27, #3"); - - TEST_SINGLE(bfxil(Size::i64Bit, Reg::r29, Reg::r28, 4, 3), "bfxil x29, x28, #4, #3"); + + TEST_SINGLE(bfxil(Size::i64Bit, Reg::r29, Reg::r28, 4, 3), "bfxil x29, x28, #4, #3"); TEST_SINGLE(bfxil(Size::i64Bit, Reg::r29, Reg::r28, 57, 3), "bfxil x29, x28, #57, #3"); - TEST_SINGLE(sbfiz(Size::i32Bit, Reg::r29, Reg::r28, 5, 3), "sbfiz w29, w28, #5, #3"); + TEST_SINGLE(sbfiz(Size::i32Bit, Reg::r29, Reg::r28, 5, 3), 
"sbfiz w29, w28, #5, #3"); TEST_SINGLE(sbfiz(Size::i32Bit, Reg::r29, Reg::r28, 27, 3), "sbfiz w29, w28, #27, #3"); - TEST_SINGLE(sbfiz(Size::i64Bit, Reg::r29, Reg::r28, 5, 3), "sbfiz x29, x28, #5, #3"); + TEST_SINGLE(sbfiz(Size::i64Bit, Reg::r29, Reg::r28, 5, 3), "sbfiz x29, x28, #5, #3"); TEST_SINGLE(sbfiz(Size::i64Bit, Reg::r29, Reg::r28, 54, 3), "sbfiz x29, x28, #54, #3"); - TEST_SINGLE(ubfiz(Size::i32Bit, Reg::r29, Reg::r28, 5, 3), "ubfiz w29, w28, #5, #3"); + TEST_SINGLE(ubfiz(Size::i32Bit, Reg::r29, Reg::r28, 5, 3), "ubfiz w29, w28, #5, #3"); TEST_SINGLE(ubfiz(Size::i32Bit, Reg::r29, Reg::r28, 27, 3), "ubfiz w29, w28, #27, #3"); - TEST_SINGLE(ubfiz(Size::i64Bit, Reg::r29, Reg::r28, 5, 3), "ubfiz x29, x28, #5, #3"); + TEST_SINGLE(ubfiz(Size::i64Bit, Reg::r29, Reg::r28, 5, 3), "ubfiz x29, x28, #5, #3"); TEST_SINGLE(ubfiz(Size::i64Bit, Reg::r29, Reg::r28, 54, 3), "ubfiz x29, x28, #54, #3"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Extract") { @@ -1691,8 +1690,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: AddSub - with carry") { TEST_SINGLE(sbcs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "sbcs w29, w28, w27"); TEST_SINGLE(sbcs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "sbcs x29, x28, x27"); - TEST_SINGLE(ngc(Size::i32Bit, Reg::r29, Reg::r27), "ngc w29, w27"); - TEST_SINGLE(ngc(Size::i64Bit, Reg::r29, Reg::r27), "ngc x29, x27"); + TEST_SINGLE(ngc(Size::i32Bit, Reg::r29, Reg::r27), "ngc w29, w27"); + TEST_SINGLE(ngc(Size::i64Bit, Reg::r29, Reg::r27), "ngc x29, x27"); TEST_SINGLE(ngcs(Size::i32Bit, Reg::r29, Reg::r27), "ngcs w29, w27"); TEST_SINGLE(ngcs(Size::i64Bit, Reg::r29, Reg::r27), "ngcs x29, x27"); @@ -1706,7 +1705,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Rotate right into flags") { TEST_SINGLE(rmif(XReg::x30, 63, 0b1111), "rmif x30, #63, #NZCV"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Evaluate into flags") { - TEST_SINGLE(setf8(WReg::w30), "setf8 w30"); + TEST_SINGLE(setf8(WReg::w30), "setf8 w30"); 
TEST_SINGLE(setf16(WReg::w30), "setf16 w30"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Carry flag invert") { diff --git a/FEXCore/unittests/Emitter/ASIMD_Tests.cpp b/FEXCore/unittests/Emitter/ASIMD_Tests.cpp index 0b14a15778..f9aa944d82 100644 --- a/FEXCore/unittests/Emitter/ASIMD_Tests.cpp +++ b/FEXCore/unittests/Emitter/ASIMD_Tests.cpp @@ -40,35 +40,41 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD table lookup") TEST_SINGLE(tbx(QReg::q30, QReg::q26, QReg::q25), "tbx v30.16b, {v26.16b}, v25.16b"); TEST_SINGLE(tbx(DReg::d30, QReg::q26, DReg::d25), "tbx v30.8b, {v26.16b}, v25.8b"); - TEST_SINGLE(tbl(QReg::q30, QReg::q31, QReg::q0, QReg::q25), "tbl v30.16b, {v31.16b, v0.16b}, v25.16b"); - TEST_SINGLE(tbl(DReg::d30, QReg::q31, QReg::q0, DReg::d25), "tbl v30.8b, {v31.16b, v0.16b}, v25.8b"); + TEST_SINGLE(tbl(QReg::q30, QReg::q31, QReg::q0, QReg::q25), "tbl v30.16b, {v31.16b, v0.16b}, v25.16b"); + TEST_SINGLE(tbl(DReg::d30, QReg::q31, QReg::q0, DReg::d25), "tbl v30.8b, {v31.16b, v0.16b}, v25.8b"); TEST_SINGLE(tbl(QReg::q30, QReg::q26, QReg::q27, QReg::q25), "tbl v30.16b, {v26.16b, v27.16b}, v25.16b"); TEST_SINGLE(tbl(DReg::d30, QReg::q26, QReg::q27, DReg::d25), "tbl v30.8b, {v26.16b, v27.16b}, v25.8b"); - TEST_SINGLE(tbx(QReg::q30, QReg::q31, QReg::q0, QReg::q25), "tbx v30.16b, {v31.16b, v0.16b}, v25.16b"); - TEST_SINGLE(tbx(DReg::d30, QReg::q31, QReg::q0, DReg::d25), "tbx v30.8b, {v31.16b, v0.16b}, v25.8b"); + TEST_SINGLE(tbx(QReg::q30, QReg::q31, QReg::q0, QReg::q25), "tbx v30.16b, {v31.16b, v0.16b}, v25.16b"); + TEST_SINGLE(tbx(DReg::d30, QReg::q31, QReg::q0, DReg::d25), "tbx v30.8b, {v31.16b, v0.16b}, v25.8b"); TEST_SINGLE(tbx(QReg::q30, QReg::q26, QReg::q27, QReg::q25), "tbx v30.16b, {v26.16b, v27.16b}, v25.16b"); TEST_SINGLE(tbx(DReg::d30, QReg::q26, QReg::q27, DReg::d25), "tbx v30.8b, {v26.16b, v27.16b}, v25.8b"); - TEST_SINGLE(tbl(QReg::q30, QReg::q31, QReg::q0, QReg::q1, QReg::q25), "tbl v30.16b, {v31.16b, v0.16b, v1.16b}, 
v25.16b"); - TEST_SINGLE(tbl(DReg::d30, QReg::q31, QReg::q0, QReg::q1, DReg::d25), "tbl v30.8b, {v31.16b, v0.16b, v1.16b}, v25.8b"); + TEST_SINGLE(tbl(QReg::q30, QReg::q31, QReg::q0, QReg::q1, QReg::q25), "tbl v30.16b, {v31.16b, v0.16b, v1.16b}, v25.16b"); + TEST_SINGLE(tbl(DReg::d30, QReg::q31, QReg::q0, QReg::q1, DReg::d25), "tbl v30.8b, {v31.16b, v0.16b, v1.16b}, v25.8b"); TEST_SINGLE(tbl(QReg::q30, QReg::q26, QReg::q27, QReg::q28, QReg::q25), "tbl v30.16b, {v26.16b, v27.16b, v28.16b}, v25.16b"); TEST_SINGLE(tbl(DReg::d30, QReg::q26, QReg::q27, QReg::q28, DReg::d25), "tbl v30.8b, {v26.16b, v27.16b, v28.16b}, v25.8b"); - TEST_SINGLE(tbx(QReg::q30, QReg::q31, QReg::q0, QReg::q1, QReg::q25), "tbx v30.16b, {v31.16b, v0.16b, v1.16b}, v25.16b"); - TEST_SINGLE(tbx(DReg::d30, QReg::q31, QReg::q0, QReg::q1, DReg::d25), "tbx v30.8b, {v31.16b, v0.16b, v1.16b}, v25.8b"); + TEST_SINGLE(tbx(QReg::q30, QReg::q31, QReg::q0, QReg::q1, QReg::q25), "tbx v30.16b, {v31.16b, v0.16b, v1.16b}, v25.16b"); + TEST_SINGLE(tbx(DReg::d30, QReg::q31, QReg::q0, QReg::q1, DReg::d25), "tbx v30.8b, {v31.16b, v0.16b, v1.16b}, v25.8b"); TEST_SINGLE(tbx(QReg::q30, QReg::q26, QReg::q27, QReg::q28, QReg::q25), "tbx v30.16b, {v26.16b, v27.16b, v28.16b}, v25.16b"); TEST_SINGLE(tbx(DReg::d30, QReg::q26, QReg::q27, QReg::q28, DReg::d25), "tbx v30.8b, {v26.16b, v27.16b, v28.16b}, v25.8b"); - TEST_SINGLE(tbl(QReg::q30, QReg::q31, QReg::q0, QReg::q1, QReg::q2, QReg::q25), "tbl v30.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v25.16b"); - TEST_SINGLE(tbl(DReg::d30, QReg::q31, QReg::q0, QReg::q1, QReg::q2, DReg::d25), "tbl v30.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v25.8b"); - TEST_SINGLE(tbl(QReg::q30, QReg::q26, QReg::q27, QReg::q28, QReg::q29, QReg::q25), "tbl v30.16b, {v26.16b, v27.16b, v28.16b, v29.16b}, v25.16b"); - TEST_SINGLE(tbl(DReg::d30, QReg::q26, QReg::q27, QReg::q28, QReg::q29, DReg::d25), "tbl v30.8b, {v26.16b, v27.16b, v28.16b, v29.16b}, v25.8b"); - - TEST_SINGLE(tbx(QReg::q30, QReg::q31, QReg::q0, 
QReg::q1, QReg::q2, QReg::q25), "tbx v30.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v25.16b"); - TEST_SINGLE(tbx(DReg::d30, QReg::q31, QReg::q0, QReg::q1, QReg::q2, DReg::d25), "tbx v30.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v25.8b"); - TEST_SINGLE(tbx(QReg::q30, QReg::q26, QReg::q27, QReg::q28, QReg::q29, QReg::q25), "tbx v30.16b, {v26.16b, v27.16b, v28.16b, v29.16b}, v25.16b"); - TEST_SINGLE(tbx(DReg::d30, QReg::q26, QReg::q27, QReg::q28, QReg::q29, DReg::d25), "tbx v30.8b, {v26.16b, v27.16b, v28.16b, v29.16b}, v25.8b"); + TEST_SINGLE(tbl(QReg::q30, QReg::q31, QReg::q0, QReg::q1, QReg::q2, QReg::q25), "tbl v30.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, " + "v25.16b"); + TEST_SINGLE(tbl(DReg::d30, QReg::q31, QReg::q0, QReg::q1, QReg::q2, DReg::d25), "tbl v30.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v25.8b"); + TEST_SINGLE(tbl(QReg::q30, QReg::q26, QReg::q27, QReg::q28, QReg::q29, QReg::q25), "tbl v30.16b, {v26.16b, v27.16b, v28.16b, v29.16b}, " + "v25.16b"); + TEST_SINGLE(tbl(DReg::d30, QReg::q26, QReg::q27, QReg::q28, QReg::q29, DReg::d25), "tbl v30.8b, {v26.16b, v27.16b, v28.16b, v29.16b}, " + "v25.8b"); + + TEST_SINGLE(tbx(QReg::q30, QReg::q31, QReg::q0, QReg::q1, QReg::q2, QReg::q25), "tbx v30.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, " + "v25.16b"); + TEST_SINGLE(tbx(DReg::d30, QReg::q31, QReg::q0, QReg::q1, QReg::q2, DReg::d25), "tbx v30.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v25.8b"); + TEST_SINGLE(tbx(QReg::q30, QReg::q26, QReg::q27, QReg::q28, QReg::q29, QReg::q25), "tbx v30.16b, {v26.16b, v27.16b, v28.16b, v29.16b}, " + "v25.16b"); + TEST_SINGLE(tbx(DReg::d30, QReg::q26, QReg::q27, QReg::q28, QReg::q29, DReg::d25), "tbx v30.8b, {v26.16b, v27.16b, v28.16b, v29.16b}, " + "v25.8b"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD permute") { // Commented out lines showcase unallocated encodings. 
@@ -84,11 +90,11 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD permute") { TEST_SINGLE(uzp1(DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uzp1(DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uzp1(DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(uzp1(DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uzp1(DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.1d, v29.1d, v28.1d"); TEST_SINGLE(uzp1(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uzp1(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uzp1(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(uzp1(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uzp1(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.1d, v29.1d, v28.1d"); TEST_SINGLE(trn1(QReg::q30, QReg::q29, QReg::q28), "trn1 v30.16b, v29.16b, v28.16b"); TEST_SINGLE(trn1(QReg::q30, QReg::q29, QReg::q28), "trn1 v30.8h, v29.8h, v28.8h"); @@ -102,11 +108,11 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD permute") { TEST_SINGLE(trn1(DReg::d30, DReg::d29, DReg::d28), "trn1 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(trn1(DReg::d30, DReg::d29, DReg::d28), "trn1 v30.4h, v29.4h, v28.4h"); TEST_SINGLE(trn1(DReg::d30, DReg::d29, DReg::d28), "trn1 v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(trn1(DReg::d30, DReg::d29, DReg::d28), "trn1 v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(trn1(DReg::d30, DReg::d29, DReg::d28), "trn1 v30.1d, v29.1d, v28.1d"); TEST_SINGLE(trn1(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "trn1 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(trn1(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "trn1 v30.4h, v29.4h, v28.4h"); TEST_SINGLE(trn1(SubRegSize::i32Bit, DReg::d30, 
DReg::d29, DReg::d28), "trn1 v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(trn1(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "trn1 v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(trn1(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "trn1 v30.1d, v29.1d, v28.1d"); TEST_SINGLE(zip1(QReg::q30, QReg::q29, QReg::q28), "zip1 v30.16b, v29.16b, v28.16b"); TEST_SINGLE(zip1(QReg::q30, QReg::q29, QReg::q28), "zip1 v30.8h, v29.8h, v28.8h"); @@ -120,11 +126,11 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD permute") { TEST_SINGLE(zip1(DReg::d30, DReg::d29, DReg::d28), "zip1 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(zip1(DReg::d30, DReg::d29, DReg::d28), "zip1 v30.4h, v29.4h, v28.4h"); TEST_SINGLE(zip1(DReg::d30, DReg::d29, DReg::d28), "zip1 v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(zip1(DReg::d30, DReg::d29, DReg::d28), "zip1 v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(zip1(DReg::d30, DReg::d29, DReg::d28), "zip1 v30.1d, v29.1d, v28.1d"); TEST_SINGLE(zip1(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "zip1 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(zip1(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "zip1 v30.4h, v29.4h, v28.4h"); TEST_SINGLE(zip1(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "zip1 v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(zip1(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "zip1 v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(zip1(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "zip1 v30.1d, v29.1d, v28.1d"); TEST_SINGLE(uzp2(QReg::q30, QReg::q29, QReg::q28), "uzp2 v30.16b, v29.16b, v28.16b"); TEST_SINGLE(uzp2(QReg::q30, QReg::q29, QReg::q28), "uzp2 v30.8h, v29.8h, v28.8h"); @@ -138,11 +144,11 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD permute") { TEST_SINGLE(uzp2(DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uzp2(DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uzp2(DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.2s, v29.2s, 
v28.2s"); - //TEST_SINGLE(uzp2(DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uzp2(DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.1d, v29.1d, v28.1d"); TEST_SINGLE(uzp2(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uzp2(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uzp2(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(uzp2(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uzp2(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.1d, v29.1d, v28.1d"); TEST_SINGLE(trn2(QReg::q30, QReg::q29, QReg::q28), "trn2 v30.16b, v29.16b, v28.16b"); TEST_SINGLE(trn2(QReg::q30, QReg::q29, QReg::q28), "trn2 v30.8h, v29.8h, v28.8h"); @@ -156,11 +162,11 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD permute") { TEST_SINGLE(trn2(DReg::d30, DReg::d29, DReg::d28), "trn2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(trn2(DReg::d30, DReg::d29, DReg::d28), "trn2 v30.4h, v29.4h, v28.4h"); TEST_SINGLE(trn2(DReg::d30, DReg::d29, DReg::d28), "trn2 v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(trn2(DReg::d30, DReg::d29, DReg::d28), "trn2 v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(trn2(DReg::d30, DReg::d29, DReg::d28), "trn2 v30.1d, v29.1d, v28.1d"); TEST_SINGLE(trn2(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "trn2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(trn2(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "trn2 v30.4h, v29.4h, v28.4h"); TEST_SINGLE(trn2(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "trn2 v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(trn2(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "trn2 v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(trn2(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "trn2 v30.1d, v29.1d, v28.1d"); TEST_SINGLE(zip2(QReg::q30, QReg::q29, QReg::q28), "zip2 v30.16b, v29.16b, 
v28.16b"); TEST_SINGLE(zip2(QReg::q30, QReg::q29, QReg::q28), "zip2 v30.8h, v29.8h, v28.8h"); @@ -174,11 +180,11 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD permute") { TEST_SINGLE(zip2(DReg::d30, DReg::d29, DReg::d28), "zip2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(zip2(DReg::d30, DReg::d29, DReg::d28), "zip2 v30.4h, v29.4h, v28.4h"); TEST_SINGLE(zip2(DReg::d30, DReg::d29, DReg::d28), "zip2 v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(zip2(DReg::d30, DReg::d29, DReg::d28), "zip2 v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(zip2(DReg::d30, DReg::d29, DReg::d28), "zip2 v30.1d, v29.1d, v28.1d"); TEST_SINGLE(zip2(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "zip2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(zip2(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "zip2 v30.4h, v29.4h, v28.4h"); TEST_SINGLE(zip2(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "zip2 v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(zip2(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "zip2 v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(zip2(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "zip2 v30.1d, v29.1d, v28.1d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD extract") { TEST_SINGLE(ext(QReg::q30, QReg::q29, QReg::q28, 0), "ext v30.16b, v29.16b, v28.16b, #0"); @@ -200,11 +206,11 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD copy") { TEST_SINGLE(dup(SubRegSize::i8Bit, DReg::d30, DReg::d29, 0), "dup v30.8b, v29.b[0]"); TEST_SINGLE(dup(SubRegSize::i16Bit, DReg::d30, DReg::d29, 0), "dup v30.4h, v29.h[0]"); TEST_SINGLE(dup(SubRegSize::i32Bit, DReg::d30, DReg::d29, 0), "dup v30.2s, v29.s[0]"); - //TEST_SINGLE(dup(SubRegSize::i64Bit, DReg::d30, DReg::d29, 0), "dup v30.1d, v29.d[0]"); + // TEST_SINGLE(dup(SubRegSize::i64Bit, DReg::d30, DReg::d29, 0), "dup v30.1d, v29.d[0]"); TEST_SINGLE(dup(SubRegSize::i8Bit, DReg::d30, DReg::d29, 15), "dup v30.8b, v29.b[15]"); TEST_SINGLE(dup(SubRegSize::i16Bit, DReg::d30, DReg::d29, 
7), "dup v30.4h, v29.h[7]"); TEST_SINGLE(dup(SubRegSize::i32Bit, DReg::d30, DReg::d29, 3), "dup v30.2s, v29.s[3]"); - //TEST_SINGLE(dup(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "dup v30.1d, v29.d[1]"); + // TEST_SINGLE(dup(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "dup v30.1d, v29.d[1]"); TEST_SINGLE(dup(SubRegSize::i8Bit, QReg::q30, Reg::r29), "dup v30.16b, w29"); TEST_SINGLE(dup(SubRegSize::i16Bit, QReg::q30, Reg::r29), "dup v30.8h, w29"); @@ -214,7 +220,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD copy") { TEST_SINGLE(dup(SubRegSize::i8Bit, DReg::d30, Reg::r29), "dup v30.8b, w29"); TEST_SINGLE(dup(SubRegSize::i16Bit, DReg::d30, Reg::r29), "dup v30.4h, w29"); TEST_SINGLE(dup(SubRegSize::i32Bit, DReg::d30, Reg::r29), "dup v30.2s, w29"); - //TEST_SINGLE(dup(SubRegSize::i64Bit, DReg::d30, Reg::r29), "dup v30.1d, x29"); + // TEST_SINGLE(dup(SubRegSize::i64Bit, DReg::d30, Reg::r29), "dup v30.1d, x29"); TEST_SINGLE(smov(XReg::x29, VReg::v30, 0), "smov x29, v30.b[0]"); TEST_SINGLE(smov(XReg::x29, VReg::v30, 15), "smov x29, v30.b[15]"); TEST_SINGLE(smov(XReg::x29, VReg::v30, 0), "smov x29, v30.h[0]"); @@ -236,7 +242,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD copy") { TEST_SINGLE(umov(Reg::r29, VReg::v30, 0), "mov x29, v30.d[0]"); TEST_SINGLE(umov(Reg::r29, VReg::v30, 1), "mov x29, v30.d[1]"); - TEST_SINGLE(ins(VReg::v30, 0, Reg::r29), "mov v30.b[0], w29"); + TEST_SINGLE(ins(VReg::v30, 0, Reg::r29), "mov v30.b[0], w29"); TEST_SINGLE(ins(VReg::v30, 0, Reg::r29), "mov v30.h[0], w29"); TEST_SINGLE(ins(VReg::v30, 0, Reg::r29), "mov v30.s[0], w29"); TEST_SINGLE(ins(VReg::v30, 0, Reg::r29), "mov v30.d[0], x29"); @@ -245,7 +251,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD copy") { TEST_SINGLE(ins(VReg::v30, 3, Reg::r29), "mov v30.s[3], w29"); TEST_SINGLE(ins(VReg::v30, 1, Reg::r29), "mov v30.d[1], x29"); - TEST_SINGLE(ins(SubRegSize::i8Bit, VReg::v30, 0, Reg::r29), "mov v30.b[0], w29"); + 
TEST_SINGLE(ins(SubRegSize::i8Bit, VReg::v30, 0, Reg::r29), "mov v30.b[0], w29"); TEST_SINGLE(ins(SubRegSize::i16Bit, VReg::v30, 0, Reg::r29), "mov v30.h[0], w29"); TEST_SINGLE(ins(SubRegSize::i32Bit, VReg::v30, 0, Reg::r29), "mov v30.s[0], w29"); TEST_SINGLE(ins(SubRegSize::i64Bit, VReg::v30, 0, Reg::r29), "mov v30.d[0], x29"); @@ -254,7 +260,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD copy") { TEST_SINGLE(ins(SubRegSize::i32Bit, VReg::v30, 3, Reg::r29), "mov v30.s[3], w29"); TEST_SINGLE(ins(SubRegSize::i64Bit, VReg::v30, 1, Reg::r29), "mov v30.d[1], x29"); - TEST_SINGLE(ins(SubRegSize::i8Bit, VReg::v30, 0, VReg::v29, 15), "mov v30.b[0], v29.b[15]"); + TEST_SINGLE(ins(SubRegSize::i8Bit, VReg::v30, 0, VReg::v29, 15), "mov v30.b[0], v29.b[15]"); TEST_SINGLE(ins(SubRegSize::i16Bit, VReg::v30, 0, VReg::v29, 7), "mov v30.h[0], v29.h[7]"); TEST_SINGLE(ins(SubRegSize::i32Bit, VReg::v30, 0, VReg::v29, 3), "mov v30.s[0], v29.s[3]"); TEST_SINGLE(ins(SubRegSize::i64Bit, VReg::v30, 0, VReg::v29, 1), "mov v30.d[0], v29.d[1]"); @@ -328,7 +334,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three-register TEST_SINGLE(sdot(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sdot v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sdot(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sdot v30.4h, v29.8b, v28.8b"); TEST_SINGLE(sdot(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sdot v30.2s, v29.8b, v28.8b"); - //TEST_SINGLE(sdot(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sdot v30.1d, v29.8b, v28.8b"); + // TEST_SINGLE(sdot(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sdot v30.1d, v29.8b, v28.8b"); TEST_SINGLE(usdot(QReg::q30, QReg::q29, QReg::q28), "usdot v30.4s, v29.16b, v28.16b"); TEST_SINGLE(usdot(DReg::d30, DReg::d29, DReg::d28), "usdot v30.2s, v29.8b, v28.8b"); @@ -340,7 +346,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three-register TEST_SINGLE(sqrdmlah(SubRegSize::i8Bit, 
DReg::d30, DReg::d29, DReg::d28), "sqrdmlah v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sqrdmlah(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlah v30.4h, v29.4h, v28.4h"); TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlah v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(sqrdmlah(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlah v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(sqrdmlah(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlah v30.1d, v29.1d, v28.1d"); TEST_SINGLE(sqrdmlsh(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmlsh v30.16b, v29.16b, v28.16b"); TEST_SINGLE(sqrdmlsh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmlsh v30.8h, v29.8h, v28.8h"); @@ -349,7 +355,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three-register TEST_SINGLE(sqrdmlsh(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlsh v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sqrdmlsh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlsh v30.4h, v29.4h, v28.4h"); TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlsh v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(sqrdmlsh(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlsh v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(sqrdmlsh(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlsh v30.1d, v29.1d, v28.1d"); TEST_SINGLE(udot(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "udot v30.16b, v29.16b, v28.16b"); TEST_SINGLE(udot(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "udot v30.8h, v29.16b, v28.16b"); @@ -358,63 +364,63 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three-register TEST_SINGLE(udot(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "udot v30.8b, v29.8b, v28.8b"); TEST_SINGLE(udot(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "udot v30.4h, v29.8b, v28.8b"); TEST_SINGLE(udot(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), 
"udot v30.2s, v29.8b, v28.8b"); - //TEST_SINGLE(udot(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "udot v30.1d, v29.8b, v28.8b"); + // TEST_SINGLE(udot(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "udot v30.1d, v29.8b, v28.8b"); - //TEST_SINGLE(fcmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_0), "fcmla v30.16b, v29.16b, v28.16b, #0"); + // TEST_SINGLE(fcmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_0), "fcmla v30.16b, v29.16b, v28.16b, #0"); TEST_SINGLE(fcmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_0), "fcmla v30.8h, v29.8h, v28.8h, #0"); TEST_SINGLE(fcmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_0), "fcmla v30.4s, v29.4s, v28.4s, #0"); TEST_SINGLE(fcmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_0), "fcmla v30.2d, v29.2d, v28.2d, #0"); - //TEST_SINGLE(fcmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_0), "fcmla v30.8b, v29.8b, v28.8b, #0"); + // TEST_SINGLE(fcmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_0), "fcmla v30.8b, v29.8b, v28.8b, #0"); TEST_SINGLE(fcmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_0), "fcmla v30.4h, v29.4h, v28.4h, #0"); TEST_SINGLE(fcmla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_0), "fcmla v30.2s, v29.2s, v28.2s, #0"); - //TEST_SINGLE(fcmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_0), "fcmla v30.1d, v29.1d, v28.1d, #0"); + // TEST_SINGLE(fcmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_0), "fcmla v30.1d, v29.1d, v28.1d, #0"); - //TEST_SINGLE(fcmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcmla v30.16b, v29.16b, v28.16b, #90"); + // TEST_SINGLE(fcmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcmla v30.16b, v29.16b, v28.16b, #90"); 
TEST_SINGLE(fcmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcmla v30.8h, v29.8h, v28.8h, #90"); TEST_SINGLE(fcmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcmla v30.4s, v29.4s, v28.4s, #90"); TEST_SINGLE(fcmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcmla v30.2d, v29.2d, v28.2d, #90"); - //TEST_SINGLE(fcmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcmla v30.8b, v29.8b, v28.8b, #90"); + // TEST_SINGLE(fcmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcmla v30.8b, v29.8b, v28.8b, #90"); TEST_SINGLE(fcmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcmla v30.4h, v29.4h, v28.4h, #90"); TEST_SINGLE(fcmla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcmla v30.2s, v29.2s, v28.2s, #90"); - //TEST_SINGLE(fcmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcmla v30.1d, v29.1d, v28.1d, #90"); + // TEST_SINGLE(fcmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcmla v30.1d, v29.1d, v28.1d, #90"); // Vixl disassembler has a bug that claims 8-bit fcmla exists - //TEST_SINGLE(fcmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_180), "fcmla v30.16b, v29.16b, v28.16b, #180"); + // TEST_SINGLE(fcmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_180), "fcmla v30.16b, v29.16b, v28.16b, #180"); TEST_SINGLE(fcmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_180), "fcmla v30.8h, v29.8h, v28.8h, #180"); TEST_SINGLE(fcmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_180), "fcmla v30.4s, v29.4s, v28.4s, #180"); TEST_SINGLE(fcmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_180), "fcmla v30.2d, v29.2d, v28.2d, #180"); - //TEST_SINGLE(fcmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, 
DReg::d28, Rotation::ROTATE_180), "fcmla v30.8b, v29.8b, v28.8b, #180"); + // TEST_SINGLE(fcmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_180), "fcmla v30.8b, v29.8b, v28.8b, #180"); TEST_SINGLE(fcmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_180), "fcmla v30.4h, v29.4h, v28.4h, #180"); TEST_SINGLE(fcmla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_180), "fcmla v30.2s, v29.2s, v28.2s, #180"); - //TEST_SINGLE(fcmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_180), "fcmla v30.1d, v29.1d, v28.1d, #180"); + // TEST_SINGLE(fcmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_180), "fcmla v30.1d, v29.1d, v28.1d, #180"); - //TEST_SINGLE(fcmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcmla v30.16b, v29.16b, v28.16b, #270"); + // TEST_SINGLE(fcmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcmla v30.16b, v29.16b, v28.16b, #270"); TEST_SINGLE(fcmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcmla v30.8h, v29.8h, v28.8h, #270"); TEST_SINGLE(fcmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcmla v30.4s, v29.4s, v28.4s, #270"); TEST_SINGLE(fcmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcmla v30.2d, v29.2d, v28.2d, #270"); - //TEST_SINGLE(fcmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcmla v30.8b, v29.8b, v28.8b, #270"); + // TEST_SINGLE(fcmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcmla v30.8b, v29.8b, v28.8b, #270"); TEST_SINGLE(fcmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcmla v30.4h, v29.4h, v28.4h, #270"); TEST_SINGLE(fcmla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcmla v30.2s, v29.2s, v28.2s, #270"); - 
//TEST_SINGLE(fcmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcmla v30.1d, v29.1d, v28.1d, #270"); + // TEST_SINGLE(fcmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcmla v30.1d, v29.1d, v28.1d, #270"); // Vixl disassembler has a bug that claims 8-bit fcadd exists - //TEST_SINGLE(fcadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcadd v30.16b, v29.16b, v28.16b, #90"); + // TEST_SINGLE(fcadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcadd v30.16b, v29.16b, v28.16b, #90"); TEST_SINGLE(fcadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcadd v30.8h, v29.8h, v28.8h, #90"); TEST_SINGLE(fcadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcadd v30.4s, v29.4s, v28.4s, #90"); TEST_SINGLE(fcadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcadd v30.2d, v29.2d, v28.2d, #90"); - //TEST_SINGLE(fcadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcadd v30.8b, v29.8b, v28.8b, #90"); + // TEST_SINGLE(fcadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcadd v30.8b, v29.8b, v28.8b, #90"); TEST_SINGLE(fcadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcadd v30.4h, v29.4h, v28.4h, #90"); TEST_SINGLE(fcadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcadd v30.2s, v29.2s, v28.2s, #90"); - //TEST_SINGLE(fcadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcadd v30.1d, v29.1d, v28.1d, #90"); + // TEST_SINGLE(fcadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcadd v30.1d, v29.1d, v28.1d, #90"); - //TEST_SINGLE(fcadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcadd v30.16b, v29.16b, v28.16b, #270"); + // TEST_SINGLE(fcadd(SubRegSize::i8Bit, QReg::q30, 
QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcadd v30.16b, v29.16b, v28.16b, #270"); TEST_SINGLE(fcadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcadd v30.8h, v29.8h, v28.8h, #270"); TEST_SINGLE(fcadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcadd v30.4s, v29.4s, v28.4s, #270"); TEST_SINGLE(fcadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcadd v30.2d, v29.2d, v28.2d, #270"); - //TEST_SINGLE(fcadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcadd v30.8b, v29.8b, v28.8b, #270"); + // TEST_SINGLE(fcadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcadd v30.8b, v29.8b, v28.8b, #270"); TEST_SINGLE(fcadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcadd v30.4h, v29.4h, v28.4h, #270"); TEST_SINGLE(fcadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcadd v30.2s, v29.2s, v28.2s, #270"); - //TEST_SINGLE(fcadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcadd v30.1d, v29.1d, v28.1d, #270"); + // TEST_SINGLE(fcadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcadd v30.1d, v29.1d, v28.1d, #270"); // TODO: Enable once vixl disassembler supports these instructions // TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q28), "bfdot v30.4s, v29.8h, v28.8h"); @@ -425,7 +431,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three-register TEST_SINGLE(smmla(VReg::v30, VReg::v29, VReg::v28), "smmla v30.4s, v29.16b, v28.16b"); TEST_SINGLE(usmmla(VReg::v30, VReg::v29, VReg::v28), "usmmla v30.4s, v29.16b, v28.16b"); // TODO: Enable once vixl disassembler supports these instructions - //TEST_SINGLE(bfmmla(VReg::v30, VReg::v29, VReg::v28), "bfmmla v30.4s, v29.8h, v28.8h"); + // TEST_SINGLE(bfmmla(VReg::v30, VReg::v29, VReg::v28), "bfmmla v30.4s, v29.8h, v28.8h"); 
TEST_SINGLE(ummla(VReg::v30, VReg::v29, VReg::v28), "ummla v30.4s, v29.16b, v28.16b"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register miscellaneous") { @@ -433,29 +439,29 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register m TEST_SINGLE(rev64(SubRegSize::i8Bit, QReg::q30, QReg::q29), "rev64 v30.16b, v29.16b"); TEST_SINGLE(rev64(SubRegSize::i16Bit, QReg::q30, QReg::q29), "rev64 v30.8h, v29.8h"); TEST_SINGLE(rev64(SubRegSize::i32Bit, QReg::q30, QReg::q29), "rev64 v30.4s, v29.4s"); - //TEST_SINGLE(rev64(SubRegSize::i64Bit, QReg::q30, QReg::q29), "rev64 v30.2d, v29.2d"); + // TEST_SINGLE(rev64(SubRegSize::i64Bit, QReg::q30, QReg::q29), "rev64 v30.2d, v29.2d"); TEST_SINGLE(rev64(SubRegSize::i8Bit, DReg::d30, DReg::d29), "rev64 v30.8b, v29.8b"); TEST_SINGLE(rev64(SubRegSize::i16Bit, DReg::d30, DReg::d29), "rev64 v30.4h, v29.4h"); TEST_SINGLE(rev64(SubRegSize::i32Bit, DReg::d30, DReg::d29), "rev64 v30.2s, v29.2s"); - //TEST_SINGLE(rev64(SubRegSize::i64Bit, DReg::d30, DReg::d29), "rev64 v30.1d, v29.1d"); + // TEST_SINGLE(rev64(SubRegSize::i64Bit, DReg::d30, DReg::d29), "rev64 v30.1d, v29.1d"); TEST_SINGLE(rev16(SubRegSize::i8Bit, QReg::q30, QReg::q29), "rev16 v30.16b, v29.16b"); - //TEST_SINGLE(rev16(SubRegSize::i16Bit, QReg::q30, QReg::q29), "rev16 v30.8h, v29.8h"); - //TEST_SINGLE(rev16(SubRegSize::i32Bit, QReg::q30, QReg::q29), "rev16 v30.4s, v29.4s"); - //TEST_SINGLE(rev16(SubRegSize::i64Bit, QReg::q30, QReg::q29), "rev16 v30.2d, v29.2d"); + // TEST_SINGLE(rev16(SubRegSize::i16Bit, QReg::q30, QReg::q29), "rev16 v30.8h, v29.8h"); + // TEST_SINGLE(rev16(SubRegSize::i32Bit, QReg::q30, QReg::q29), "rev16 v30.4s, v29.4s"); + // TEST_SINGLE(rev16(SubRegSize::i64Bit, QReg::q30, QReg::q29), "rev16 v30.2d, v29.2d"); TEST_SINGLE(rev16(SubRegSize::i8Bit, DReg::d30, DReg::d29), "rev16 v30.8b, v29.8b"); - //TEST_SINGLE(rev16(SubRegSize::i16Bit, DReg::d30, DReg::d29), "rev16 v30.4h, v29.4h"); - 
//TEST_SINGLE(rev16(SubRegSize::i32Bit, DReg::d30, DReg::d29), "rev16 v30.2s, v29.2s"); - //TEST_SINGLE(rev16(SubRegSize::i64Bit, DReg::d30, DReg::d29), "rev16 v30.1d, v29.1d"); + // TEST_SINGLE(rev16(SubRegSize::i16Bit, DReg::d30, DReg::d29), "rev16 v30.4h, v29.4h"); + // TEST_SINGLE(rev16(SubRegSize::i32Bit, DReg::d30, DReg::d29), "rev16 v30.2s, v29.2s"); + // TEST_SINGLE(rev16(SubRegSize::i64Bit, DReg::d30, DReg::d29), "rev16 v30.1d, v29.1d"); - //TEST_SINGLE(saddlp(SubRegSize::i8Bit, QReg::q30, QReg::q29), "saddlp v30.16b, v29.16b"); + // TEST_SINGLE(saddlp(SubRegSize::i8Bit, QReg::q30, QReg::q29), "saddlp v30.16b, v29.16b"); TEST_SINGLE(saddlp(SubRegSize::i16Bit, QReg::q30, QReg::q29), "saddlp v30.8h, v29.16b"); TEST_SINGLE(saddlp(SubRegSize::i32Bit, QReg::q30, QReg::q29), "saddlp v30.4s, v29.8h"); TEST_SINGLE(saddlp(SubRegSize::i64Bit, QReg::q30, QReg::q29), "saddlp v30.2d, v29.4s"); - //TEST_SINGLE(saddlp(SubRegSize::i8Bit, DReg::d30, DReg::d29), "saddlp v30.8b, v29.8b"); + // TEST_SINGLE(saddlp(SubRegSize::i8Bit, DReg::d30, DReg::d29), "saddlp v30.8b, v29.8b"); TEST_SINGLE(saddlp(SubRegSize::i16Bit, DReg::d30, DReg::d29), "saddlp v30.4h, v29.8b"); TEST_SINGLE(saddlp(SubRegSize::i32Bit, DReg::d30, DReg::d29), "saddlp v30.2s, v29.4h"); TEST_SINGLE(saddlp(SubRegSize::i64Bit, DReg::d30, DReg::d29), "saddlp v30.1d, v29.2s"); @@ -468,34 +474,34 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register m TEST_SINGLE(suqadd(SubRegSize::i8Bit, DReg::d30, DReg::d29), "suqadd v30.8b, v29.8b"); TEST_SINGLE(suqadd(SubRegSize::i16Bit, DReg::d30, DReg::d29), "suqadd v30.4h, v29.4h"); TEST_SINGLE(suqadd(SubRegSize::i32Bit, DReg::d30, DReg::d29), "suqadd v30.2s, v29.2s"); - //TEST_SINGLE(suqadd(SubRegSize::i64Bit, DReg::d30, DReg::d29), "suqadd v30.1d, v29.1d"); + // TEST_SINGLE(suqadd(SubRegSize::i64Bit, DReg::d30, DReg::d29), "suqadd v30.1d, v29.1d"); TEST_SINGLE(cls(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cls v30.16b, v29.16b"); 
TEST_SINGLE(cls(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cls v30.8h, v29.8h"); TEST_SINGLE(cls(SubRegSize::i32Bit, QReg::q30, QReg::q29), "cls v30.4s, v29.4s"); - //TEST_SINGLE(cls(SubRegSize::i64Bit, QReg::q30, QReg::q29), "cls v30.2d, v29.2d"); + // TEST_SINGLE(cls(SubRegSize::i64Bit, QReg::q30, QReg::q29), "cls v30.2d, v29.2d"); TEST_SINGLE(cls(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cls v30.8b, v29.8b"); TEST_SINGLE(cls(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cls v30.4h, v29.4h"); TEST_SINGLE(cls(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cls v30.2s, v29.2s"); - //TEST_SINGLE(cls(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cls v30.1d, v29.1d"); + // TEST_SINGLE(cls(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cls v30.1d, v29.1d"); TEST_SINGLE(cnt(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cnt v30.16b, v29.16b"); - //TEST_SINGLE(cnt(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cnt v30.8h, v29.8h"); - //TEST_SINGLE(cnt(SubRegSize::i32Bit, QReg::q30, QReg::q29), "cnt v30.4s, v29.4s"); - //TEST_SINGLE(cnt(SubRegSize::i64Bit, QReg::q30, QReg::q29), "cnt v30.2d, v29.2d"); + // TEST_SINGLE(cnt(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cnt v30.8h, v29.8h"); + // TEST_SINGLE(cnt(SubRegSize::i32Bit, QReg::q30, QReg::q29), "cnt v30.4s, v29.4s"); + // TEST_SINGLE(cnt(SubRegSize::i64Bit, QReg::q30, QReg::q29), "cnt v30.2d, v29.2d"); TEST_SINGLE(cnt(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cnt v30.8b, v29.8b"); - //TEST_SINGLE(cnt(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cnt v30.4h, v29.4h"); - //TEST_SINGLE(cnt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cnt v30.2s, v29.2s"); - //TEST_SINGLE(cnt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cnt v30.1d, v29.1d"); + // TEST_SINGLE(cnt(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cnt v30.4h, v29.4h"); + // TEST_SINGLE(cnt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cnt v30.2s, v29.2s"); + // TEST_SINGLE(cnt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cnt v30.1d, v29.1d"); - //TEST_SINGLE(sadalp(SubRegSize::i8Bit, 
QReg::q30, QReg::q29), "sadalp v30.16b, v29.16b"); + // TEST_SINGLE(sadalp(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sadalp v30.16b, v29.16b"); TEST_SINGLE(sadalp(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sadalp v30.8h, v29.16b"); TEST_SINGLE(sadalp(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sadalp v30.4s, v29.8h"); TEST_SINGLE(sadalp(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sadalp v30.2d, v29.4s"); - //TEST_SINGLE(sadalp(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sadalp v30.8b, v29.8b"); + // TEST_SINGLE(sadalp(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sadalp v30.8b, v29.8b"); TEST_SINGLE(sadalp(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sadalp v30.4h, v29.8b"); TEST_SINGLE(sadalp(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sadalp v30.2s, v29.4h"); TEST_SINGLE(sadalp(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sadalp v30.1d, v29.2s"); @@ -508,7 +514,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register m TEST_SINGLE(sqabs(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sqabs v30.8b, v29.8b"); TEST_SINGLE(sqabs(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sqabs v30.4h, v29.4h"); TEST_SINGLE(sqabs(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sqabs v30.2s, v29.2s"); - //TEST_SINGLE(sqabs(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqabs v30.1d, v29.1d"); + // TEST_SINGLE(sqabs(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqabs v30.1d, v29.1d"); TEST_SINGLE(cmgt(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cmgt v30.16b, v29.16b, #0"); TEST_SINGLE(cmgt(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cmgt v30.8h, v29.8h, #0"); @@ -518,7 +524,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register m TEST_SINGLE(cmgt(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cmgt v30.8b, v29.8b, #0"); TEST_SINGLE(cmgt(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cmgt v30.4h, v29.4h, #0"); TEST_SINGLE(cmgt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cmgt v30.2s, v29.2s, #0"); - //TEST_SINGLE(cmgt(SubRegSize::i64Bit, DReg::d30, 
DReg::d29), "cmgt v30.1d, v29.1d, #0"); + // TEST_SINGLE(cmgt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmgt v30.1d, v29.1d, #0"); TEST_SINGLE(cmeq(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cmeq v30.16b, v29.16b, #0"); TEST_SINGLE(cmeq(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cmeq v30.8h, v29.8h, #0"); @@ -528,7 +534,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register m TEST_SINGLE(cmeq(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cmeq v30.8b, v29.8b, #0"); TEST_SINGLE(cmeq(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cmeq v30.4h, v29.4h, #0"); TEST_SINGLE(cmeq(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cmeq v30.2s, v29.2s, #0"); - //TEST_SINGLE(cmeq(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmeq v30.1d, v29.1d, #0"); + // TEST_SINGLE(cmeq(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmeq v30.1d, v29.1d, #0"); TEST_SINGLE(cmlt(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cmlt v30.16b, v29.16b, #0"); TEST_SINGLE(cmlt(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cmlt v30.8h, v29.8h, #0"); @@ -538,7 +544,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register m TEST_SINGLE(cmlt(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cmlt v30.8b, v29.8b, #0"); TEST_SINGLE(cmlt(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cmlt v30.4h, v29.4h, #0"); TEST_SINGLE(cmlt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cmlt v30.2s, v29.2s, #0"); - //TEST_SINGLE(cmlt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmlt v30.1d, v29.1d, #0"); + // TEST_SINGLE(cmlt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmlt v30.1d, v29.1d, #0"); TEST_SINGLE(abs(SubRegSize::i8Bit, QReg::q30, QReg::q29), "abs v30.16b, v29.16b"); TEST_SINGLE(abs(SubRegSize::i16Bit, QReg::q30, QReg::q29), "abs v30.8h, v29.8h"); @@ -553,189 +559,189 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register m TEST_SINGLE(xtn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "xtn v30.8b, v29.8h"); TEST_SINGLE(xtn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "xtn 
v30.4h, v29.4s"); TEST_SINGLE(xtn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "xtn v30.2s, v29.2d"); - //TEST_SINGLE(xtn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "xtn v30.2d, v29.1d"); + // TEST_SINGLE(xtn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "xtn v30.2d, v29.1d"); TEST_SINGLE(xtn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "xtn v30.8b, v29.8h"); TEST_SINGLE(xtn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "xtn v30.4h, v29.4s"); TEST_SINGLE(xtn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "xtn v30.2s, v29.2d"); - //TEST_SINGLE(xtn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "xtn v30.1d, v29.1d"); + // TEST_SINGLE(xtn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "xtn v30.1d, v29.1d"); TEST_SINGLE(xtn2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "xtn2 v30.16b, v29.8h"); TEST_SINGLE(xtn2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "xtn2 v30.8h, v29.4s"); TEST_SINGLE(xtn2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "xtn2 v30.4s, v29.2d"); - //TEST_SINGLE(xtn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "xtn2 v30.2d, v29.1d"); + // TEST_SINGLE(xtn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "xtn2 v30.2d, v29.1d"); TEST_SINGLE(xtn2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "xtn2 v30.16b, v29.8h"); TEST_SINGLE(xtn2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "xtn2 v30.8h, v29.4s"); TEST_SINGLE(xtn2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "xtn2 v30.4s, v29.2d"); - //TEST_SINGLE(xtn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "xtn2 v30.2d, v29.1d"); + // TEST_SINGLE(xtn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "xtn2 v30.2d, v29.1d"); TEST_SINGLE(sqxtn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sqxtn v30.8b, v29.8h"); TEST_SINGLE(sqxtn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sqxtn v30.4h, v29.4s"); TEST_SINGLE(sqxtn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sqxtn v30.2s, v29.2d"); - //TEST_SINGLE(sqxtn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqxtn v30.2d, v29.1d"); + // TEST_SINGLE(sqxtn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqxtn v30.2d, v29.1d"); 
TEST_SINGLE(sqxtn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sqxtn v30.8b, v29.8h"); TEST_SINGLE(sqxtn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sqxtn v30.4h, v29.4s"); TEST_SINGLE(sqxtn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sqxtn v30.2s, v29.2d"); - //TEST_SINGLE(sqxtn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqxtn v30.1d, v29.1d"); + // TEST_SINGLE(sqxtn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqxtn v30.1d, v29.1d"); TEST_SINGLE(sqxtn2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sqxtn2 v30.16b, v29.8h"); TEST_SINGLE(sqxtn2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sqxtn2 v30.8h, v29.4s"); TEST_SINGLE(sqxtn2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sqxtn2 v30.4s, v29.2d"); - //TEST_SINGLE(sqxtn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqxtn2 v30.2d, v29.1d"); + // TEST_SINGLE(sqxtn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqxtn2 v30.2d, v29.1d"); TEST_SINGLE(sqxtn2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sqxtn2 v30.16b, v29.8h"); TEST_SINGLE(sqxtn2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sqxtn2 v30.8h, v29.4s"); TEST_SINGLE(sqxtn2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sqxtn2 v30.4s, v29.2d"); - //TEST_SINGLE(sqxtn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqxtn2 v30.2d, v29.1d"); + // TEST_SINGLE(sqxtn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqxtn2 v30.2d, v29.1d"); - //TEST_SINGLE(fcvtn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtn v30.8b, v29.8h"); + // TEST_SINGLE(fcvtn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtn v30.8b, v29.8h"); TEST_SINGLE(fcvtn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtn v30.4h, v29.4s"); TEST_SINGLE(fcvtn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtn v30.2s, v29.2d"); - //TEST_SINGLE(fcvtn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtn v30.2d, v29.1d"); + // TEST_SINGLE(fcvtn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtn v30.2d, v29.1d"); - //TEST_SINGLE(fcvtn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtn v30.8b, v29.8h"); + // TEST_SINGLE(fcvtn(SubRegSize::i8Bit, 
DReg::d30, DReg::d29), "fcvtn v30.8b, v29.8h"); TEST_SINGLE(fcvtn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtn v30.4h, v29.4s"); TEST_SINGLE(fcvtn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtn v30.2s, v29.2d"); - //TEST_SINGLE(fcvtn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtn v30.1d, v29.1d"); + // TEST_SINGLE(fcvtn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtn v30.1d, v29.1d"); - //TEST_SINGLE(fcvtn2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtn2 v30.16b, v29.8h"); + // TEST_SINGLE(fcvtn2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtn2 v30.16b, v29.8h"); TEST_SINGLE(fcvtn2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtn2 v30.8h, v29.4s"); TEST_SINGLE(fcvtn2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtn2 v30.4s, v29.2d"); - //TEST_SINGLE(fcvtn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtn2 v30.2d, v29.1d"); + // TEST_SINGLE(fcvtn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtn2 v30.2d, v29.1d"); - //TEST_SINGLE(fcvtn2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtn2 v30.16b, v29.8h"); + // TEST_SINGLE(fcvtn2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtn2 v30.16b, v29.8h"); TEST_SINGLE(fcvtn2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtn2 v30.8h, v29.4s"); TEST_SINGLE(fcvtn2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtn2 v30.4s, v29.2d"); - //TEST_SINGLE(fcvtn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtn2 v30.2d, v29.1d"); + // TEST_SINGLE(fcvtn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtn2 v30.2d, v29.1d"); - //TEST_SINGLE(fcvtl(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtl v30.8b, v29.8h"); - //TEST_SINGLE(fcvtl(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtl v30.4h, v29.4s"); + // TEST_SINGLE(fcvtl(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtl v30.8b, v29.8h"); + // TEST_SINGLE(fcvtl(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtl v30.4h, v29.4s"); TEST_SINGLE(fcvtl(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtl v30.4s, v29.4h"); TEST_SINGLE(fcvtl(SubRegSize::i64Bit, QReg::q30, QReg::q29), 
"fcvtl v30.2d, v29.2s"); - //TEST_SINGLE(fcvtl(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtl v30.8b, v29.8h"); - //TEST_SINGLE(fcvtl(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtl v30.4h, v29.4s"); + // TEST_SINGLE(fcvtl(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtl v30.8b, v29.8h"); + // TEST_SINGLE(fcvtl(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtl v30.4h, v29.4s"); TEST_SINGLE(fcvtl(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtl v30.4s, v29.4h"); TEST_SINGLE(fcvtl(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtl v30.2d, v29.2s"); - //TEST_SINGLE(fcvtl2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtl2 v30.16b, v29.8h"); - //TEST_SINGLE(fcvtl2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtl2 v30.8h, v29.4s"); + // TEST_SINGLE(fcvtl2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtl2 v30.16b, v29.8h"); + // TEST_SINGLE(fcvtl2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtl2 v30.8h, v29.4s"); TEST_SINGLE(fcvtl2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtl2 v30.4s, v29.8h"); TEST_SINGLE(fcvtl2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtl2 v30.2d, v29.4s"); - //TEST_SINGLE(fcvtl2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtl2 v30.16b, v29.8h"); - //TEST_SINGLE(fcvtl2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtl2 v30.8h, v29.4s"); + // TEST_SINGLE(fcvtl2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtl2 v30.16b, v29.8h"); + // TEST_SINGLE(fcvtl2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtl2 v30.8h, v29.4s"); TEST_SINGLE(fcvtl2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtl2 v30.4s, v29.8h"); TEST_SINGLE(fcvtl2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtl2 v30.2d, v29.4s"); TEST_SINGLE(frintn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frintn v30.4s, v29.4s"); TEST_SINGLE(frintn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frintn v30.2d, v29.2d"); TEST_SINGLE(frintn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frintn v30.2s, v29.2s"); - //TEST_SINGLE(frintn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintn v30.1d, 
v29.1d"); + // TEST_SINGLE(frintn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintn v30.1d, v29.1d"); TEST_SINGLE(frintm(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frintm v30.4s, v29.4s"); TEST_SINGLE(frintm(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frintm v30.2d, v29.2d"); TEST_SINGLE(frintm(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frintm v30.2s, v29.2s"); - //TEST_SINGLE(frintm(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintm v30.1d, v29.1d"); + // TEST_SINGLE(frintm(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintm v30.1d, v29.1d"); TEST_SINGLE(fcvtns(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtns v30.4s, v29.4s"); TEST_SINGLE(fcvtns(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtns v30.2d, v29.2d"); TEST_SINGLE(fcvtns(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtns v30.2s, v29.2s"); - //TEST_SINGLE(fcvtns(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtns v30.1d, v29.1d"); + // TEST_SINGLE(fcvtns(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtns v30.1d, v29.1d"); TEST_SINGLE(fcvtms(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtms v30.4s, v29.4s"); TEST_SINGLE(fcvtms(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtms v30.2d, v29.2d"); TEST_SINGLE(fcvtms(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtms v30.2s, v29.2s"); - //TEST_SINGLE(fcvtms(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtms v30.1d, v29.1d"); + // TEST_SINGLE(fcvtms(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtms v30.1d, v29.1d"); TEST_SINGLE(fcvtas(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtas v30.4s, v29.4s"); TEST_SINGLE(fcvtas(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtas v30.2d, v29.2d"); TEST_SINGLE(fcvtas(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtas v30.2s, v29.2s"); - //TEST_SINGLE(fcvtas(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtas v30.1d, v29.1d"); + // TEST_SINGLE(fcvtas(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtas v30.1d, v29.1d"); TEST_SINGLE(scvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29), "scvtf v30.4s, v29.4s"); 
TEST_SINGLE(scvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29), "scvtf v30.2d, v29.2d"); TEST_SINGLE(scvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29), "scvtf v30.2s, v29.2s"); - //TEST_SINGLE(scvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29), "scvtf v30.1d, v29.1d"); + // TEST_SINGLE(scvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29), "scvtf v30.1d, v29.1d"); TEST_SINGLE(frint32z(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frint32z v30.4s, v29.4s"); TEST_SINGLE(frint32z(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frint32z v30.2d, v29.2d"); TEST_SINGLE(frint32z(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frint32z v30.2s, v29.2s"); - //TEST_SINGLE(frint32z(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frint32z v30.1d, v29.1d"); + // TEST_SINGLE(frint32z(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frint32z v30.1d, v29.1d"); TEST_SINGLE(frint64z(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frint64z v30.4s, v29.4s"); TEST_SINGLE(frint64z(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frint64z v30.2d, v29.2d"); TEST_SINGLE(frint64z(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frint64z v30.2s, v29.2s"); - //TEST_SINGLE(frint64z(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frint64z v30.1d, v29.1d"); + // TEST_SINGLE(frint64z(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frint64z v30.1d, v29.1d"); TEST_SINGLE(fcmgt(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcmgt v30.4s, v29.4s, #0.0"); TEST_SINGLE(fcmgt(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcmgt v30.2d, v29.2d, #0.0"); TEST_SINGLE(fcmgt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcmgt v30.2s, v29.2s, #0.0"); - //TEST_SINGLE(fcmgt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmgt v30.1d, v29.1d, #0.0"); + // TEST_SINGLE(fcmgt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmgt v30.1d, v29.1d, #0.0"); TEST_SINGLE(fcmeq(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcmeq v30.4s, v29.4s, #0.0"); TEST_SINGLE(fcmeq(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcmeq v30.2d, v29.2d, #0.0"); TEST_SINGLE(fcmeq(SubRegSize::i32Bit, DReg::d30, 
DReg::d29), "fcmeq v30.2s, v29.2s, #0.0"); - //TEST_SINGLE(fcmeq(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmeq v30.1d, v29.1d, #0.0"); + // TEST_SINGLE(fcmeq(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmeq v30.1d, v29.1d, #0.0"); TEST_SINGLE(fcmlt(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcmlt v30.4s, v29.4s, #0.0"); TEST_SINGLE(fcmlt(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcmlt v30.2d, v29.2d, #0.0"); TEST_SINGLE(fcmlt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcmlt v30.2s, v29.2s, #0.0"); - //TEST_SINGLE(fcmlt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmlt v30.1d, v29.1d, #0.0"); + // TEST_SINGLE(fcmlt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmlt v30.1d, v29.1d, #0.0"); TEST_SINGLE(fabs(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fabs v30.4s, v29.4s"); TEST_SINGLE(fabs(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fabs v30.2d, v29.2d"); TEST_SINGLE(fabs(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fabs v30.2s, v29.2s"); - //TEST_SINGLE(fabs(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fabs v30.1d, v29.1d"); + // TEST_SINGLE(fabs(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fabs v30.1d, v29.1d"); TEST_SINGLE(frintp(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frintp v30.4s, v29.4s"); TEST_SINGLE(frintp(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frintp v30.2d, v29.2d"); TEST_SINGLE(frintp(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frintp v30.2s, v29.2s"); - //TEST_SINGLE(frintp(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintp v30.1d, v29.1d"); + // TEST_SINGLE(frintp(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintp v30.1d, v29.1d"); TEST_SINGLE(frintz(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frintz v30.4s, v29.4s"); TEST_SINGLE(frintz(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frintz v30.2d, v29.2d"); TEST_SINGLE(frintz(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frintz v30.2s, v29.2s"); - //TEST_SINGLE(frintz(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintz v30.1d, v29.1d"); + // TEST_SINGLE(frintz(SubRegSize::i64Bit, DReg::d30, 
DReg::d29), "frintz v30.1d, v29.1d"); TEST_SINGLE(fcvtps(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtps v30.4s, v29.4s"); TEST_SINGLE(fcvtps(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtps v30.2d, v29.2d"); TEST_SINGLE(fcvtps(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtps v30.2s, v29.2s"); - //TEST_SINGLE(fcvtps(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtps v30.1d, v29.1d"); + // TEST_SINGLE(fcvtps(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtps v30.1d, v29.1d"); TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtzs v30.4s, v29.4s"); TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtzs v30.2d, v29.2d"); TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtzs v30.2s, v29.2s"); - //TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtzs v30.1d, v29.1d"); + // TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtzs v30.1d, v29.1d"); TEST_SINGLE(urecpe(SubRegSize::i32Bit, QReg::q30, QReg::q29), "urecpe v30.4s, v29.4s"); - //TEST_SINGLE(urecpe(SubRegSize::i64Bit, QReg::q30, QReg::q29), "urecpe v30.2d, v29.2d"); + // TEST_SINGLE(urecpe(SubRegSize::i64Bit, QReg::q30, QReg::q29), "urecpe v30.2d, v29.2d"); TEST_SINGLE(urecpe(SubRegSize::i32Bit, DReg::d30, DReg::d29), "urecpe v30.2s, v29.2s"); - //TEST_SINGLE(urecpe(SubRegSize::i64Bit, DReg::d30, DReg::d29), "urecpe v30.1d, v29.1d"); + // TEST_SINGLE(urecpe(SubRegSize::i64Bit, DReg::d30, DReg::d29), "urecpe v30.1d, v29.1d"); TEST_SINGLE(frecpe(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frecpe v30.4s, v29.4s"); TEST_SINGLE(frecpe(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frecpe v30.2d, v29.2d"); TEST_SINGLE(frecpe(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frecpe v30.2s, v29.2s"); - //TEST_SINGLE(frecpe(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frecpe v30.1d, v29.1d"); + // TEST_SINGLE(frecpe(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frecpe v30.1d, v29.1d"); TEST_SINGLE(rev32(SubRegSize::i8Bit, QReg::q30, QReg::q29), "rev32 
v30.16b, v29.16b"); TEST_SINGLE(rev32(SubRegSize::i16Bit, QReg::q30, QReg::q29), "rev32 v30.8h, v29.8h"); - //TEST_SINGLE(rev32(SubRegSize::i32Bit, QReg::q30, QReg::q29), "rev32 v30.4s, v29.4s"); - //TEST_SINGLE(rev32(SubRegSize::i64Bit, QReg::q30, QReg::q29), "rev32 v30.2d, v29.2d"); + // TEST_SINGLE(rev32(SubRegSize::i32Bit, QReg::q30, QReg::q29), "rev32 v30.4s, v29.4s"); + // TEST_SINGLE(rev32(SubRegSize::i64Bit, QReg::q30, QReg::q29), "rev32 v30.2d, v29.2d"); TEST_SINGLE(rev32(SubRegSize::i8Bit, DReg::d30, DReg::d29), "rev32 v30.8b, v29.8b"); TEST_SINGLE(rev32(SubRegSize::i16Bit, DReg::d30, DReg::d29), "rev32 v30.4h, v29.4h"); - //TEST_SINGLE(rev32(SubRegSize::i32Bit, DReg::d30, DReg::d29), "rev32 v30.2s, v29.2s"); - //TEST_SINGLE(rev32(SubRegSize::i64Bit, DReg::d30, DReg::d29), "rev32 v30.1d, v29.1d"); + // TEST_SINGLE(rev32(SubRegSize::i32Bit, DReg::d30, DReg::d29), "rev32 v30.2s, v29.2s"); + // TEST_SINGLE(rev32(SubRegSize::i64Bit, DReg::d30, DReg::d29), "rev32 v30.1d, v29.1d"); - //TEST_SINGLE(uaddlp(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uaddlp v30.16b, v29.16b"); + // TEST_SINGLE(uaddlp(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uaddlp v30.16b, v29.16b"); TEST_SINGLE(uaddlp(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uaddlp v30.8h, v29.16b"); TEST_SINGLE(uaddlp(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uaddlp v30.4s, v29.8h"); TEST_SINGLE(uaddlp(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uaddlp v30.2d, v29.4s"); - //TEST_SINGLE(uaddlp(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uaddlp v30.8b, v29.8b"); + // TEST_SINGLE(uaddlp(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uaddlp v30.8b, v29.8b"); TEST_SINGLE(uaddlp(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uaddlp v30.4h, v29.8b"); TEST_SINGLE(uaddlp(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uaddlp v30.2s, v29.4h"); TEST_SINGLE(uaddlp(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uaddlp v30.1d, v29.2s"); @@ -748,24 +754,24 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD 
two-register m TEST_SINGLE(usqadd(SubRegSize::i8Bit, DReg::d30, DReg::d29), "usqadd v30.8b, v29.8b"); TEST_SINGLE(usqadd(SubRegSize::i16Bit, DReg::d30, DReg::d29), "usqadd v30.4h, v29.4h"); TEST_SINGLE(usqadd(SubRegSize::i32Bit, DReg::d30, DReg::d29), "usqadd v30.2s, v29.2s"); - //TEST_SINGLE(usqadd(SubRegSize::i64Bit, DReg::d30, DReg::d29), "usqadd v30.1d, v29.1d"); + // TEST_SINGLE(usqadd(SubRegSize::i64Bit, DReg::d30, DReg::d29), "usqadd v30.1d, v29.1d"); TEST_SINGLE(clz(SubRegSize::i8Bit, QReg::q30, QReg::q29), "clz v30.16b, v29.16b"); TEST_SINGLE(clz(SubRegSize::i16Bit, QReg::q30, QReg::q29), "clz v30.8h, v29.8h"); TEST_SINGLE(clz(SubRegSize::i32Bit, QReg::q30, QReg::q29), "clz v30.4s, v29.4s"); - //TEST_SINGLE(clz(SubRegSize::i64Bit, QReg::q30, QReg::q29), "clz v30.2d, v29.2d"); + // TEST_SINGLE(clz(SubRegSize::i64Bit, QReg::q30, QReg::q29), "clz v30.2d, v29.2d"); TEST_SINGLE(clz(SubRegSize::i8Bit, DReg::d30, DReg::d29), "clz v30.8b, v29.8b"); TEST_SINGLE(clz(SubRegSize::i16Bit, DReg::d30, DReg::d29), "clz v30.4h, v29.4h"); TEST_SINGLE(clz(SubRegSize::i32Bit, DReg::d30, DReg::d29), "clz v30.2s, v29.2s"); - //TEST_SINGLE(clz(SubRegSize::i64Bit, DReg::d30, DReg::d29), "clz v30.1d, v29.1d"); + // TEST_SINGLE(clz(SubRegSize::i64Bit, DReg::d30, DReg::d29), "clz v30.1d, v29.1d"); - //TEST_SINGLE(uadalp(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uadalp v30.16b, v29.16b"); + // TEST_SINGLE(uadalp(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uadalp v30.16b, v29.16b"); TEST_SINGLE(uadalp(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uadalp v30.8h, v29.16b"); TEST_SINGLE(uadalp(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uadalp v30.4s, v29.8h"); TEST_SINGLE(uadalp(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uadalp v30.2d, v29.4s"); - //TEST_SINGLE(uadalp(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uadalp v30.8b, v29.8b"); + // TEST_SINGLE(uadalp(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uadalp v30.8b, v29.8b"); TEST_SINGLE(uadalp(SubRegSize::i16Bit, DReg::d30, DReg::d29), 
"uadalp v30.4h, v29.8b"); TEST_SINGLE(uadalp(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uadalp v30.2s, v29.4h"); TEST_SINGLE(uadalp(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uadalp v30.1d, v29.2s"); @@ -778,7 +784,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register m TEST_SINGLE(sqneg(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sqneg v30.8b, v29.8b"); TEST_SINGLE(sqneg(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sqneg v30.4h, v29.4h"); TEST_SINGLE(sqneg(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sqneg v30.2s, v29.2s"); - //TEST_SINGLE(sqneg(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqneg v30.1d, v29.1d"); + // TEST_SINGLE(sqneg(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqneg v30.1d, v29.1d"); TEST_SINGLE(cmge(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cmge v30.16b, v29.16b, #0"); TEST_SINGLE(cmge(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cmge v30.8h, v29.8h, #0"); @@ -788,7 +794,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register m TEST_SINGLE(cmge(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cmge v30.8b, v29.8b, #0"); TEST_SINGLE(cmge(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cmge v30.4h, v29.4h, #0"); TEST_SINGLE(cmge(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cmge v30.2s, v29.2s, #0"); - //TEST_SINGLE(cmge(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmge v30.1d, v29.1d, #0"); + // TEST_SINGLE(cmge(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmge v30.1d, v29.1d, #0"); // TEST_SINGLE(cmle(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cmle v30.16b, v29.16b, #0"); TEST_SINGLE(cmle(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cmle v30.8h, v29.8h, #0"); @@ -798,7 +804,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register m TEST_SINGLE(cmle(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cmle v30.8b, v29.8b, #0"); TEST_SINGLE(cmle(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cmle v30.4h, v29.4h, #0"); TEST_SINGLE(cmle(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cmle v30.2s, 
v29.2s, #0"); - //TEST_SINGLE(cmle(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmle v30.1d, v29.1d, #0"); + // TEST_SINGLE(cmle(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmle v30.1d, v29.1d, #0"); TEST_SINGLE(neg(SubRegSize::i8Bit, QReg::q30, QReg::q29), "neg v30.16b, v29.16b"); TEST_SINGLE(neg(SubRegSize::i16Bit, QReg::q30, QReg::q29), "neg v30.8h, v29.8h"); @@ -808,34 +814,34 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register m TEST_SINGLE(neg(SubRegSize::i8Bit, DReg::d30, DReg::d29), "neg v30.8b, v29.8b"); TEST_SINGLE(neg(SubRegSize::i16Bit, DReg::d30, DReg::d29), "neg v30.4h, v29.4h"); TEST_SINGLE(neg(SubRegSize::i32Bit, DReg::d30, DReg::d29), "neg v30.2s, v29.2s"); - //TEST_SINGLE(neg(SubRegSize::i64Bit, DReg::d30, DReg::d29), "neg v30.1d, v29.1d"); + // TEST_SINGLE(neg(SubRegSize::i64Bit, DReg::d30, DReg::d29), "neg v30.1d, v29.1d"); TEST_SINGLE(sqxtun(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sqxtun v30.8b, v29.8h"); TEST_SINGLE(sqxtun(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sqxtun v30.4h, v29.4s"); TEST_SINGLE(sqxtun(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sqxtun v30.2s, v29.2d"); - //TEST_SINGLE(sqxtun(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqxtun v30.2d, v29.1d"); + // TEST_SINGLE(sqxtun(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqxtun v30.2d, v29.1d"); TEST_SINGLE(sqxtun(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sqxtun v30.8b, v29.8h"); TEST_SINGLE(sqxtun(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sqxtun v30.4h, v29.4s"); TEST_SINGLE(sqxtun(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sqxtun v30.2s, v29.2d"); - //TEST_SINGLE(sqxtun(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqxtun v30.1d, v29.1d"); + // TEST_SINGLE(sqxtun(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqxtun v30.1d, v29.1d"); TEST_SINGLE(sqxtun2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sqxtun2 v30.16b, v29.8h"); TEST_SINGLE(sqxtun2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sqxtun2 v30.8h, v29.4s"); 
TEST_SINGLE(sqxtun2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sqxtun2 v30.4s, v29.2d"); - //TEST_SINGLE(sqxtun2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqxtun2 v30.2d, v29.1d"); + // TEST_SINGLE(sqxtun2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqxtun2 v30.2d, v29.1d"); TEST_SINGLE(sqxtun2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sqxtun2 v30.16b, v29.8h"); TEST_SINGLE(sqxtun2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sqxtun2 v30.8h, v29.4s"); TEST_SINGLE(sqxtun2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sqxtun2 v30.4s, v29.2d"); - //TEST_SINGLE(sqxtun2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqxtun2 v30.2d, v29.1d"); + // TEST_SINGLE(sqxtun2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqxtun2 v30.2d, v29.1d"); - //TEST_SINGLE(shll(SubRegSize::i8Bit, DReg::d30, DReg::d29), "shll v30.8b, v29.8b, #0"); + // TEST_SINGLE(shll(SubRegSize::i8Bit, DReg::d30, DReg::d29), "shll v30.8b, v29.8b, #0"); TEST_SINGLE(shll(SubRegSize::i16Bit, DReg::d30, DReg::d29), "shll v30.8h, v29.8b, #8"); TEST_SINGLE(shll(SubRegSize::i32Bit, DReg::d30, DReg::d29), "shll v30.4s, v29.4h, #16"); TEST_SINGLE(shll(SubRegSize::i64Bit, DReg::d30, DReg::d29), "shll v30.2d, v29.2s, #32"); - //TEST_SINGLE(shll2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "shll2 v30.16b, v29.16b, #0"); + // TEST_SINGLE(shll2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "shll2 v30.16b, v29.16b, #0"); TEST_SINGLE(shll2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "shll2 v30.8h, v29.16b, #8"); TEST_SINGLE(shll2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "shll2 v30.4s, v29.8h, #16"); TEST_SINGLE(shll2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "shll2 v30.2d, v29.4s, #32"); @@ -843,307 +849,307 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register m TEST_SINGLE(uqxtn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uqxtn v30.8b, v29.8h"); TEST_SINGLE(uqxtn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uqxtn v30.4h, v29.4s"); TEST_SINGLE(uqxtn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uqxtn 
v30.2s, v29.2d"); - //TEST_SINGLE(uqxtn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uqxtn v30.2d, v29.1d"); + // TEST_SINGLE(uqxtn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uqxtn v30.2d, v29.1d"); TEST_SINGLE(uqxtn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uqxtn v30.8b, v29.8h"); TEST_SINGLE(uqxtn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uqxtn v30.4h, v29.4s"); TEST_SINGLE(uqxtn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uqxtn v30.2s, v29.2d"); - //TEST_SINGLE(uqxtn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uqxtn v30.1d, v29.1d"); + // TEST_SINGLE(uqxtn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uqxtn v30.1d, v29.1d"); TEST_SINGLE(uqxtn2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uqxtn2 v30.16b, v29.8h"); TEST_SINGLE(uqxtn2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uqxtn2 v30.8h, v29.4s"); TEST_SINGLE(uqxtn2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uqxtn2 v30.4s, v29.2d"); - //TEST_SINGLE(uqxtn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uqxtn2 v30.2d, v29.1d"); + // TEST_SINGLE(uqxtn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uqxtn2 v30.2d, v29.1d"); TEST_SINGLE(uqxtn2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uqxtn2 v30.16b, v29.8h"); TEST_SINGLE(uqxtn2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uqxtn2 v30.8h, v29.4s"); TEST_SINGLE(uqxtn2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uqxtn2 v30.4s, v29.2d"); - //TEST_SINGLE(uqxtn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uqxtn2 v30.2d, v29.1d"); + // TEST_SINGLE(uqxtn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uqxtn2 v30.2d, v29.1d"); // - //TEST_SINGLE(fcvtxn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtxn v30.8b, v29.8h"); - //TEST_SINGLE(fcvtxn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtxn v30.4h, v29.4s"); + // TEST_SINGLE(fcvtxn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtxn v30.8b, v29.8h"); + // TEST_SINGLE(fcvtxn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtxn v30.4h, v29.4s"); TEST_SINGLE(fcvtxn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtxn v30.2s, v29.2d"); - 
//TEST_SINGLE(fcvtxn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtxn v30.2d, v29.1d"); + // TEST_SINGLE(fcvtxn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtxn v30.2d, v29.1d"); - //TEST_SINGLE(fcvtxn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtxn v30.8b, v29.8h"); - //TEST_SINGLE(fcvtxn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtxn v30.4h, v29.4s"); + // TEST_SINGLE(fcvtxn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtxn v30.8b, v29.8h"); + // TEST_SINGLE(fcvtxn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtxn v30.4h, v29.4s"); TEST_SINGLE(fcvtxn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtxn v30.2s, v29.2d"); - //TEST_SINGLE(fcvtxn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtxn v30.1d, v29.1d"); + // TEST_SINGLE(fcvtxn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtxn v30.1d, v29.1d"); - //TEST_SINGLE(fcvtxn2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtxn2 v30.16b, v29.8h"); - //TEST_SINGLE(fcvtxn2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtxn2 v30.8h, v29.4s"); + // TEST_SINGLE(fcvtxn2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtxn2 v30.16b, v29.8h"); + // TEST_SINGLE(fcvtxn2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtxn2 v30.8h, v29.4s"); TEST_SINGLE(fcvtxn2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtxn2 v30.4s, v29.2d"); - //TEST_SINGLE(fcvtxn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtxn2 v30.2d, v29.1d"); + // TEST_SINGLE(fcvtxn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtxn2 v30.2d, v29.1d"); - //TEST_SINGLE(fcvtxn2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtxn2 v30.16b, v29.8h"); - //TEST_SINGLE(fcvtxn2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtxn2 v30.8h, v29.4s"); + // TEST_SINGLE(fcvtxn2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtxn2 v30.16b, v29.8h"); + // TEST_SINGLE(fcvtxn2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtxn2 v30.8h, v29.4s"); TEST_SINGLE(fcvtxn2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtxn2 v30.4s, v29.2d"); - //TEST_SINGLE(fcvtxn2(SubRegSize::i64Bit, 
DReg::d30, DReg::d29), "fcvtxn2 v30.2d, v29.1d"); + // TEST_SINGLE(fcvtxn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtxn2 v30.2d, v29.1d"); TEST_SINGLE(frinta(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frinta v30.4s, v29.4s"); TEST_SINGLE(frinta(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frinta v30.2d, v29.2d"); TEST_SINGLE(frinta(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frinta v30.2s, v29.2s"); - //TEST_SINGLE(frinta(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frinta v30.1d, v29.1d"); + // TEST_SINGLE(frinta(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frinta v30.1d, v29.1d"); TEST_SINGLE(frintx(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frintx v30.4s, v29.4s"); TEST_SINGLE(frintx(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frintx v30.2d, v29.2d"); TEST_SINGLE(frintx(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frintx v30.2s, v29.2s"); - //TEST_SINGLE(frintx(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintx v30.1d, v29.1d"); + // TEST_SINGLE(frintx(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintx v30.1d, v29.1d"); TEST_SINGLE(fcvtnu(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtnu v30.4s, v29.4s"); TEST_SINGLE(fcvtnu(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtnu v30.2d, v29.2d"); TEST_SINGLE(fcvtnu(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtnu v30.2s, v29.2s"); - //TEST_SINGLE(fcvtnu(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtnu v30.1d, v29.1d"); + // TEST_SINGLE(fcvtnu(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtnu v30.1d, v29.1d"); TEST_SINGLE(fcvtmu(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtmu v30.4s, v29.4s"); TEST_SINGLE(fcvtmu(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtmu v30.2d, v29.2d"); TEST_SINGLE(fcvtmu(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtmu v30.2s, v29.2s"); - //TEST_SINGLE(fcvtmu(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtmu v30.1d, v29.1d"); + // TEST_SINGLE(fcvtmu(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtmu v30.1d, v29.1d"); TEST_SINGLE(fcvtau(SubRegSize::i32Bit, QReg::q30, 
QReg::q29), "fcvtau v30.4s, v29.4s"); TEST_SINGLE(fcvtau(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtau v30.2d, v29.2d"); TEST_SINGLE(fcvtau(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtau v30.2s, v29.2s"); - //TEST_SINGLE(fcvtau(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtau v30.1d, v29.1d"); + // TEST_SINGLE(fcvtau(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtau v30.1d, v29.1d"); TEST_SINGLE(ucvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29), "ucvtf v30.4s, v29.4s"); TEST_SINGLE(ucvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29), "ucvtf v30.2d, v29.2d"); TEST_SINGLE(ucvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29), "ucvtf v30.2s, v29.2s"); - //TEST_SINGLE(ucvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29), "ucvtf v30.1d, v29.1d"); + // TEST_SINGLE(ucvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29), "ucvtf v30.1d, v29.1d"); TEST_SINGLE(frint32x(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frint32x v30.4s, v29.4s"); TEST_SINGLE(frint32x(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frint32x v30.2d, v29.2d"); TEST_SINGLE(frint32x(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frint32x v30.2s, v29.2s"); - //TEST_SINGLE(frint32x(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frint32x v30.1d, v29.1d"); + // TEST_SINGLE(frint32x(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frint32x v30.1d, v29.1d"); TEST_SINGLE(frint64x(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frint64x v30.4s, v29.4s"); TEST_SINGLE(frint64x(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frint64x v30.2d, v29.2d"); TEST_SINGLE(frint64x(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frint64x v30.2s, v29.2s"); - //TEST_SINGLE(frint64x(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frint64x v30.1d, v29.1d"); + // TEST_SINGLE(frint64x(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frint64x v30.1d, v29.1d"); TEST_SINGLE(not_(SubRegSize::i8Bit, QReg::q30, QReg::q29), "mvn v30.16b, v29.16b"); - //TEST_SINGLE(not_(SubRegSize::i16Bit, QReg::q30, QReg::q29), "not v30.8h, v29.8h"); - //TEST_SINGLE(not_(SubRegSize::i32Bit, QReg::q30, 
QReg::q29), "not v30.4s, v29.4s"); - //TEST_SINGLE(not_(SubRegSize::i64Bit, QReg::q30, QReg::q29), "not v30.2d, v29.2d"); + // TEST_SINGLE(not_(SubRegSize::i16Bit, QReg::q30, QReg::q29), "not v30.8h, v29.8h"); + // TEST_SINGLE(not_(SubRegSize::i32Bit, QReg::q30, QReg::q29), "not v30.4s, v29.4s"); + // TEST_SINGLE(not_(SubRegSize::i64Bit, QReg::q30, QReg::q29), "not v30.2d, v29.2d"); TEST_SINGLE(not_(SubRegSize::i8Bit, DReg::d30, DReg::d29), "mvn v30.8b, v29.8b"); - //TEST_SINGLE(not_(SubRegSize::i16Bit, DReg::d30, DReg::d29), "not v30.4h, v29.4h"); - //TEST_SINGLE(not_(SubRegSize::i32Bit, DReg::d30, DReg::d29), "not v30.2s, v29.2s"); - //TEST_SINGLE(not_(SubRegSize::i64Bit, DReg::d30, DReg::d29), "not v30.1d, v29.1d"); + // TEST_SINGLE(not_(SubRegSize::i16Bit, DReg::d30, DReg::d29), "not v30.4h, v29.4h"); + // TEST_SINGLE(not_(SubRegSize::i32Bit, DReg::d30, DReg::d29), "not v30.2s, v29.2s"); + // TEST_SINGLE(not_(SubRegSize::i64Bit, DReg::d30, DReg::d29), "not v30.1d, v29.1d"); TEST_SINGLE(mvn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "mvn v30.16b, v29.16b"); - //TEST_SINGLE(mvn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "mvn v30.8h, v29.8h"); - //TEST_SINGLE(mvn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "mvn v30.4s, v29.4s"); - //TEST_SINGLE(mvn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "mvn v30.2d, v29.2d"); + // TEST_SINGLE(mvn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "mvn v30.8h, v29.8h"); + // TEST_SINGLE(mvn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "mvn v30.4s, v29.4s"); + // TEST_SINGLE(mvn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "mvn v30.2d, v29.2d"); TEST_SINGLE(mvn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "mvn v30.8b, v29.8b"); - //TEST_SINGLE(mvn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "mvn v30.4h, v29.4h"); - //TEST_SINGLE(mvn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "mvn v30.2s, v29.2s"); - //TEST_SINGLE(mvn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "mvn v30.1d, v29.1d"); + // TEST_SINGLE(mvn(SubRegSize::i16Bit, DReg::d30, DReg::d29), 
"mvn v30.4h, v29.4h"); + // TEST_SINGLE(mvn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "mvn v30.2s, v29.2s"); + // TEST_SINGLE(mvn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "mvn v30.1d, v29.1d"); TEST_SINGLE(rbit(SubRegSize::i8Bit, QReg::q30, QReg::q29), "rbit v30.16b, v29.16b"); - //TEST_SINGLE(rbit(SubRegSize::i16Bit, QReg::q30, QReg::q29), "rbit v30.8h, v29.8h"); - //TEST_SINGLE(rbit(SubRegSize::i32Bit, QReg::q30, QReg::q29), "rbit v30.4s, v29.4s"); - //TEST_SINGLE(rbit(SubRegSize::i64Bit, QReg::q30, QReg::q29), "rbit v30.2d, v29.2d"); + // TEST_SINGLE(rbit(SubRegSize::i16Bit, QReg::q30, QReg::q29), "rbit v30.8h, v29.8h"); + // TEST_SINGLE(rbit(SubRegSize::i32Bit, QReg::q30, QReg::q29), "rbit v30.4s, v29.4s"); + // TEST_SINGLE(rbit(SubRegSize::i64Bit, QReg::q30, QReg::q29), "rbit v30.2d, v29.2d"); TEST_SINGLE(rbit(SubRegSize::i8Bit, DReg::d30, DReg::d29), "rbit v30.8b, v29.8b"); - //TEST_SINGLE(rbit(SubRegSize::i16Bit, DReg::d30, DReg::d29), "rbit v30.4h, v29.4h"); - //TEST_SINGLE(rbit(SubRegSize::i32Bit, DReg::d30, DReg::d29), "rbit v30.2s, v29.2s"); - //TEST_SINGLE(rbit(SubRegSize::i64Bit, DReg::d30, DReg::d29), "rbit v30.1d, v29.1d"); + // TEST_SINGLE(rbit(SubRegSize::i16Bit, DReg::d30, DReg::d29), "rbit v30.4h, v29.4h"); + // TEST_SINGLE(rbit(SubRegSize::i32Bit, DReg::d30, DReg::d29), "rbit v30.2s, v29.2s"); + // TEST_SINGLE(rbit(SubRegSize::i64Bit, DReg::d30, DReg::d29), "rbit v30.1d, v29.1d"); TEST_SINGLE(fcmge(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcmge v30.4s, v29.4s, #0.0"); TEST_SINGLE(fcmge(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcmge v30.2d, v29.2d, #0.0"); TEST_SINGLE(fcmge(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcmge v30.2s, v29.2s, #0.0"); - //TEST_SINGLE(fcmge(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmge v30.1d, v29.1d, #0.0"); + // TEST_SINGLE(fcmge(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmge v30.1d, v29.1d, #0.0"); TEST_SINGLE(fcmle(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcmle v30.4s, v29.4s, #0.0"); 
TEST_SINGLE(fcmle(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcmle v30.2d, v29.2d, #0.0"); TEST_SINGLE(fcmle(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcmle v30.2s, v29.2s, #0.0"); - //TEST_SINGLE(fcmle(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmle v30.1d, v29.1d, #0.0"); + // TEST_SINGLE(fcmle(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmle v30.1d, v29.1d, #0.0"); TEST_SINGLE(fneg(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fneg v30.4s, v29.4s"); TEST_SINGLE(fneg(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fneg v30.2d, v29.2d"); TEST_SINGLE(fneg(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fneg v30.2s, v29.2s"); - //TEST_SINGLE(fneg(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fneg v30.1d, v29.1d"); + // TEST_SINGLE(fneg(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fneg v30.1d, v29.1d"); TEST_SINGLE(frinti(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frinti v30.4s, v29.4s"); TEST_SINGLE(frinti(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frinti v30.2d, v29.2d"); TEST_SINGLE(frinti(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frinti v30.2s, v29.2s"); - //TEST_SINGLE(frinti(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frinti v30.1d, v29.1d"); + // TEST_SINGLE(frinti(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frinti v30.1d, v29.1d"); TEST_SINGLE(fcvtpu(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtpu v30.4s, v29.4s"); TEST_SINGLE(fcvtpu(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtpu v30.2d, v29.2d"); TEST_SINGLE(fcvtpu(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtpu v30.2s, v29.2s"); - //TEST_SINGLE(fcvtpu(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtpu v30.1d, v29.1d"); + // TEST_SINGLE(fcvtpu(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtpu v30.1d, v29.1d"); TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtzu v30.4s, v29.4s"); TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtzu v30.2d, v29.2d"); TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtzu v30.2s, v29.2s"); - 
//TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtzu v30.1d, v29.1d"); + // TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtzu v30.1d, v29.1d"); TEST_SINGLE(ursqrte(SubRegSize::i32Bit, QReg::q30, QReg::q29), "ursqrte v30.4s, v29.4s"); - //TEST_SINGLE(ursqrte(SubRegSize::i64Bit, QReg::q30, QReg::q29), "ursqrte v30.2d, v29.2d"); + // TEST_SINGLE(ursqrte(SubRegSize::i64Bit, QReg::q30, QReg::q29), "ursqrte v30.2d, v29.2d"); TEST_SINGLE(ursqrte(SubRegSize::i32Bit, DReg::d30, DReg::d29), "ursqrte v30.2s, v29.2s"); - //TEST_SINGLE(ursqrte(SubRegSize::i64Bit, DReg::d30, DReg::d29), "ursqrte v30.1d, v29.1d"); + // TEST_SINGLE(ursqrte(SubRegSize::i64Bit, DReg::d30, DReg::d29), "ursqrte v30.1d, v29.1d"); TEST_SINGLE(frsqrte(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frsqrte v30.4s, v29.4s"); TEST_SINGLE(frsqrte(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frsqrte v30.2d, v29.2d"); TEST_SINGLE(frsqrte(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frsqrte v30.2s, v29.2s"); - //TEST_SINGLE(frsqrte(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frsqrte v30.1d, v29.1d"); + // TEST_SINGLE(frsqrte(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frsqrte v30.1d, v29.1d"); TEST_SINGLE(fsqrt(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fsqrt v30.4s, v29.4s"); TEST_SINGLE(fsqrt(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fsqrt v30.2d, v29.2d"); TEST_SINGLE(fsqrt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fsqrt v30.2s, v29.2s"); - //TEST_SINGLE(fsqrt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fsqrt v30.1d, v29.1d"); + // TEST_SINGLE(fsqrt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fsqrt v30.1d, v29.1d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD across lanes") { - //TEST_SINGLE(saddlv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "saddlv v30.16b, v29.16b"); + // TEST_SINGLE(saddlv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "saddlv v30.16b, v29.16b"); TEST_SINGLE(saddlv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "saddlv h30, v29.16b"); 
TEST_SINGLE(saddlv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "saddlv s30, v29.8h"); TEST_SINGLE(saddlv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "saddlv d30, v29.4s"); - //TEST_SINGLE(saddlv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "saddlv v30.8b, v29.8b"); + // TEST_SINGLE(saddlv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "saddlv v30.8b, v29.8b"); TEST_SINGLE(saddlv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "saddlv h30, v29.8b"); TEST_SINGLE(saddlv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "saddlv s30, v29.4h"); - //TEST_SINGLE(saddlv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "saddlv d30, v29.1d"); + // TEST_SINGLE(saddlv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "saddlv d30, v29.1d"); TEST_SINGLE(smaxv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "smaxv b30, v29.16b"); TEST_SINGLE(smaxv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "smaxv h30, v29.8h"); TEST_SINGLE(smaxv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "smaxv s30, v29.4s"); - //TEST_SINGLE(smaxv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "smaxv d30, v29.4s"); + // TEST_SINGLE(smaxv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "smaxv d30, v29.4s"); TEST_SINGLE(smaxv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "smaxv b30, v29.8b"); TEST_SINGLE(smaxv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "smaxv h30, v29.4h"); - //TEST_SINGLE(smaxv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "smaxv s30, v29.2s"); - //TEST_SINGLE(smaxv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "smaxv d30, v29.1d"); + // TEST_SINGLE(smaxv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "smaxv s30, v29.2s"); + // TEST_SINGLE(smaxv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "smaxv d30, v29.1d"); TEST_SINGLE(sminv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sminv b30, v29.16b"); TEST_SINGLE(sminv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sminv h30, v29.8h"); TEST_SINGLE(sminv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sminv s30, v29.4s"); - //TEST_SINGLE(sminv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sminv d30, v29.4s"); + // 
TEST_SINGLE(sminv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sminv d30, v29.4s"); TEST_SINGLE(sminv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sminv b30, v29.8b"); TEST_SINGLE(sminv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sminv h30, v29.4h"); - //TEST_SINGLE(sminv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sminv s30, v29.2s"); - //TEST_SINGLE(sminv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sminv d30, v29.1d"); + // TEST_SINGLE(sminv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sminv s30, v29.2s"); + // TEST_SINGLE(sminv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sminv d30, v29.1d"); TEST_SINGLE(addv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "addv b30, v29.16b"); TEST_SINGLE(addv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "addv h30, v29.8h"); TEST_SINGLE(addv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "addv s30, v29.4s"); - //TEST_SINGLE(addv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "addv d30, v29.4s"); + // TEST_SINGLE(addv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "addv d30, v29.4s"); TEST_SINGLE(addv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "addv b30, v29.8b"); TEST_SINGLE(addv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "addv h30, v29.4h"); - //TEST_SINGLE(addv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "addv s30, v29.2s"); - //TEST_SINGLE(addv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "addv d30, v29.1d"); + // TEST_SINGLE(addv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "addv s30, v29.2s"); + // TEST_SINGLE(addv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "addv d30, v29.1d"); - //TEST_SINGLE(uaddlv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uaddlv v30.16b, v29.16b"); + // TEST_SINGLE(uaddlv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uaddlv v30.16b, v29.16b"); TEST_SINGLE(uaddlv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uaddlv h30, v29.16b"); TEST_SINGLE(uaddlv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uaddlv s30, v29.8h"); TEST_SINGLE(uaddlv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uaddlv d30, v29.4s"); - //TEST_SINGLE(uaddlv(SubRegSize::i8Bit, 
DReg::d30, DReg::d29), "uaddlv v30.8b, v29.8b"); + // TEST_SINGLE(uaddlv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uaddlv v30.8b, v29.8b"); TEST_SINGLE(uaddlv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uaddlv h30, v29.8b"); TEST_SINGLE(uaddlv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uaddlv s30, v29.4h"); - //TEST_SINGLE(uaddlv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uaddlv d30, v29.1d"); + // TEST_SINGLE(uaddlv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uaddlv d30, v29.1d"); TEST_SINGLE(umaxv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "umaxv b30, v29.16b"); TEST_SINGLE(umaxv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "umaxv h30, v29.8h"); TEST_SINGLE(umaxv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "umaxv s30, v29.4s"); - //TEST_SINGLE(umaxv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "umaxv d30, v29.4s"); + // TEST_SINGLE(umaxv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "umaxv d30, v29.4s"); TEST_SINGLE(umaxv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "umaxv b30, v29.8b"); TEST_SINGLE(umaxv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "umaxv h30, v29.4h"); - //TEST_SINGLE(umaxv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "umaxv s30, v29.2s"); - //TEST_SINGLE(umaxv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "umaxv d30, v29.1d"); + // TEST_SINGLE(umaxv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "umaxv s30, v29.2s"); + // TEST_SINGLE(umaxv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "umaxv d30, v29.1d"); TEST_SINGLE(uminv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uminv b30, v29.16b"); TEST_SINGLE(uminv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uminv h30, v29.8h"); TEST_SINGLE(uminv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uminv s30, v29.4s"); - //TEST_SINGLE(uminv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uminv d30, v29.4s"); + // TEST_SINGLE(uminv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uminv d30, v29.4s"); TEST_SINGLE(uminv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uminv b30, v29.8b"); TEST_SINGLE(uminv(SubRegSize::i16Bit, DReg::d30, DReg::d29), 
"uminv h30, v29.4h"); - //TEST_SINGLE(uminv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uminv s30, v29.2s"); - //TEST_SINGLE(uminv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uminv d30, v29.1d"); + // TEST_SINGLE(uminv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uminv s30, v29.2s"); + // TEST_SINGLE(uminv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uminv d30, v29.1d"); - //TEST_SINGLE(fmaxnmv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fmaxnmv b30, v29.16b"); + // TEST_SINGLE(fmaxnmv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fmaxnmv b30, v29.16b"); TEST_SINGLE(fmaxnmv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fmaxnmv h30, v29.8h"); TEST_SINGLE(fmaxnmv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fmaxnmv s30, v29.4s"); - //TEST_SINGLE(fmaxnmv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fmaxnmv d30, v29.4s"); + // TEST_SINGLE(fmaxnmv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fmaxnmv d30, v29.4s"); - //TEST_SINGLE(fmaxnmv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fmaxnmv b30, v29.8b"); + // TEST_SINGLE(fmaxnmv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fmaxnmv b30, v29.8b"); TEST_SINGLE(fmaxnmv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fmaxnmv h30, v29.4h"); - //TEST_SINGLE(fmaxnmv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fmaxnmv s30, v29.2s"); - //TEST_SINGLE(fmaxnmv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fmaxnmv d30, v29.1d"); + // TEST_SINGLE(fmaxnmv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fmaxnmv s30, v29.2s"); + // TEST_SINGLE(fmaxnmv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fmaxnmv d30, v29.1d"); - //TEST_SINGLE(fmaxv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fmaxv b30, v29.16b"); + // TEST_SINGLE(fmaxv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fmaxv b30, v29.16b"); TEST_SINGLE(fmaxv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fmaxv h30, v29.8h"); TEST_SINGLE(fmaxv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fmaxv s30, v29.4s"); - //TEST_SINGLE(fmaxv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fmaxv d30, v29.4s"); + // 
TEST_SINGLE(fmaxv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fmaxv d30, v29.4s"); - //TEST_SINGLE(fmaxv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fmaxv b30, v29.8b"); + // TEST_SINGLE(fmaxv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fmaxv b30, v29.8b"); TEST_SINGLE(fmaxv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fmaxv h30, v29.4h"); - //TEST_SINGLE(fmaxv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fmaxv s30, v29.2s"); - //TEST_SINGLE(fmaxv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fmaxv d30, v29.1d"); + // TEST_SINGLE(fmaxv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fmaxv s30, v29.2s"); + // TEST_SINGLE(fmaxv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fmaxv d30, v29.1d"); - //TEST_SINGLE(fminnmv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fminnmv b30, v29.16b"); + // TEST_SINGLE(fminnmv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fminnmv b30, v29.16b"); TEST_SINGLE(fminnmv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fminnmv h30, v29.8h"); TEST_SINGLE(fminnmv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fminnmv s30, v29.4s"); - //TEST_SINGLE(fminnmv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fminnmv d30, v29.4s"); + // TEST_SINGLE(fminnmv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fminnmv d30, v29.4s"); - //TEST_SINGLE(fminnmv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fminnmv b30, v29.8b"); + // TEST_SINGLE(fminnmv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fminnmv b30, v29.8b"); TEST_SINGLE(fminnmv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fminnmv h30, v29.4h"); - //TEST_SINGLE(fminnmv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fminnmv s30, v29.2s"); - //TEST_SINGLE(fminnmv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fminnmv d30, v29.1d"); + // TEST_SINGLE(fminnmv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fminnmv s30, v29.2s"); + // TEST_SINGLE(fminnmv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fminnmv d30, v29.1d"); - //TEST_SINGLE(fminv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fminv b30, v29.16b"); + // TEST_SINGLE(fminv(SubRegSize::i8Bit, 
QReg::q30, QReg::q29), "fminv b30, v29.16b"); TEST_SINGLE(fminv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fminv h30, v29.8h"); TEST_SINGLE(fminv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fminv s30, v29.4s"); - //TEST_SINGLE(fminv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fminv d30, v29.4s"); + // TEST_SINGLE(fminv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fminv d30, v29.4s"); - //TEST_SINGLE(fminv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fminv b30, v29.8b"); + // TEST_SINGLE(fminv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fminv b30, v29.8b"); TEST_SINGLE(fminv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fminv h30, v29.4h"); - //TEST_SINGLE(fminv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fminv s30, v29.2s"); - //TEST_SINGLE(fminv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fminv d30, v29.1d"); + // TEST_SINGLE(fminv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fminv s30, v29.2s"); + // TEST_SINGLE(fminv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fminv d30, v29.1d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three different") { - //TEST_SINGLE(saddl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "saddl v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(saddl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "saddl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(saddl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "saddl v30.8h, v29.8b, v28.8b"); TEST_SINGLE(saddl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "saddl v30.4s, v29.4h, v28.4h"); TEST_SINGLE(saddl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "saddl v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(saddl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "saddl2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(saddl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "saddl2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(saddl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "saddl2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(saddl2(SubRegSize::i32Bit, QReg::q30, 
QReg::q29, QReg::q28), "saddl2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(saddl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "saddl2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(saddw(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "saddw v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(saddw(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "saddw v30.8b, v29.8b, v28.8b"); TEST_SINGLE(saddw(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "saddw v30.8h, v29.8h, v28.8b"); TEST_SINGLE(saddw(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "saddw v30.4s, v29.4s, v28.4h"); TEST_SINGLE(saddw(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "saddw v30.2d, v29.2d, v28.2s"); - //TEST_SINGLE(saddw2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "saddw2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(saddw2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "saddw2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(saddw2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "saddw2 v30.8h, v29.8h, v28.16b"); TEST_SINGLE(saddw2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "saddw2 v30.4s, v29.4s, v28.8h"); TEST_SINGLE(saddw2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "saddw2 v30.2d, v29.2d, v28.4s"); - //TEST_SINGLE(ssubl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "ssubl v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(ssubl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "ssubl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(ssubl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "ssubl v30.8h, v29.8b, v28.8b"); TEST_SINGLE(ssubl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "ssubl v30.4s, v29.4h, v28.4h"); TEST_SINGLE(ssubl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "ssubl v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(ssubl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "ssubl2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(ssubl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "ssubl2 v30.8b, v29.8b, v28.8b"); 
TEST_SINGLE(ssubl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "ssubl2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(ssubl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "ssubl2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(ssubl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "ssubl2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(ssubw(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "ssubw v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(ssubw(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "ssubw v30.8b, v29.8b, v28.8b"); TEST_SINGLE(ssubw(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "ssubw v30.8h, v29.8h, v28.8b"); TEST_SINGLE(ssubw(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "ssubw v30.4s, v29.4s, v28.4h"); TEST_SINGLE(ssubw(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "ssubw v30.2d, v29.2d, v28.2s"); - //TEST_SINGLE(ssubw2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "ssubw2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(ssubw2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "ssubw2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(ssubw2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "ssubw2 v30.8h, v29.8h, v28.16b"); TEST_SINGLE(ssubw2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "ssubw2 v30.4s, v29.4s, v28.8h"); TEST_SINGLE(ssubw2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "ssubw2 v30.2d, v29.2d, v28.4s"); @@ -1151,19 +1157,19 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three differen TEST_SINGLE(addhn(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "addhn v30.8b, v29.8h, v28.8h"); TEST_SINGLE(addhn(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "addhn v30.4h, v29.4s, v28.4s"); TEST_SINGLE(addhn(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "addhn v30.2s, v29.2d, v28.2d"); - //TEST_SINGLE(addhn(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "addhn v30.2d, v29.2d, v28.2s"); + // TEST_SINGLE(addhn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 
DReg::d28), "addhn v30.2d, v29.2d, v28.2s"); TEST_SINGLE(addhn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "addhn2 v30.16b, v29.8h, v28.8h"); TEST_SINGLE(addhn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "addhn2 v30.8h, v29.4s, v28.4s"); TEST_SINGLE(addhn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "addhn2 v30.4s, v29.2d, v28.2d"); - //TEST_SINGLE(addhn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "addhn2 v30.2d, v29.2d, v28.4s"); + // TEST_SINGLE(addhn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "addhn2 v30.2d, v29.2d, v28.4s"); - //TEST_SINGLE(sabal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sabal v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(sabal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sabal v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sabal(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sabal v30.8h, v29.8b, v28.8b"); TEST_SINGLE(sabal(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sabal v30.4s, v29.4h, v28.4h"); TEST_SINGLE(sabal(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sabal v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(sabal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sabal2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(sabal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sabal2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sabal2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sabal2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(sabal2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sabal2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(sabal2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sabal2 v30.2d, v29.4s, v28.4s"); @@ -1171,208 +1177,208 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three differen TEST_SINGLE(subhn(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "subhn v30.8b, v29.8h, v28.8h"); TEST_SINGLE(subhn(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "subhn v30.4h, v29.4s, v28.4s"); 
TEST_SINGLE(subhn(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "subhn v30.2s, v29.2d, v28.2d"); - //TEST_SINGLE(subhn(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "subhn v30.2d, v29.2d, v28.2s"); + // TEST_SINGLE(subhn(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "subhn v30.2d, v29.2d, v28.2s"); TEST_SINGLE(subhn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "subhn2 v30.16b, v29.8h, v28.8h"); TEST_SINGLE(subhn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "subhn2 v30.8h, v29.4s, v28.4s"); TEST_SINGLE(subhn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "subhn2 v30.4s, v29.2d, v28.2d"); - //TEST_SINGLE(subhn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "subhn2 v30.2d, v29.2d, v28.4s"); + // TEST_SINGLE(subhn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "subhn2 v30.2d, v29.2d, v28.4s"); - //TEST_SINGLE(sabdl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sabdl v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(sabdl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sabdl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sabdl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sabdl v30.8h, v29.8b, v28.8b"); TEST_SINGLE(sabdl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sabdl v30.4s, v29.4h, v28.4h"); TEST_SINGLE(sabdl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sabdl v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(sabdl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sabdl2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(sabdl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sabdl2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sabdl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sabdl2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(sabdl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sabdl2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(sabdl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sabdl2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(smlal(SubRegSize::i8Bit, DReg::d30, 
DReg::d29, DReg::d28), "smlal v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(smlal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smlal v30.8b, v29.8b, v28.8b"); TEST_SINGLE(smlal(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "smlal v30.8h, v29.8b, v28.8b"); TEST_SINGLE(smlal(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "smlal v30.4s, v29.4h, v28.4h"); TEST_SINGLE(smlal(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smlal v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(smlal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smlal2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(smlal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smlal2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(smlal2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "smlal2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(smlal2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "smlal2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(smlal2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smlal2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(sqdmlal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlal v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(sqdmlal(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlal v30.8h, v29.8b, v28.8b"); + // TEST_SINGLE(sqdmlal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlal v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(sqdmlal(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlal v30.8h, v29.8b, v28.8b"); TEST_SINGLE(sqdmlal(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlal v30.4s, v29.4h, v28.4h"); TEST_SINGLE(sqdmlal(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlal v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(sqdmlal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlal2 v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(sqdmlal2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlal2 v30.8h, v29.16b, v28.16b"); + // TEST_SINGLE(sqdmlal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 
QReg::q28), "sqdmlal2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(sqdmlal2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlal2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(sqdmlal2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlal2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(sqdmlal2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlal2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(smlsl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smlsl v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(smlsl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smlsl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(smlsl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "smlsl v30.8h, v29.8b, v28.8b"); TEST_SINGLE(smlsl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "smlsl v30.4s, v29.4h, v28.4h"); TEST_SINGLE(smlsl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smlsl v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(smlsl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smlsl2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(smlsl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smlsl2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(smlsl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "smlsl2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(smlsl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "smlsl2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(smlsl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smlsl2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(sqdmlsl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlsl v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(sqdmlsl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlsl v30.8h, v29.8b, v28.8b"); + // TEST_SINGLE(sqdmlsl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlsl v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(sqdmlsl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlsl v30.8h, v29.8b, v28.8b"); TEST_SINGLE(sqdmlsl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlsl 
v30.4s, v29.4h, v28.4h"); TEST_SINGLE(sqdmlsl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlsl v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(sqdmlsl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlsl2 v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(sqdmlsl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlsl2 v30.8h, v29.16b, v28.16b"); + // TEST_SINGLE(sqdmlsl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlsl2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(sqdmlsl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlsl2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(sqdmlsl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlsl2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(sqdmlsl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlsl2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(smull(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smull v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(smull(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smull v30.8b, v29.8b, v28.8b"); TEST_SINGLE(smull(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "smull v30.8h, v29.8b, v28.8b"); TEST_SINGLE(smull(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "smull v30.4s, v29.4h, v28.4h"); TEST_SINGLE(smull(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smull v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(smull2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smull2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(smull2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smull2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(smull2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "smull2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(smull2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "smull2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(smull2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smull2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(sqdmull(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmull v30.8b, v29.8b, 
v28.8b"); - //TEST_SINGLE(sqdmull(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmull v30.8h, v29.8b, v28.8b"); + // TEST_SINGLE(sqdmull(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmull v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(sqdmull(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmull v30.8h, v29.8b, v28.8b"); TEST_SINGLE(sqdmull(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmull v30.4s, v29.4h, v28.4h"); TEST_SINGLE(sqdmull(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmull v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(sqdmull2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmull2 v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(sqdmull2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmull2 v30.8h, v29.16b, v28.16b"); + // TEST_SINGLE(sqdmull2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmull2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(sqdmull2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmull2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(sqdmull2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmull2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(sqdmull2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmull2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(pmull(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "pmull v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(pmull(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "pmull v30.8b, v29.8b, v28.8b"); TEST_SINGLE(pmull(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "pmull v30.8h, v29.8b, v28.8b"); - //TEST_SINGLE(pmull(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "pmull v30.4s, v29.4h, v28.4h"); - //TEST_SINGLE(pmull(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "pmull v30.2d, v29.2s, v28.2s"); + // TEST_SINGLE(pmull(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "pmull v30.4s, v29.4h, v28.4h"); + // TEST_SINGLE(pmull(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "pmull v30.2d, 
v29.2s, v28.2s"); if (false) { // Vixl doesn't support this TEST_SINGLE(pmull(SubRegSize::i128Bit, DReg::d30, DReg::d29, DReg::d28), "pmull v30.1q, v29.1d, v28.1d"); } - //TEST_SINGLE(pmull2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "pmull2 v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(pmull2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "pmull2 v30.8h, v29.16b, v28.16b"); - //TEST_SINGLE(pmull2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "pmull2 v30.4s, v29.8h, v28.8h"); - //TEST_SINGLE(pmull2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "pmull2 v30.2d, v29.4s, v28.4s"); + // TEST_SINGLE(pmull2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "pmull2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(pmull2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "pmull2 v30.8h, v29.16b, v28.16b"); + // TEST_SINGLE(pmull2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "pmull2 v30.4s, v29.8h, v28.8h"); + // TEST_SINGLE(pmull2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "pmull2 v30.2d, v29.4s, v28.4s"); if (false) { // Vixl doesn't support this TEST_SINGLE(pmull2(SubRegSize::i128Bit, QReg::q30, QReg::q29, QReg::q28), "pmull2 v30.1q, v29.2d, v28.2d"); } - //TEST_SINGLE(uaddl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uaddl v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(uaddl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uaddl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uaddl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uaddl v30.8h, v29.8b, v28.8b"); TEST_SINGLE(uaddl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uaddl v30.4s, v29.4h, v28.4h"); TEST_SINGLE(uaddl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uaddl v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(uaddl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uaddl2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(uaddl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uaddl2 v30.8b, v29.8b, v28.8b"); 
TEST_SINGLE(uaddl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uaddl2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(uaddl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uaddl2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(uaddl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uaddl2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(uaddw(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uaddw v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(uaddw(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uaddw v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uaddw(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uaddw v30.8h, v29.8h, v28.8b"); TEST_SINGLE(uaddw(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uaddw v30.4s, v29.4s, v28.4h"); TEST_SINGLE(uaddw(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uaddw v30.2d, v29.2d, v28.2s"); - //TEST_SINGLE(uaddw2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uaddw2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(uaddw2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uaddw2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uaddw2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uaddw2 v30.8h, v29.8h, v28.16b"); TEST_SINGLE(uaddw2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uaddw2 v30.4s, v29.4s, v28.8h"); TEST_SINGLE(uaddw2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uaddw2 v30.2d, v29.2d, v28.4s"); - //TEST_SINGLE(usubl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "usubl v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(usubl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "usubl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(usubl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "usubl v30.8h, v29.8b, v28.8b"); TEST_SINGLE(usubl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "usubl v30.4s, v29.4h, v28.4h"); TEST_SINGLE(usubl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "usubl v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(usubl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 
QReg::q28), "usubl2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(usubl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "usubl2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(usubl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "usubl2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(usubl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "usubl2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(usubl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "usubl2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(usubw(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "usubw v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(usubw(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "usubw v30.8b, v29.8b, v28.8b"); TEST_SINGLE(usubw(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "usubw v30.8h, v29.8h, v28.8b"); TEST_SINGLE(usubw(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "usubw v30.4s, v29.4s, v28.4h"); TEST_SINGLE(usubw(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "usubw v30.2d, v29.2d, v28.2s"); - //TEST_SINGLE(usubw2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "usubw2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(usubw2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "usubw2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(usubw2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "usubw2 v30.8h, v29.8h, v28.16b"); TEST_SINGLE(usubw2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "usubw2 v30.4s, v29.4s, v28.8h"); TEST_SINGLE(usubw2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "usubw2 v30.2d, v29.2d, v28.4s"); //// XXX: RADDHN/2 - //TEST_SINGLE(uabal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uabal v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(uabal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uabal v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uabal(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uabal v30.8h, v29.8b, v28.8b"); TEST_SINGLE(uabal(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uabal v30.4s, v29.4h, 
v28.4h"); TEST_SINGLE(uabal(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uabal v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(uabal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uabal2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(uabal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uabal2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uabal2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uabal2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(uabal2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uabal2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(uabal2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uabal2 v30.2d, v29.4s, v28.4s"); //// XXX: RSUBHN/2 - //TEST_SINGLE(uabdl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uabdl v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(uabdl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uabdl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uabdl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uabdl v30.8h, v29.8b, v28.8b"); TEST_SINGLE(uabdl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uabdl v30.4s, v29.4h, v28.4h"); TEST_SINGLE(uabdl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uabdl v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(uabdl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uabdl2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(uabdl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uabdl2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uabdl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uabdl2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(uabdl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uabdl2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(uabdl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uabdl2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(umlal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umlal v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(umlal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umlal v30.8b, v29.8b, v28.8b"); 
TEST_SINGLE(umlal(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "umlal v30.8h, v29.8b, v28.8b"); TEST_SINGLE(umlal(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "umlal v30.4s, v29.4h, v28.4h"); TEST_SINGLE(umlal(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umlal v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(umlal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umlal2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(umlal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umlal2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(umlal2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "umlal2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(umlal2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "umlal2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(umlal2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umlal2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(umlsl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umlsl v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(umlsl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umlsl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(umlsl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "umlsl v30.8h, v29.8b, v28.8b"); TEST_SINGLE(umlsl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "umlsl v30.4s, v29.4h, v28.4h"); TEST_SINGLE(umlsl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umlsl v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(umlsl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umlsl2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(umlsl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umlsl2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(umlsl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "umlsl2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(umlsl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "umlsl2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(umlsl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umlsl2 v30.2d, v29.4s, v28.4s"); - //TEST_SINGLE(umull(SubRegSize::i8Bit, DReg::d30, DReg::d29, 
DReg::d28), "umull v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(umull(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umull v30.8b, v29.8b, v28.8b"); TEST_SINGLE(umull(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "umull v30.8h, v29.8b, v28.8b"); TEST_SINGLE(umull(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "umull v30.4s, v29.4h, v28.4h"); TEST_SINGLE(umull(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umull v30.2d, v29.2s, v28.2s"); - //TEST_SINGLE(umull2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umull2 v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(umull2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umull2 v30.8b, v29.8b, v28.8b"); TEST_SINGLE(umull2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "umull2 v30.8h, v29.16b, v28.16b"); TEST_SINGLE(umull2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "umull2 v30.4s, v29.8h, v28.8h"); TEST_SINGLE(umull2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umull2 v30.2d, v29.4s, v28.4s"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { - TEST_SINGLE(shadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "shadd v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(shadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "shadd v30.16b, v29.16b, v28.16b"); TEST_SINGLE(shadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "shadd v30.8h, v29.8h, v28.8h"); TEST_SINGLE(shadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "shadd v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(shadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "shadd v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(shadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "shadd v30.2d, v29.2d, v28.2d"); TEST_SINGLE(shadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "shadd v30.8b, v29.8b, v28.8b"); TEST_SINGLE(shadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "shadd v30.4h, v29.4h, v28.4h"); TEST_SINGLE(shadd(SubRegSize::i32Bit, 
DReg::d30, DReg::d29, DReg::d28), "shadd v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(shadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "shadd v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(shadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "shadd v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(sqadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqadd v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(sqadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqadd v30.16b, v29.16b, v28.16b"); TEST_SINGLE(sqadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqadd v30.8h, v29.8h, v28.8h"); TEST_SINGLE(sqadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqadd v30.4s, v29.4s, v28.4s"); TEST_SINGLE(sqadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqadd v30.2d, v29.2d, v28.2d"); @@ -1380,29 +1386,29 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(sqadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqadd v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sqadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqadd v30.4h, v29.4h, v28.4h"); TEST_SINGLE(sqadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqadd v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(sqadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqadd v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(sqadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqadd v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(srhadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "srhadd v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(srhadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "srhadd v30.16b, v29.16b, v28.16b"); TEST_SINGLE(srhadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "srhadd v30.8h, v29.8h, v28.8h"); TEST_SINGLE(srhadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "srhadd v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(srhadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "srhadd v30.2d, v29.2d, 
v28.2d"); + // TEST_SINGLE(srhadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "srhadd v30.2d, v29.2d, v28.2d"); TEST_SINGLE(srhadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "srhadd v30.8b, v29.8b, v28.8b"); TEST_SINGLE(srhadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "srhadd v30.4h, v29.4h, v28.4h"); TEST_SINGLE(srhadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "srhadd v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(srhadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "srhadd v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(srhadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "srhadd v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(shsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "shsub v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(shsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "shsub v30.16b, v29.16b, v28.16b"); TEST_SINGLE(shsub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "shsub v30.8h, v29.8h, v28.8h"); TEST_SINGLE(shsub(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "shsub v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(shsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "shsub v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(shsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "shsub v30.2d, v29.2d, v28.2d"); TEST_SINGLE(shsub(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "shsub v30.8b, v29.8b, v28.8b"); TEST_SINGLE(shsub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "shsub v30.4h, v29.4h, v28.4h"); TEST_SINGLE(shsub(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "shsub v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(shsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "shsub v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(shsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "shsub v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(sqsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqsub v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(sqsub(SubRegSize::i8Bit, 
QReg::q30, QReg::q29, QReg::q28), "sqsub v30.16b, v29.16b, v28.16b"); TEST_SINGLE(sqsub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqsub v30.8h, v29.8h, v28.8h"); TEST_SINGLE(sqsub(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqsub v30.4s, v29.4s, v28.4s"); TEST_SINGLE(sqsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqsub v30.2d, v29.2d, v28.2d"); @@ -1410,9 +1416,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(sqsub(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqsub v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sqsub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqsub v30.4h, v29.4h, v28.4h"); TEST_SINGLE(sqsub(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqsub v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(sqsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqsub v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(sqsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqsub v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(cmgt(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmgt v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(cmgt(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmgt v30.16b, v29.16b, v28.16b"); TEST_SINGLE(cmgt(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "cmgt v30.8h, v29.8h, v28.8h"); TEST_SINGLE(cmgt(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "cmgt v30.4s, v29.4s, v28.4s"); TEST_SINGLE(cmgt(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "cmgt v30.2d, v29.2d, v28.2d"); @@ -1420,9 +1426,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(cmgt(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "cmgt v30.8b, v29.8b, v28.8b"); TEST_SINGLE(cmgt(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "cmgt v30.4h, v29.4h, v28.4h"); TEST_SINGLE(cmgt(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "cmgt v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(cmgt(SubRegSize::i64Bit, 
DReg::d30, DReg::d29, DReg::d28), "cmgt v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(cmgt(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmgt v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(cmge(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmge v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(cmge(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmge v30.16b, v29.16b, v28.16b"); TEST_SINGLE(cmge(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "cmge v30.8h, v29.8h, v28.8h"); TEST_SINGLE(cmge(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "cmge v30.4s, v29.4s, v28.4s"); TEST_SINGLE(cmge(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "cmge v30.2d, v29.2d, v28.2d"); @@ -1430,9 +1436,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(cmge(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "cmge v30.8b, v29.8b, v28.8b"); TEST_SINGLE(cmge(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "cmge v30.4h, v29.4h, v28.4h"); TEST_SINGLE(cmge(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "cmge v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(cmge(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmge v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(cmge(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmge v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(sshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sshl v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(sshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sshl v30.16b, v29.16b, v28.16b"); TEST_SINGLE(sshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sshl v30.8h, v29.8h, v28.8h"); TEST_SINGLE(sshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sshl v30.4s, v29.4s, v28.4s"); TEST_SINGLE(sshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sshl v30.2d, v29.2d, v28.2d"); @@ -1440,9 +1446,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(sshl(SubRegSize::i8Bit, DReg::d30, 
DReg::d29, DReg::d28), "sshl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sshl v30.4h, v29.4h, v28.4h"); TEST_SINGLE(sshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sshl v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(sshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sshl v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(sshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sshl v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(sqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqshl v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(sqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqshl v30.16b, v29.16b, v28.16b"); TEST_SINGLE(sqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqshl v30.8h, v29.8h, v28.8h"); TEST_SINGLE(sqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqshl v30.4s, v29.4s, v28.4s"); TEST_SINGLE(sqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqshl v30.2d, v29.2d, v28.2d"); @@ -1450,9 +1456,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(sqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqshl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqshl v30.4h, v29.4h, v28.4h"); TEST_SINGLE(sqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqshl v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(sqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqshl v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(sqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqshl v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(srshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "srshl v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(srshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "srshl v30.16b, v29.16b, v28.16b"); TEST_SINGLE(srshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "srshl v30.8h, v29.8h, v28.8h"); 
TEST_SINGLE(srshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "srshl v30.4s, v29.4s, v28.4s"); TEST_SINGLE(srshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "srshl v30.2d, v29.2d, v28.2d"); @@ -1460,9 +1466,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(srshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "srshl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(srshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "srshl v30.4h, v29.4h, v28.4h"); TEST_SINGLE(srshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "srshl v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(srshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "srshl v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(srshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "srshl v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(sqrshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqrshl v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(sqrshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqrshl v30.16b, v29.16b, v28.16b"); TEST_SINGLE(sqrshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqrshl v30.8h, v29.8h, v28.8h"); TEST_SINGLE(sqrshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqrshl v30.4s, v29.4s, v28.4s"); TEST_SINGLE(sqrshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqrshl v30.2d, v29.2d, v28.2d"); @@ -1470,49 +1476,49 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(sqrshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqrshl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sqrshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqrshl v30.4h, v29.4h, v28.4h"); TEST_SINGLE(sqrshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqrshl v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(sqrshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqrshl v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(sqrshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 
DReg::d28), "sqrshl v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(smax(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smax v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(smax(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smax v30.16b, v29.16b, v28.16b"); TEST_SINGLE(smax(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "smax v30.8h, v29.8h, v28.8h"); TEST_SINGLE(smax(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "smax v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(smax(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smax v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(smax(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smax v30.2d, v29.2d, v28.2d"); TEST_SINGLE(smax(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smax v30.8b, v29.8b, v28.8b"); TEST_SINGLE(smax(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "smax v30.4h, v29.4h, v28.4h"); TEST_SINGLE(smax(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "smax v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(smax(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smax v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(smax(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smax v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(smin(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smin v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(smin(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smin v30.16b, v29.16b, v28.16b"); TEST_SINGLE(smin(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "smin v30.8h, v29.8h, v28.8h"); TEST_SINGLE(smin(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "smin v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(smin(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smin v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(smin(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smin v30.2d, v29.2d, v28.2d"); TEST_SINGLE(smin(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smin v30.8b, v29.8b, v28.8b"); TEST_SINGLE(smin(SubRegSize::i16Bit, DReg::d30, DReg::d29, 
DReg::d28), "smin v30.4h, v29.4h, v28.4h"); TEST_SINGLE(smin(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "smin v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(smin(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smin v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(smin(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smin v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(sabd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sabd v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(sabd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sabd v30.16b, v29.16b, v28.16b"); TEST_SINGLE(sabd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sabd v30.8h, v29.8h, v28.8h"); TEST_SINGLE(sabd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sabd v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(sabd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sabd v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(sabd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sabd v30.2d, v29.2d, v28.2d"); TEST_SINGLE(sabd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sabd v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sabd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sabd v30.4h, v29.4h, v28.4h"); TEST_SINGLE(sabd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sabd v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(sabd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sabd v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(sabd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sabd v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(saba(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "saba v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(saba(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "saba v30.16b, v29.16b, v28.16b"); TEST_SINGLE(saba(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "saba v30.8h, v29.8h, v28.8h"); TEST_SINGLE(saba(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "saba v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(saba(SubRegSize::i64Bit, QReg::q30, 
QReg::q29, QReg::q28), "saba v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(saba(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "saba v30.2d, v29.2d, v28.2d"); TEST_SINGLE(saba(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "saba v30.8b, v29.8b, v28.8b"); TEST_SINGLE(saba(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "saba v30.4h, v29.4h, v28.4h"); TEST_SINGLE(saba(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "saba v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(saba(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "saba v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(saba(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "saba v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(add(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "add v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(add(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "add v30.16b, v29.16b, v28.16b"); TEST_SINGLE(add(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "add v30.8h, v29.8h, v28.8h"); TEST_SINGLE(add(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "add v30.4s, v29.4s, v28.4s"); TEST_SINGLE(add(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "add v30.2d, v29.2d, v28.2d"); @@ -1520,9 +1526,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(add(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "add v30.8b, v29.8b, v28.8b"); TEST_SINGLE(add(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "add v30.4h, v29.4h, v28.4h"); TEST_SINGLE(add(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "add v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(add(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "add v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(add(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "add v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(cmtst(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmtst v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(cmtst(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), 
"cmtst v30.16b, v29.16b, v28.16b"); TEST_SINGLE(cmtst(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "cmtst v30.8h, v29.8h, v28.8h"); TEST_SINGLE(cmtst(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "cmtst v30.4s, v29.4s, v28.4s"); TEST_SINGLE(cmtst(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "cmtst v30.2d, v29.2d, v28.2d"); @@ -1530,59 +1536,59 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(cmtst(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "cmtst v30.8b, v29.8b, v28.8b"); TEST_SINGLE(cmtst(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "cmtst v30.4h, v29.4h, v28.4h"); TEST_SINGLE(cmtst(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "cmtst v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(cmtst(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmtst v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(cmtst(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmtst v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(mla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "mla v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(mla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "mla v30.16b, v29.16b, v28.16b"); TEST_SINGLE(mla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "mla v30.8h, v29.8h, v28.8h"); TEST_SINGLE(mla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "mla v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(mla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "mla v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(mla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "mla v30.2d, v29.2d, v28.2d"); TEST_SINGLE(mla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "mla v30.8b, v29.8b, v28.8b"); TEST_SINGLE(mla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "mla v30.4h, v29.4h, v28.4h"); TEST_SINGLE(mla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "mla v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(mla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "mla 
v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(mla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "mla v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(mul(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "mul v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(mul(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "mul v30.16b, v29.16b, v28.16b"); TEST_SINGLE(mul(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "mul v30.8h, v29.8h, v28.8h"); TEST_SINGLE(mul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "mul v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(mul(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "mul v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(mul(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "mul v30.2d, v29.2d, v28.2d"); TEST_SINGLE(mul(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "mul v30.8b, v29.8b, v28.8b"); TEST_SINGLE(mul(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "mul v30.4h, v29.4h, v28.4h"); TEST_SINGLE(mul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "mul v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(mul(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "mul v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(mul(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "mul v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(smaxp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smaxp v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(smaxp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smaxp v30.16b, v29.16b, v28.16b"); TEST_SINGLE(smaxp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "smaxp v30.8h, v29.8h, v28.8h"); TEST_SINGLE(smaxp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "smaxp v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(smaxp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smaxp v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(smaxp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smaxp v30.2d, v29.2d, v28.2d"); TEST_SINGLE(smaxp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smaxp 
v30.8b, v29.8b, v28.8b"); TEST_SINGLE(smaxp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "smaxp v30.4h, v29.4h, v28.4h"); TEST_SINGLE(smaxp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "smaxp v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(smaxp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smaxp v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(smaxp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smaxp v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(sminp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sminp v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(sminp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sminp v30.16b, v29.16b, v28.16b"); TEST_SINGLE(sminp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sminp v30.8h, v29.8h, v28.8h"); TEST_SINGLE(sminp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sminp v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(sminp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sminp v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(sminp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sminp v30.2d, v29.2d, v28.2d"); TEST_SINGLE(sminp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sminp v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sminp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sminp v30.4h, v29.4h, v28.4h"); TEST_SINGLE(sminp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sminp v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(sminp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sminp v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(sminp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sminp v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(sqdmulh(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmulh v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(sqdmulh(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmulh v30.16b, v29.16b, v28.16b"); TEST_SINGLE(sqdmulh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmulh v30.8h, v29.8h, v28.8h"); 
TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmulh v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(sqdmulh(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmulh v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(sqdmulh(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmulh v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(sqdmulh(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmulh v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(sqdmulh(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmulh v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sqdmulh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmulh v30.4h, v29.4h, v28.4h"); TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmulh v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(sqdmulh(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmulh v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(sqdmulh(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmulh v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(addp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "addp v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(addp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "addp v30.16b, v29.16b, v28.16b"); TEST_SINGLE(addp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "addp v30.8h, v29.8h, v28.8h"); TEST_SINGLE(addp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "addp v30.4s, v29.4s, v28.4s"); TEST_SINGLE(addp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "addp v30.2d, v29.2d, v28.2d"); @@ -1590,77 +1596,77 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(addp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "addp v30.8b, v29.8b, v28.8b"); TEST_SINGLE(addp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "addp v30.4h, v29.4h, v28.4h"); TEST_SINGLE(addp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "addp v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(addp(SubRegSize::i64Bit, DReg::d30, 
DReg::d29, DReg::d28), "addp v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(addp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "addp v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fmaxnm(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnm v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnm v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fmaxnm(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnm v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnm v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fmaxnm(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnm v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fmaxnm(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnm v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fmaxnm(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnm v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnm v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fmaxnm(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnm v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnm v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fmaxnm(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnm v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fmaxnm(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnm v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fmaxnm(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnm v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmla v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmla v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmla v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, 
QReg::q28), "fmla v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmla v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmla v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmla v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmla v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmla v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmla v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fmla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmla v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmla v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmla v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fadd v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fadd v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fadd v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fadd v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fadd v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fadd v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fadd v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fadd v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fadd v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fadd(SubRegSize::i16Bit, 
DReg::d30, DReg::d29, DReg::d28), "fadd v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fadd v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fadd v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fadd v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fmulx(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmulx v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fmulx(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmulx v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fmulx(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmulx v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fmulx(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmulx v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fmulx(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmulx v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fmulx(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmulx v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fmulx(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmulx v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fmulx(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmulx v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fmulx(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmulx v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fmulx(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmulx v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fmulx(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmulx v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fmulx(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmulx v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fmulx(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmulx v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fcmeq(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fcmeq v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fcmeq(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fcmeq v30.8h, 
v29.8h, v28.8h"); + // TEST_SINGLE(fcmeq(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fcmeq v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fcmeq(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fcmeq v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fcmeq(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fcmeq v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fcmeq(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fcmeq v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fcmeq(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fcmeq v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fcmeq(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fcmeq v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fcmeq(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fcmeq v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fcmeq(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fcmeq v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fcmeq(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fcmeq v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fcmeq(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fcmeq v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fcmeq(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fcmeq v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fmax(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmax v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fmax(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmax v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fmax(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmax v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fmax(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmax v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fmax(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmax v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fmax(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmax v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fmax(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmax v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fmax(SubRegSize::i16Bit, 
DReg::d30, DReg::d29, DReg::d28), "fmax v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fmax(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmax v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fmax(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmax v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fmax(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmax v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fmax(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmax v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fmax(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmax v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(frecps(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "frecps v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(frecps(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "frecps v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(frecps(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "frecps v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(frecps(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "frecps v30.8h, v29.8h, v28.8h"); TEST_SINGLE(frecps(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "frecps v30.4s, v29.4s, v28.4s"); TEST_SINGLE(frecps(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "frecps v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(frecps(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "frecps v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(frecps(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "frecps v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(frecps(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "frecps v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(frecps(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "frecps v30.4h, v29.4h, v28.4h"); TEST_SINGLE(frecps(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "frecps v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(frecps(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "frecps v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(frecps(SubRegSize::i64Bit, DReg::d30, DReg::d29, 
DReg::d28), "frecps v30.1d, v29.1d, v28.1d"); TEST_SINGLE(and_(QReg::q30, QReg::q29, QReg::q28), "and v30.16b, v29.16b, v28.16b"); TEST_SINGLE(and_(DReg::d30, DReg::d29, DReg::d28), "and v30.8b, v29.8b, v28.8b"); @@ -1674,55 +1680,55 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(bic(QReg::q30, QReg::q29, QReg::q28), "bic v30.16b, v29.16b, v28.16b"); TEST_SINGLE(bic(DReg::d30, DReg::d29, DReg::d28), "bic v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fminnm(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fminnm v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fminnm(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fminnm v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fminnm(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fminnm v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fminnm(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fminnm v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fminnm(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fminnm v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fminnm(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fminnm v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fminnm(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fminnm v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fminnm(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fminnm v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fminnm(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fminnm v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fminnm(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fminnm v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fminnm(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fminnm v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fminnm(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fminnm v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fminnm(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fminnm v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fmls(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), 
"fmls v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fmls(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmls v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fmls(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmls v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fmls(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmls v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fmls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmls v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fmls(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmls v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fmls(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmls v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fmls(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmls v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fmls(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmls v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fmls(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmls v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fmls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmls v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fmls(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmls v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fmls(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmls v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fsub v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fsub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fsub v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fsub v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fsub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fsub v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fsub(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fsub v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fsub v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fsub(SubRegSize::i8Bit, 
DReg::d30, DReg::d29, DReg::d28), "fsub v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fsub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fsub v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fsub(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fsub v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fsub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fsub v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fsub(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fsub v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fsub v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fsub v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fmin(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmin v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fmin(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmin v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fmin(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmin v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fmin(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmin v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fmin(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmin v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fmin(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmin v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fmin(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmin v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fmin(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmin v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fmin(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmin v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fmin(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmin v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fmin(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmin v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fmin(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmin v30.1d, v29.1d, v28.1d"); + // 
TEST_SINGLE(fmin(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmin v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(frsqrts(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "frsqrts v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(frsqrts(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "frsqrts v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(frsqrts(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "frsqrts v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(frsqrts(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "frsqrts v30.8h, v29.8h, v28.8h"); TEST_SINGLE(frsqrts(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "frsqrts v30.4s, v29.4s, v28.4s"); TEST_SINGLE(frsqrts(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "frsqrts v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(frsqrts(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "frsqrts v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(frsqrts(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "frsqrts v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(frsqrts(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "frsqrts v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(frsqrts(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "frsqrts v30.4h, v29.4h, v28.4h"); TEST_SINGLE(frsqrts(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "frsqrts v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(frsqrts(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "frsqrts v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(frsqrts(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "frsqrts v30.1d, v29.1d, v28.1d"); TEST_SINGLE(orr(QReg::q30, QReg::q29, QReg::q28), "orr v30.16b, v29.16b, v28.16b"); TEST_SINGLE(orr(DReg::d30, DReg::d29, DReg::d28), "orr v30.8b, v29.8b, v28.8b"); @@ -1739,17 +1745,17 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(orn(QReg::q30, QReg::q29, QReg::q28), "orn v30.16b, v29.16b, v28.16b"); TEST_SINGLE(orn(DReg::d30, DReg::d29, DReg::d28), "orn v30.8b, v29.8b, 
v28.8b"); - TEST_SINGLE(uhadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uhadd v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(uhadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uhadd v30.16b, v29.16b, v28.16b"); TEST_SINGLE(uhadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uhadd v30.8h, v29.8h, v28.8h"); TEST_SINGLE(uhadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uhadd v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(uhadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uhadd v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(uhadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uhadd v30.2d, v29.2d, v28.2d"); TEST_SINGLE(uhadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uhadd v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uhadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uhadd v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uhadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uhadd v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(uhadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uhadd v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uhadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uhadd v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(uqadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uqadd v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(uqadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uqadd v30.16b, v29.16b, v28.16b"); TEST_SINGLE(uqadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uqadd v30.8h, v29.8h, v28.8h"); TEST_SINGLE(uqadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uqadd v30.4s, v29.4s, v28.4s"); TEST_SINGLE(uqadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uqadd v30.2d, v29.2d, v28.2d"); @@ -1757,39 +1763,39 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(uqadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uqadd v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uqadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, 
DReg::d28), "uqadd v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uqadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uqadd v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(uqadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uqadd v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uqadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uqadd v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(urhadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "urhadd v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(urhadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "urhadd v30.16b, v29.16b, v28.16b"); TEST_SINGLE(urhadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "urhadd v30.8h, v29.8h, v28.8h"); TEST_SINGLE(urhadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "urhadd v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(urhadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "urhadd v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(urhadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "urhadd v30.2d, v29.2d, v28.2d"); TEST_SINGLE(urhadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "urhadd v30.8b, v29.8b, v28.8b"); TEST_SINGLE(urhadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "urhadd v30.4h, v29.4h, v28.4h"); TEST_SINGLE(urhadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "urhadd v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(urhadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "urhadd v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(urhadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "urhadd v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(uhsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uhsub v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(uhsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uhsub v30.16b, v29.16b, v28.16b"); TEST_SINGLE(uhsub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uhsub v30.8h, v29.8h, v28.8h"); TEST_SINGLE(uhsub(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uhsub v30.4s, v29.4s, v28.4s"); 
- //TEST_SINGLE(uhsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uhsub v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(uhsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uhsub v30.2d, v29.2d, v28.2d"); TEST_SINGLE(uhsub(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uhsub v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uhsub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uhsub v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uhsub(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uhsub v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(uhsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uhsub v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uhsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uhsub v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(uqsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uqsub v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(uqsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uqsub v30.16b, v29.16b, v28.16b"); TEST_SINGLE(uqsub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uqsub v30.8h, v29.8h, v28.8h"); TEST_SINGLE(uqsub(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uqsub v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(uqsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uqsub v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(uqsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uqsub v30.2d, v29.2d, v28.2d"); TEST_SINGLE(uqsub(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uqsub v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uqsub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uqsub v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uqsub(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uqsub v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(uqsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uqsub v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uqsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uqsub v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(cmhi(SubRegSize::i8Bit, QReg::q30, QReg::q29, 
QReg::q28), "cmhi v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(cmhi(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmhi v30.16b, v29.16b, v28.16b"); TEST_SINGLE(cmhi(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "cmhi v30.8h, v29.8h, v28.8h"); TEST_SINGLE(cmhi(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "cmhi v30.4s, v29.4s, v28.4s"); TEST_SINGLE(cmhi(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "cmhi v30.2d, v29.2d, v28.2d"); @@ -1797,9 +1803,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(cmhi(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "cmhi v30.8b, v29.8b, v28.8b"); TEST_SINGLE(cmhi(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "cmhi v30.4h, v29.4h, v28.4h"); TEST_SINGLE(cmhi(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "cmhi v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(cmhi(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmhi v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(cmhi(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmhi v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(cmhs(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmhs v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(cmhs(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmhs v30.16b, v29.16b, v28.16b"); TEST_SINGLE(cmhs(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "cmhs v30.8h, v29.8h, v28.8h"); TEST_SINGLE(cmhs(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "cmhs v30.4s, v29.4s, v28.4s"); TEST_SINGLE(cmhs(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "cmhs v30.2d, v29.2d, v28.2d"); @@ -1807,9 +1813,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(cmhs(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "cmhs v30.8b, v29.8b, v28.8b"); TEST_SINGLE(cmhs(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "cmhs v30.4h, v29.4h, v28.4h"); TEST_SINGLE(cmhs(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "cmhs 
v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(cmhs(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmhs v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(cmhs(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmhs v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(ushl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "ushl v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(ushl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "ushl v30.16b, v29.16b, v28.16b"); TEST_SINGLE(ushl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "ushl v30.8h, v29.8h, v28.8h"); TEST_SINGLE(ushl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "ushl v30.4s, v29.4s, v28.4s"); TEST_SINGLE(ushl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "ushl v30.2d, v29.2d, v28.2d"); @@ -1817,19 +1823,19 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(ushl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "ushl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(ushl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "ushl v30.4h, v29.4h, v28.4h"); TEST_SINGLE(ushl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "ushl v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(ushl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "ushl v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(ushl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "ushl v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(uqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uqshl v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(uqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uqshl v30.16b, v29.16b, v28.16b"); TEST_SINGLE(uqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uqshl v30.8h, v29.8h, v28.8h"); TEST_SINGLE(uqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uqshl v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(uqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uqshl v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(uqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 
QReg::q28), "uqshl v30.2d, v29.2d, v28.2d"); TEST_SINGLE(uqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uqshl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uqshl v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uqshl v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(uqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uqshl v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uqshl v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(urshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "urshl v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(urshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "urshl v30.16b, v29.16b, v28.16b"); TEST_SINGLE(urshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "urshl v30.8h, v29.8h, v28.8h"); TEST_SINGLE(urshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "urshl v30.4s, v29.4s, v28.4s"); TEST_SINGLE(urshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "urshl v30.2d, v29.2d, v28.2d"); @@ -1837,59 +1843,59 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(urshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "urshl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(urshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "urshl v30.4h, v29.4h, v28.4h"); TEST_SINGLE(urshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "urshl v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(urshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "urshl v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(urshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "urshl v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(uqrshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uqrshl v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(uqrshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uqrshl v30.16b, v29.16b, v28.16b"); 
TEST_SINGLE(uqrshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uqrshl v30.8h, v29.8h, v28.8h"); TEST_SINGLE(uqrshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uqrshl v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(uqrshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uqrshl v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(uqrshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uqrshl v30.2d, v29.2d, v28.2d"); TEST_SINGLE(uqrshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uqrshl v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uqrshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uqrshl v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uqrshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uqrshl v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(uqrshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uqrshl v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uqrshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uqrshl v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(umax(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umax v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(umax(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umax v30.16b, v29.16b, v28.16b"); TEST_SINGLE(umax(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "umax v30.8h, v29.8h, v28.8h"); TEST_SINGLE(umax(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "umax v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(umax(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umax v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(umax(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umax v30.2d, v29.2d, v28.2d"); TEST_SINGLE(umax(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umax v30.8b, v29.8b, v28.8b"); TEST_SINGLE(umax(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "umax v30.4h, v29.4h, v28.4h"); TEST_SINGLE(umax(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "umax v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(umax(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), 
"umax v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(umax(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umax v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(umin(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umin v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(umin(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umin v30.16b, v29.16b, v28.16b"); TEST_SINGLE(umin(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "umin v30.8h, v29.8h, v28.8h"); TEST_SINGLE(umin(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "umin v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(umin(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umin v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(umin(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umin v30.2d, v29.2d, v28.2d"); TEST_SINGLE(umin(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umin v30.8b, v29.8b, v28.8b"); TEST_SINGLE(umin(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "umin v30.4h, v29.4h, v28.4h"); TEST_SINGLE(umin(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "umin v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(umin(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umin v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(umin(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umin v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(uabd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uabd v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(uabd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uabd v30.16b, v29.16b, v28.16b"); TEST_SINGLE(uabd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uabd v30.8h, v29.8h, v28.8h"); TEST_SINGLE(uabd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uabd v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(uabd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uabd v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(uabd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uabd v30.2d, v29.2d, v28.2d"); TEST_SINGLE(uabd(SubRegSize::i8Bit, DReg::d30, DReg::d29, 
DReg::d28), "uabd v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uabd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uabd v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uabd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uabd v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(uabd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uabd v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uabd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uabd v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(uaba(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uaba v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(uaba(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uaba v30.16b, v29.16b, v28.16b"); TEST_SINGLE(uaba(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uaba v30.8h, v29.8h, v28.8h"); TEST_SINGLE(uaba(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uaba v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(uaba(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uaba v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(uaba(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uaba v30.2d, v29.2d, v28.2d"); TEST_SINGLE(uaba(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uaba v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uaba(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uaba v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uaba(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uaba v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(uaba(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uaba v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uaba(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uaba v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(sub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sub v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(sub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sub v30.16b, v29.16b, v28.16b"); TEST_SINGLE(sub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sub v30.8h, v29.8h, v28.8h"); TEST_SINGLE(sub(SubRegSize::i32Bit, QReg::q30, QReg::q29, 
QReg::q28), "sub v30.4s, v29.4s, v28.4s"); TEST_SINGLE(sub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sub v30.2d, v29.2d, v28.2d"); @@ -1897,9 +1903,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(sub(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sub v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sub v30.4h, v29.4h, v28.4h"); TEST_SINGLE(sub(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sub v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(sub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sub v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(sub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sub v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(cmeq(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmeq v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(cmeq(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmeq v30.16b, v29.16b, v28.16b"); TEST_SINGLE(cmeq(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "cmeq v30.8h, v29.8h, v28.8h"); TEST_SINGLE(cmeq(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "cmeq v30.4s, v29.4s, v28.4s"); TEST_SINGLE(cmeq(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "cmeq v30.2d, v29.2d, v28.2d"); @@ -1907,120 +1913,120 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(cmeq(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "cmeq v30.8b, v29.8b, v28.8b"); TEST_SINGLE(cmeq(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "cmeq v30.4h, v29.4h, v28.4h"); TEST_SINGLE(cmeq(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "cmeq v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(cmeq(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmeq v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(cmeq(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmeq v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(mls(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "mls v30.16b, 
v29.16b, v28.16b"); + TEST_SINGLE(mls(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "mls v30.16b, v29.16b, v28.16b"); TEST_SINGLE(mls(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "mls v30.8h, v29.8h, v28.8h"); TEST_SINGLE(mls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "mls v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(mls(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "mls v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(mls(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "mls v30.2d, v29.2d, v28.2d"); TEST_SINGLE(mls(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "mls v30.8b, v29.8b, v28.8b"); TEST_SINGLE(mls(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "mls v30.4h, v29.4h, v28.4h"); TEST_SINGLE(mls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "mls v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(mls(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "mls v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(mls(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "mls v30.1d, v29.1d, v28.1d"); TEST_SINGLE(pmul(QReg::q30, QReg::q29, QReg::q28), "pmul v30.16b, v29.16b, v28.16b"); TEST_SINGLE(pmul(DReg::d30, DReg::d29, DReg::d28), "pmul v30.8b, v29.8b, v28.8b"); - TEST_SINGLE(umaxp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umaxp v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(umaxp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umaxp v30.16b, v29.16b, v28.16b"); TEST_SINGLE(umaxp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "umaxp v30.8h, v29.8h, v28.8h"); TEST_SINGLE(umaxp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "umaxp v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(umaxp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umaxp v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(umaxp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umaxp v30.2d, v29.2d, v28.2d"); TEST_SINGLE(umaxp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umaxp v30.8b, v29.8b, v28.8b"); 
TEST_SINGLE(umaxp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "umaxp v30.4h, v29.4h, v28.4h"); TEST_SINGLE(umaxp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "umaxp v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(umaxp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umaxp v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(umaxp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umaxp v30.1d, v29.1d, v28.1d"); - TEST_SINGLE(uminp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uminp v30.16b, v29.16b, v28.16b"); + TEST_SINGLE(uminp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uminp v30.16b, v29.16b, v28.16b"); TEST_SINGLE(uminp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uminp v30.8h, v29.8h, v28.8h"); TEST_SINGLE(uminp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uminp v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(uminp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uminp v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(uminp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uminp v30.2d, v29.2d, v28.2d"); TEST_SINGLE(uminp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uminp v30.8b, v29.8b, v28.8b"); TEST_SINGLE(uminp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uminp v30.4h, v29.4h, v28.4h"); TEST_SINGLE(uminp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uminp v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(uminp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uminp v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(uminp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uminp v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(sqrdmulh(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmulh v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(sqrdmulh(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmulh v30.16b, v29.16b, v28.16b"); TEST_SINGLE(sqrdmulh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmulh v30.8h, v29.8h, v28.8h"); TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, 
QReg::q30, QReg::q29, QReg::q28), "sqrdmulh v30.4s, v29.4s, v28.4s"); - //TEST_SINGLE(sqrdmulh(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmulh v30.2d, v29.2d, v28.2d"); + // TEST_SINGLE(sqrdmulh(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmulh v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(sqrdmulh(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmulh v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(sqrdmulh(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmulh v30.8b, v29.8b, v28.8b"); TEST_SINGLE(sqrdmulh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmulh v30.4h, v29.4h, v28.4h"); TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmulh v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(sqrdmulh(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmulh v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(sqrdmulh(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmulh v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fmaxnmp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnmp v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fmaxnmp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnmp v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fmaxnmp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnmp v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fmaxnmp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnmp v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fmaxnmp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnmp v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fmaxnmp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnmp v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fmaxnmp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnmp v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fmaxnmp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnmp v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fmaxnmp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnmp v30.8b, v29.8b, v28.8b"); + 
// TEST_SINGLE(fmaxnmp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnmp v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fmaxnmp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnmp v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fmaxnmp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnmp v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fmaxnmp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnmp v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(faddp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "faddp v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(faddp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "faddp v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(faddp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "faddp v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(faddp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "faddp v30.8h, v29.8h, v28.8h"); TEST_SINGLE(faddp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "faddp v30.4s, v29.4s, v28.4s"); TEST_SINGLE(faddp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "faddp v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(faddp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "faddp v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(faddp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "faddp v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(faddp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "faddp v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(faddp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "faddp v30.4h, v29.4h, v28.4h"); TEST_SINGLE(faddp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "faddp v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(faddp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "faddp v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(faddp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "faddp v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fmul(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmul v30.16b, v29.16b, v28.16b"); - 
//TEST_SINGLE(fmul(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmul v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fmul(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmul v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fmul(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmul v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fmul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmul v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fmul(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmul v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fmul(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmul v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fmul(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmul v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fmul(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmul v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fmul(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmul v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fmul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmul v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fmul(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmul v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fmul(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmul v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fcmge(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fcmge v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fcmge(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fcmge v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fcmge(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fcmge v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fcmge(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fcmge v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fcmge(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fcmge v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fcmge(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fcmge v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fcmge(SubRegSize::i8Bit, DReg::d30, DReg::d29, 
DReg::d28), "fcmge v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fcmge(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fcmge v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fcmge(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fcmge v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fcmge(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fcmge v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fcmge(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fcmge v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fcmge(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fcmge v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fcmge(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fcmge v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(facge(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "facge v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(facge(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "facge v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(facge(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "facge v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(facge(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "facge v30.8h, v29.8h, v28.8h"); TEST_SINGLE(facge(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "facge v30.4s, v29.4s, v28.4s"); TEST_SINGLE(facge(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "facge v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(facge(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "facge v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(facge(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "facge v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(facge(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "facge v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(facge(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "facge v30.4h, v29.4h, v28.4h"); TEST_SINGLE(facge(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "facge v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(facge(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "facge v30.1d, v29.1d, v28.1d"); 
+ // TEST_SINGLE(facge(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "facge v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fmaxp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxp v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fmaxp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxp v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fmaxp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxp v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fmaxp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxp v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fmaxp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxp v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fmaxp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxp v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fmaxp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxp v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fmaxp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxp v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fmaxp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxp v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fmaxp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxp v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fmaxp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxp v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fmaxp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxp v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fmaxp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxp v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fdiv(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fdiv v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fdiv(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fdiv v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fdiv(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fdiv v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fdiv(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fdiv v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fdiv(SubRegSize::i32Bit, 
QReg::q30, QReg::q29, QReg::q28), "fdiv v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fdiv(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fdiv v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fdiv(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fdiv v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fdiv(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fdiv v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fdiv(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fdiv v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fdiv(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fdiv v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fdiv(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fdiv v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fdiv(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fdiv v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fdiv(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fdiv v30.1d, v29.1d, v28.1d"); TEST_SINGLE(eor(QReg::q30, QReg::q29, QReg::q28), "eor v30.16b, v29.16b, v28.16b"); TEST_SINGLE(eor(DReg::d30, DReg::d29, DReg::d28), "eor v30.8b, v29.8b, v28.8b"); @@ -2028,55 +2034,55 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") { TEST_SINGLE(bsl(QReg::q30, QReg::q29, QReg::q28), "bsl v30.16b, v29.16b, v28.16b"); TEST_SINGLE(bsl(DReg::d30, DReg::d29, DReg::d28), "bsl v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fminnmp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fminnmp v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fminnmp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fminnmp v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fminnmp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fminnmp v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fminnmp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fminnmp v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fminnmp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fminnmp v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fminnmp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fminnmp 
v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fminnmp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fminnmp v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fminnmp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fminnmp v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fminnmp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fminnmp v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fminnmp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fminnmp v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fminnmp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fminnmp v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fminnmp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fminnmp v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fminnmp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fminnmp v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fabd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fabd v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fabd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fabd v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fabd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fabd v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fabd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fabd v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fabd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fabd v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fabd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fabd v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fabd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fabd v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fabd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fabd v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fabd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fabd v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fabd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fabd v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fabd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fabd v30.2s, v29.2s, v28.2s"); - 
//TEST_SINGLE(fabd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fabd v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fabd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fabd v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fcmgt(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fcmgt v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fcmgt(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fcmgt v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fcmgt(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fcmgt v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fcmgt(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fcmgt v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fcmgt(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fcmgt v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fcmgt(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fcmgt v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fcmgt(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fcmgt v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fcmgt(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fcmgt v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fcmgt(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fcmgt v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(fcmgt(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fcmgt v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fcmgt(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fcmgt v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fcmgt(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fcmgt v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fcmgt(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fcmgt v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(facgt(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "facgt v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(facgt(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "facgt v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(facgt(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "facgt v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(facgt(SubRegSize::i16Bit, 
QReg::q30, QReg::q29, QReg::q28), "facgt v30.8h, v29.8h, v28.8h"); TEST_SINGLE(facgt(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "facgt v30.4s, v29.4s, v28.4s"); TEST_SINGLE(facgt(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "facgt v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(facgt(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "facgt v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(facgt(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "facgt v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(facgt(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "facgt v30.8b, v29.8b, v28.8b"); + // TEST_SINGLE(facgt(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "facgt v30.4h, v29.4h, v28.4h"); TEST_SINGLE(facgt(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "facgt v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(facgt(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "facgt v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(facgt(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "facgt v30.1d, v29.1d, v28.1d"); - //TEST_SINGLE(fminp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fminp v30.16b, v29.16b, v28.16b"); - //TEST_SINGLE(fminp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fminp v30.8h, v29.8h, v28.8h"); + // TEST_SINGLE(fminp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "fminp v30.16b, v29.16b, v28.16b"); + // TEST_SINGLE(fminp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fminp v30.8h, v29.8h, v28.8h"); TEST_SINGLE(fminp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fminp v30.4s, v29.4s, v28.4s"); TEST_SINGLE(fminp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fminp v30.2d, v29.2d, v28.2d"); - //TEST_SINGLE(fminp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fminp v30.8b, v29.8b, v28.8b"); - //TEST_SINGLE(fminp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fminp v30.4h, v29.4h, v28.4h"); + // TEST_SINGLE(fminp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fminp v30.8b, 
v29.8b, v28.8b"); + // TEST_SINGLE(fminp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fminp v30.4h, v29.4h, v28.4h"); TEST_SINGLE(fminp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fminp v30.2s, v29.2s, v28.2s"); - //TEST_SINGLE(fminp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fminp v30.1d, v29.1d, v28.1d"); + // TEST_SINGLE(fminp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fminp v30.1d, v29.1d, v28.1d"); TEST_SINGLE(bit(QReg::q30, QReg::q29, QReg::q28), "bit v30.16b, v29.16b, v28.16b"); TEST_SINGLE(bit(DReg::d30, DReg::d29, DReg::d28), "bit v30.8b, v29.8b, v28.8b"); @@ -2094,7 +2100,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD modified immed TEST_SINGLE(fmov(SubRegSize::i16Bit, DReg::d30, 1.0), "fmov v30.4h, #0x70 (1.0000)"); TEST_SINGLE(fmov(SubRegSize::i32Bit, DReg::d30, 1.0), "fmov v30.2s, #0x70 (1.0000)"); - //TEST_SINGLE(fmov(SubRegSize::i64Bit, DReg::d30, 1.0), "fmov v30.1d, #0x70 (1.0000)"); + // TEST_SINGLE(fmov(SubRegSize::i64Bit, DReg::d30, 1.0), "fmov v30.1d, #0x70 (1.0000)"); // XXX: MVNI - Shifted immediate // XXX: BIC @@ -2118,539 +2124,539 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD modified immed } TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD shift by immediate") { - TEST_SINGLE(sshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sshr v30.16b, v29.16b, #1"); - TEST_SINGLE(sshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sshr v30.16b, v29.16b, #7"); - TEST_SINGLE(sshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sshr v30.8h, v29.8h, #1"); + TEST_SINGLE(sshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sshr v30.16b, v29.16b, #1"); + TEST_SINGLE(sshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sshr v30.16b, v29.16b, #7"); + TEST_SINGLE(sshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sshr v30.8h, v29.8h, #1"); TEST_SINGLE(sshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sshr v30.8h, v29.8h, #15"); - 
TEST_SINGLE(sshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sshr v30.4s, v29.4s, #1"); + TEST_SINGLE(sshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sshr v30.4s, v29.4s, #1"); TEST_SINGLE(sshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sshr v30.4s, v29.4s, #31"); - TEST_SINGLE(sshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sshr v30.2d, v29.2d, #1"); + TEST_SINGLE(sshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sshr v30.2d, v29.2d, #1"); TEST_SINGLE(sshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sshr v30.2d, v29.2d, #63"); - TEST_SINGLE(sshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sshr v30.8b, v29.8b, #1"); - TEST_SINGLE(sshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sshr v30.8b, v29.8b, #7"); - TEST_SINGLE(sshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sshr v30.4h, v29.4h, #1"); + TEST_SINGLE(sshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sshr v30.8b, v29.8b, #1"); + TEST_SINGLE(sshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sshr v30.8b, v29.8b, #7"); + TEST_SINGLE(sshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sshr v30.4h, v29.4h, #1"); TEST_SINGLE(sshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sshr v30.4h, v29.4h, #15"); - TEST_SINGLE(sshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sshr v30.2s, v29.2s, #1"); + TEST_SINGLE(sshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sshr v30.2s, v29.2s, #1"); TEST_SINGLE(sshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sshr v30.2s, v29.2s, #31"); - //TEST_SINGLE(sshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sshr v30.1d, v29.1d, #1"); - //TEST_SINGLE(sshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sshr v30.1d, v29.1d, #63"); + // TEST_SINGLE(sshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sshr v30.1d, v29.1d, #1"); + // TEST_SINGLE(sshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sshr v30.1d, v29.1d, #63"); - TEST_SINGLE(ssra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ssra v30.16b, v29.16b, #1"); - TEST_SINGLE(ssra(SubRegSize::i8Bit, 
QReg::q30, QReg::q29, 7), "ssra v30.16b, v29.16b, #7"); - TEST_SINGLE(ssra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ssra v30.8h, v29.8h, #1"); + TEST_SINGLE(ssra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ssra v30.16b, v29.16b, #1"); + TEST_SINGLE(ssra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "ssra v30.16b, v29.16b, #7"); + TEST_SINGLE(ssra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ssra v30.8h, v29.8h, #1"); TEST_SINGLE(ssra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "ssra v30.8h, v29.8h, #15"); - TEST_SINGLE(ssra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ssra v30.4s, v29.4s, #1"); + TEST_SINGLE(ssra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ssra v30.4s, v29.4s, #1"); TEST_SINGLE(ssra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "ssra v30.4s, v29.4s, #31"); - TEST_SINGLE(ssra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ssra v30.2d, v29.2d, #1"); + TEST_SINGLE(ssra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ssra v30.2d, v29.2d, #1"); TEST_SINGLE(ssra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "ssra v30.2d, v29.2d, #63"); - TEST_SINGLE(ssra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ssra v30.8b, v29.8b, #1"); - TEST_SINGLE(ssra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ssra v30.8b, v29.8b, #7"); - TEST_SINGLE(ssra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ssra v30.4h, v29.4h, #1"); + TEST_SINGLE(ssra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ssra v30.8b, v29.8b, #1"); + TEST_SINGLE(ssra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ssra v30.8b, v29.8b, #7"); + TEST_SINGLE(ssra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ssra v30.4h, v29.4h, #1"); TEST_SINGLE(ssra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "ssra v30.4h, v29.4h, #15"); - TEST_SINGLE(ssra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ssra v30.2s, v29.2s, #1"); + TEST_SINGLE(ssra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ssra v30.2s, v29.2s, #1"); TEST_SINGLE(ssra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "ssra v30.2s, v29.2s, 
#31"); - //TEST_SINGLE(ssra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "ssra v30.1d, v29.1d, #1"); - //TEST_SINGLE(ssra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "ssra v30.1d, v29.1d, #63"); + // TEST_SINGLE(ssra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "ssra v30.1d, v29.1d, #1"); + // TEST_SINGLE(ssra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "ssra v30.1d, v29.1d, #63"); - TEST_SINGLE(srshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "srshr v30.16b, v29.16b, #1"); - TEST_SINGLE(srshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "srshr v30.16b, v29.16b, #7"); - TEST_SINGLE(srshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "srshr v30.8h, v29.8h, #1"); + TEST_SINGLE(srshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "srshr v30.16b, v29.16b, #1"); + TEST_SINGLE(srshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "srshr v30.16b, v29.16b, #7"); + TEST_SINGLE(srshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "srshr v30.8h, v29.8h, #1"); TEST_SINGLE(srshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "srshr v30.8h, v29.8h, #15"); - TEST_SINGLE(srshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "srshr v30.4s, v29.4s, #1"); + TEST_SINGLE(srshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "srshr v30.4s, v29.4s, #1"); TEST_SINGLE(srshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "srshr v30.4s, v29.4s, #31"); - TEST_SINGLE(srshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "srshr v30.2d, v29.2d, #1"); + TEST_SINGLE(srshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "srshr v30.2d, v29.2d, #1"); TEST_SINGLE(srshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "srshr v30.2d, v29.2d, #63"); - TEST_SINGLE(srshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "srshr v30.8b, v29.8b, #1"); - TEST_SINGLE(srshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "srshr v30.8b, v29.8b, #7"); - TEST_SINGLE(srshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "srshr v30.4h, v29.4h, #1"); + TEST_SINGLE(srshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "srshr v30.8b, 
v29.8b, #1"); + TEST_SINGLE(srshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "srshr v30.8b, v29.8b, #7"); + TEST_SINGLE(srshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "srshr v30.4h, v29.4h, #1"); TEST_SINGLE(srshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "srshr v30.4h, v29.4h, #15"); - TEST_SINGLE(srshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "srshr v30.2s, v29.2s, #1"); + TEST_SINGLE(srshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "srshr v30.2s, v29.2s, #1"); TEST_SINGLE(srshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "srshr v30.2s, v29.2s, #31"); - //TEST_SINGLE(srshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "srshr v30.1d, v29.1d, #1"); - //TEST_SINGLE(srshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "srshr v30.1d, v29.1d, #63"); + // TEST_SINGLE(srshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "srshr v30.1d, v29.1d, #1"); + // TEST_SINGLE(srshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "srshr v30.1d, v29.1d, #63"); - TEST_SINGLE(srsra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "srsra v30.16b, v29.16b, #1"); - TEST_SINGLE(srsra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "srsra v30.16b, v29.16b, #7"); - TEST_SINGLE(srsra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "srsra v30.8h, v29.8h, #1"); + TEST_SINGLE(srsra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "srsra v30.16b, v29.16b, #1"); + TEST_SINGLE(srsra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "srsra v30.16b, v29.16b, #7"); + TEST_SINGLE(srsra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "srsra v30.8h, v29.8h, #1"); TEST_SINGLE(srsra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "srsra v30.8h, v29.8h, #15"); - TEST_SINGLE(srsra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "srsra v30.4s, v29.4s, #1"); + TEST_SINGLE(srsra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "srsra v30.4s, v29.4s, #1"); TEST_SINGLE(srsra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "srsra v30.4s, v29.4s, #31"); - TEST_SINGLE(srsra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), 
"srsra v30.2d, v29.2d, #1"); + TEST_SINGLE(srsra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "srsra v30.2d, v29.2d, #1"); TEST_SINGLE(srsra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "srsra v30.2d, v29.2d, #63"); - TEST_SINGLE(srsra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "srsra v30.8b, v29.8b, #1"); - TEST_SINGLE(srsra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "srsra v30.8b, v29.8b, #7"); - TEST_SINGLE(srsra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "srsra v30.4h, v29.4h, #1"); + TEST_SINGLE(srsra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "srsra v30.8b, v29.8b, #1"); + TEST_SINGLE(srsra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "srsra v30.8b, v29.8b, #7"); + TEST_SINGLE(srsra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "srsra v30.4h, v29.4h, #1"); TEST_SINGLE(srsra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "srsra v30.4h, v29.4h, #15"); - TEST_SINGLE(srsra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "srsra v30.2s, v29.2s, #1"); + TEST_SINGLE(srsra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "srsra v30.2s, v29.2s, #1"); TEST_SINGLE(srsra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "srsra v30.2s, v29.2s, #31"); - //TEST_SINGLE(srsra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "srsra v30.1d, v29.1d, #1"); - //TEST_SINGLE(srsra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "srsra v30.1d, v29.1d, #63"); + // TEST_SINGLE(srsra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "srsra v30.1d, v29.1d, #1"); + // TEST_SINGLE(srsra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "srsra v30.1d, v29.1d, #63"); - TEST_SINGLE(shl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "shl v30.16b, v29.16b, #1"); - TEST_SINGLE(shl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "shl v30.16b, v29.16b, #7"); - TEST_SINGLE(shl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "shl v30.8h, v29.8h, #1"); + TEST_SINGLE(shl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "shl v30.16b, v29.16b, #1"); + TEST_SINGLE(shl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "shl 
v30.16b, v29.16b, #7"); + TEST_SINGLE(shl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "shl v30.8h, v29.8h, #1"); TEST_SINGLE(shl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "shl v30.8h, v29.8h, #15"); - TEST_SINGLE(shl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "shl v30.4s, v29.4s, #1"); + TEST_SINGLE(shl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "shl v30.4s, v29.4s, #1"); TEST_SINGLE(shl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "shl v30.4s, v29.4s, #31"); - TEST_SINGLE(shl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "shl v30.2d, v29.2d, #1"); + TEST_SINGLE(shl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "shl v30.2d, v29.2d, #1"); TEST_SINGLE(shl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "shl v30.2d, v29.2d, #63"); - TEST_SINGLE(shl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "shl v30.8b, v29.8b, #1"); - TEST_SINGLE(shl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "shl v30.8b, v29.8b, #7"); - TEST_SINGLE(shl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "shl v30.4h, v29.4h, #1"); + TEST_SINGLE(shl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "shl v30.8b, v29.8b, #1"); + TEST_SINGLE(shl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "shl v30.8b, v29.8b, #7"); + TEST_SINGLE(shl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "shl v30.4h, v29.4h, #1"); TEST_SINGLE(shl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "shl v30.4h, v29.4h, #15"); - TEST_SINGLE(shl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "shl v30.2s, v29.2s, #1"); + TEST_SINGLE(shl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "shl v30.2s, v29.2s, #1"); TEST_SINGLE(shl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "shl v30.2s, v29.2s, #31"); - //TEST_SINGLE(shl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "shl v30.1d, v29.1d, #1"); - //TEST_SINGLE(shl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "shl v30.1d, v29.1d, #63"); + // TEST_SINGLE(shl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "shl v30.1d, v29.1d, #1"); + // TEST_SINGLE(shl(SubRegSize::i64Bit, DReg::d30, 
DReg::d29, 63), "shl v30.1d, v29.1d, #63"); - TEST_SINGLE(sqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqshl v30.16b, v29.16b, #1"); - TEST_SINGLE(sqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqshl v30.16b, v29.16b, #7"); - TEST_SINGLE(sqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqshl v30.8h, v29.8h, #1"); + TEST_SINGLE(sqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqshl v30.16b, v29.16b, #1"); + TEST_SINGLE(sqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqshl v30.16b, v29.16b, #7"); + TEST_SINGLE(sqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqshl v30.8h, v29.8h, #1"); TEST_SINGLE(sqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sqshl v30.8h, v29.8h, #15"); - TEST_SINGLE(sqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqshl v30.4s, v29.4s, #1"); + TEST_SINGLE(sqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqshl v30.4s, v29.4s, #1"); TEST_SINGLE(sqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sqshl v30.4s, v29.4s, #31"); - TEST_SINGLE(sqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqshl v30.2d, v29.2d, #1"); + TEST_SINGLE(sqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqshl v30.2d, v29.2d, #1"); TEST_SINGLE(sqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqshl v30.2d, v29.2d, #63"); - TEST_SINGLE(sqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqshl v30.8b, v29.8b, #1"); - TEST_SINGLE(sqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqshl v30.8b, v29.8b, #7"); - TEST_SINGLE(sqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqshl v30.4h, v29.4h, #1"); + TEST_SINGLE(sqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqshl v30.8b, v29.8b, #1"); + TEST_SINGLE(sqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqshl v30.8b, v29.8b, #7"); + TEST_SINGLE(sqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqshl v30.4h, v29.4h, #1"); TEST_SINGLE(sqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sqshl v30.4h, v29.4h, #15"); - TEST_SINGLE(sqshl(SubRegSize::i32Bit, DReg::d30, 
DReg::d29, 1), "sqshl v30.2s, v29.2s, #1"); + TEST_SINGLE(sqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqshl v30.2s, v29.2s, #1"); TEST_SINGLE(sqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sqshl v30.2s, v29.2s, #31"); - //TEST_SINGLE(sqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sqshl v30.1d, v29.1d, #1"); - //TEST_SINGLE(sqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqshl v30.1d, v29.1d, #63"); + // TEST_SINGLE(sqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sqshl v30.1d, v29.1d, #1"); + // TEST_SINGLE(sqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqshl v30.1d, v29.1d, #63"); - TEST_SINGLE(shrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "shrn v30.8b, v29.8h, #1"); - TEST_SINGLE(shrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "shrn v30.8b, v29.8h, #7"); - TEST_SINGLE(shrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "shrn v30.4h, v29.4s, #1"); + TEST_SINGLE(shrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "shrn v30.8b, v29.8h, #1"); + TEST_SINGLE(shrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "shrn v30.8b, v29.8h, #7"); + TEST_SINGLE(shrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "shrn v30.4h, v29.4s, #1"); TEST_SINGLE(shrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "shrn v30.4h, v29.4s, #15"); - TEST_SINGLE(shrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "shrn v30.2s, v29.2d, #1"); + TEST_SINGLE(shrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "shrn v30.2s, v29.2d, #1"); TEST_SINGLE(shrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "shrn v30.2s, v29.2d, #31"); - //TEST_SINGLE(shrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "shrn v30.1d, v29.1d, #1"); - //TEST_SINGLE(shrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "shrn v30.1d, v29.1d, #63"); + // TEST_SINGLE(shrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "shrn v30.1d, v29.1d, #1"); + // TEST_SINGLE(shrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "shrn v30.1d, v29.1d, #63"); - TEST_SINGLE(shrn2(SubRegSize::i8Bit, QReg::q30, 
QReg::q29, 1), "shrn2 v30.16b, v29.8h, #1"); - TEST_SINGLE(shrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "shrn2 v30.16b, v29.8h, #7"); - TEST_SINGLE(shrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "shrn2 v30.8h, v29.4s, #1"); + TEST_SINGLE(shrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "shrn2 v30.16b, v29.8h, #1"); + TEST_SINGLE(shrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "shrn2 v30.16b, v29.8h, #7"); + TEST_SINGLE(shrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "shrn2 v30.8h, v29.4s, #1"); TEST_SINGLE(shrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "shrn2 v30.8h, v29.4s, #15"); - TEST_SINGLE(shrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "shrn2 v30.4s, v29.2d, #1"); + TEST_SINGLE(shrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "shrn2 v30.4s, v29.2d, #1"); TEST_SINGLE(shrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "shrn2 v30.4s, v29.2d, #31"); - //TEST_SINGLE(shrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "shrn2 v30.2d, v29.2d, #1"); - //TEST_SINGLE(shrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "shrn2 v30.2d, v29.2d, #63"); + // TEST_SINGLE(shrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "shrn2 v30.2d, v29.2d, #1"); + // TEST_SINGLE(shrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "shrn2 v30.2d, v29.2d, #63"); - TEST_SINGLE(rshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "rshrn v30.8b, v29.8h, #1"); - TEST_SINGLE(rshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "rshrn v30.8b, v29.8h, #7"); - TEST_SINGLE(rshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "rshrn v30.4h, v29.4s, #1"); + TEST_SINGLE(rshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "rshrn v30.8b, v29.8h, #1"); + TEST_SINGLE(rshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "rshrn v30.8b, v29.8h, #7"); + TEST_SINGLE(rshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "rshrn v30.4h, v29.4s, #1"); TEST_SINGLE(rshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "rshrn v30.4h, v29.4s, #15"); - TEST_SINGLE(rshrn(SubRegSize::i32Bit, 
DReg::d30, DReg::d29, 1), "rshrn v30.2s, v29.2d, #1"); + TEST_SINGLE(rshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "rshrn v30.2s, v29.2d, #1"); TEST_SINGLE(rshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "rshrn v30.2s, v29.2d, #31"); - //TEST_SINGLE(rshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "rshrn v30.1d, v29.1d, #1"); - //TEST_SINGLE(rshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "rshrn v30.1d, v29.1d, #63"); + // TEST_SINGLE(rshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "rshrn v30.1d, v29.1d, #1"); + // TEST_SINGLE(rshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "rshrn v30.1d, v29.1d, #63"); - TEST_SINGLE(rshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "rshrn2 v30.16b, v29.8h, #1"); - TEST_SINGLE(rshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "rshrn2 v30.16b, v29.8h, #7"); - TEST_SINGLE(rshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "rshrn2 v30.8h, v29.4s, #1"); + TEST_SINGLE(rshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "rshrn2 v30.16b, v29.8h, #1"); + TEST_SINGLE(rshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "rshrn2 v30.16b, v29.8h, #7"); + TEST_SINGLE(rshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "rshrn2 v30.8h, v29.4s, #1"); TEST_SINGLE(rshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "rshrn2 v30.8h, v29.4s, #15"); - TEST_SINGLE(rshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "rshrn2 v30.4s, v29.2d, #1"); + TEST_SINGLE(rshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "rshrn2 v30.4s, v29.2d, #1"); TEST_SINGLE(rshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "rshrn2 v30.4s, v29.2d, #31"); - //TEST_SINGLE(rshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "rshrn2 v30.2d, v29.2d, #1"); - //TEST_SINGLE(rshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "rshrn2 v30.2d, v29.2d, #63"); + // TEST_SINGLE(rshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "rshrn2 v30.2d, v29.2d, #1"); + // TEST_SINGLE(rshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "rshrn2 v30.2d, 
v29.2d, #63"); - TEST_SINGLE(sqshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqshrn v30.8b, v29.8h, #1"); - TEST_SINGLE(sqshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqshrn v30.8b, v29.8h, #7"); - TEST_SINGLE(sqshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqshrn v30.4h, v29.4s, #1"); + TEST_SINGLE(sqshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqshrn v30.8b, v29.8h, #1"); + TEST_SINGLE(sqshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqshrn v30.8b, v29.8h, #7"); + TEST_SINGLE(sqshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqshrn v30.4h, v29.4s, #1"); TEST_SINGLE(sqshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sqshrn v30.4h, v29.4s, #15"); - TEST_SINGLE(sqshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqshrn v30.2s, v29.2d, #1"); + TEST_SINGLE(sqshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqshrn v30.2s, v29.2d, #1"); TEST_SINGLE(sqshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sqshrn v30.2s, v29.2d, #31"); - //TEST_SINGLE(sqshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sqshrn v30.1d, v29.1d, #1"); - //TEST_SINGLE(sqshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqshrn v30.1d, v29.1d, #63"); + // TEST_SINGLE(sqshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sqshrn v30.1d, v29.1d, #1"); + // TEST_SINGLE(sqshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqshrn v30.1d, v29.1d, #63"); - TEST_SINGLE(sqshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqshrn2 v30.16b, v29.8h, #1"); - TEST_SINGLE(sqshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqshrn2 v30.16b, v29.8h, #7"); - TEST_SINGLE(sqshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqshrn2 v30.8h, v29.4s, #1"); + TEST_SINGLE(sqshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqshrn2 v30.16b, v29.8h, #1"); + TEST_SINGLE(sqshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqshrn2 v30.16b, v29.8h, #7"); + TEST_SINGLE(sqshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqshrn2 v30.8h, v29.4s, #1"); 
TEST_SINGLE(sqshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sqshrn2 v30.8h, v29.4s, #15"); - TEST_SINGLE(sqshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqshrn2 v30.4s, v29.2d, #1"); + TEST_SINGLE(sqshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqshrn2 v30.4s, v29.2d, #1"); TEST_SINGLE(sqshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sqshrn2 v30.4s, v29.2d, #31"); - //TEST_SINGLE(sqshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqshrn2 v30.2d, v29.2d, #1"); - //TEST_SINGLE(sqshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqshrn2 v30.2d, v29.2d, #63"); + // TEST_SINGLE(sqshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqshrn2 v30.2d, v29.2d, #1"); + // TEST_SINGLE(sqshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqshrn2 v30.2d, v29.2d, #63"); - TEST_SINGLE(sqrshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqrshrn v30.8b, v29.8h, #1"); - TEST_SINGLE(sqrshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqrshrn v30.8b, v29.8h, #7"); - TEST_SINGLE(sqrshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqrshrn v30.4h, v29.4s, #1"); + TEST_SINGLE(sqrshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqrshrn v30.8b, v29.8h, #1"); + TEST_SINGLE(sqrshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqrshrn v30.8b, v29.8h, #7"); + TEST_SINGLE(sqrshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqrshrn v30.4h, v29.4s, #1"); TEST_SINGLE(sqrshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sqrshrn v30.4h, v29.4s, #15"); - TEST_SINGLE(sqrshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqrshrn v30.2s, v29.2d, #1"); + TEST_SINGLE(sqrshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqrshrn v30.2s, v29.2d, #1"); TEST_SINGLE(sqrshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sqrshrn v30.2s, v29.2d, #31"); - //TEST_SINGLE(sqrshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sqrshrn v30.1d, v29.1d, #1"); - //TEST_SINGLE(sqrshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqrshrn v30.1d, v29.1d, #63"); + 
// TEST_SINGLE(sqrshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sqrshrn v30.1d, v29.1d, #1"); + // TEST_SINGLE(sqrshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqrshrn v30.1d, v29.1d, #63"); - TEST_SINGLE(sqrshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqrshrn2 v30.16b, v29.8h, #1"); - TEST_SINGLE(sqrshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqrshrn2 v30.16b, v29.8h, #7"); - TEST_SINGLE(sqrshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqrshrn2 v30.8h, v29.4s, #1"); + TEST_SINGLE(sqrshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqrshrn2 v30.16b, v29.8h, #1"); + TEST_SINGLE(sqrshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqrshrn2 v30.16b, v29.8h, #7"); + TEST_SINGLE(sqrshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqrshrn2 v30.8h, v29.4s, #1"); TEST_SINGLE(sqrshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sqrshrn2 v30.8h, v29.4s, #15"); - TEST_SINGLE(sqrshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqrshrn2 v30.4s, v29.2d, #1"); + TEST_SINGLE(sqrshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqrshrn2 v30.4s, v29.2d, #1"); TEST_SINGLE(sqrshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sqrshrn2 v30.4s, v29.2d, #31"); - //TEST_SINGLE(sqrshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqrshrn2 v30.2d, v29.2d, #1"); - //TEST_SINGLE(sqrshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqrshrn2 v30.2d, v29.2d, #63"); + // TEST_SINGLE(sqrshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqrshrn2 v30.2d, v29.2d, #1"); + // TEST_SINGLE(sqrshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqrshrn2 v30.2d, v29.2d, #63"); - //TEST_SINGLE(sshll(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sshll v30.8b, v29.8h, #1"); - //TEST_SINGLE(sshll(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sshll v30.8b, v29.8h, #7"); - TEST_SINGLE(sshll(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sshll v30.8h, v29.8b, #1"); + // TEST_SINGLE(sshll(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sshll 
v30.8b, v29.8h, #1"); + // TEST_SINGLE(sshll(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sshll v30.8b, v29.8h, #7"); + TEST_SINGLE(sshll(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sshll v30.8h, v29.8b, #1"); TEST_SINGLE(sshll(SubRegSize::i16Bit, DReg::d30, DReg::d29, 7), "sshll v30.8h, v29.8b, #7"); - TEST_SINGLE(sshll(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sshll v30.4s, v29.4h, #1"); + TEST_SINGLE(sshll(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sshll v30.4s, v29.4h, #1"); TEST_SINGLE(sshll(SubRegSize::i32Bit, DReg::d30, DReg::d29, 15), "sshll v30.4s, v29.4h, #15"); - TEST_SINGLE(sshll(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sshll v30.2d, v29.2s, #1"); + TEST_SINGLE(sshll(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sshll v30.2d, v29.2s, #1"); TEST_SINGLE(sshll(SubRegSize::i64Bit, DReg::d30, DReg::d29, 31), "sshll v30.2d, v29.2s, #31"); - //TEST_SINGLE(sshll2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sshll2 v30.16b, v29.8h, #1"); - //TEST_SINGLE(sshll2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sshll2 v30.16b, v29.8h, #7"); - TEST_SINGLE(sshll2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sshll2 v30.8h, v29.16b, #1"); + // TEST_SINGLE(sshll2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sshll2 v30.16b, v29.8h, #1"); + // TEST_SINGLE(sshll2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sshll2 v30.16b, v29.8h, #7"); + TEST_SINGLE(sshll2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sshll2 v30.8h, v29.16b, #1"); TEST_SINGLE(sshll2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 7), "sshll2 v30.8h, v29.16b, #7"); - TEST_SINGLE(sshll2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sshll2 v30.4s, v29.8h, #1"); + TEST_SINGLE(sshll2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sshll2 v30.4s, v29.8h, #1"); TEST_SINGLE(sshll2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 15), "sshll2 v30.4s, v29.8h, #15"); - TEST_SINGLE(sshll2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sshll2 v30.2d, v29.4s, #1"); + TEST_SINGLE(sshll2(SubRegSize::i64Bit, 
QReg::q30, QReg::q29, 1), "sshll2 v30.2d, v29.4s, #1"); TEST_SINGLE(sshll2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 31), "sshll2 v30.2d, v29.4s, #31"); - //TEST_SINGLE(sxtl(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sxtl v30.8b, v29.8h"); - TEST_SINGLE(sxtl(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sxtl v30.8h, v29.8b"); - TEST_SINGLE(sxtl(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sxtl v30.4s, v29.4h"); - TEST_SINGLE(sxtl(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sxtl v30.2d, v29.2s"); - - //TEST_SINGLE(sxtl(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sxtl v30.8b, v29.8h"); - TEST_SINGLE(sxtl(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sxtl v30.8h, v29.8b"); - TEST_SINGLE(sxtl(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sxtl v30.4s, v29.4h"); - TEST_SINGLE(sxtl(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sxtl v30.2d, v29.2s"); - - //TEST_SINGLE(sxtl2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sxtl2 v30.16b, v29.8h"); - TEST_SINGLE(sxtl2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sxtl2 v30.8h, v29.16b"); - TEST_SINGLE(sxtl2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sxtl2 v30.4s, v29.8h"); - TEST_SINGLE(sxtl2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sxtl2 v30.2d, v29.4s"); - - //TEST_SINGLE(sxtl2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sxtl2 v30.16b, v29.8h"); - TEST_SINGLE(sxtl2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sxtl2 v30.8h, v29.16b"); - TEST_SINGLE(sxtl2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sxtl2 v30.4s, v29.8h"); - TEST_SINGLE(sxtl2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sxtl2 v30.2d, v29.4s"); - - //TEST_SINGLE(scvtf(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "scvtf v30.16b, v29.16b, #1"); - //TEST_SINGLE(scvtf(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "scvtf v30.16b, v29.16b, #7"); - TEST_SINGLE(scvtf(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "scvtf v30.8h, v29.8h, #1"); + // TEST_SINGLE(sxtl(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sxtl v30.8b, v29.8h"); + TEST_SINGLE(sxtl(SubRegSize::i16Bit, QReg::q30, 
QReg::q29), "sxtl v30.8h, v29.8b"); + TEST_SINGLE(sxtl(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sxtl v30.4s, v29.4h"); + TEST_SINGLE(sxtl(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sxtl v30.2d, v29.2s"); + + // TEST_SINGLE(sxtl(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sxtl v30.8b, v29.8h"); + TEST_SINGLE(sxtl(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sxtl v30.8h, v29.8b"); + TEST_SINGLE(sxtl(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sxtl v30.4s, v29.4h"); + TEST_SINGLE(sxtl(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sxtl v30.2d, v29.2s"); + + // TEST_SINGLE(sxtl2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sxtl2 v30.16b, v29.8h"); + TEST_SINGLE(sxtl2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sxtl2 v30.8h, v29.16b"); + TEST_SINGLE(sxtl2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sxtl2 v30.4s, v29.8h"); + TEST_SINGLE(sxtl2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sxtl2 v30.2d, v29.4s"); + + // TEST_SINGLE(sxtl2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sxtl2 v30.16b, v29.8h"); + TEST_SINGLE(sxtl2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sxtl2 v30.8h, v29.16b"); + TEST_SINGLE(sxtl2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sxtl2 v30.4s, v29.8h"); + TEST_SINGLE(sxtl2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sxtl2 v30.2d, v29.4s"); + + // TEST_SINGLE(scvtf(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "scvtf v30.16b, v29.16b, #1"); + // TEST_SINGLE(scvtf(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "scvtf v30.16b, v29.16b, #7"); + TEST_SINGLE(scvtf(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "scvtf v30.8h, v29.8h, #1"); TEST_SINGLE(scvtf(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "scvtf v30.8h, v29.8h, #15"); - TEST_SINGLE(scvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "scvtf v30.4s, v29.4s, #1"); + TEST_SINGLE(scvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "scvtf v30.4s, v29.4s, #1"); TEST_SINGLE(scvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "scvtf v30.4s, v29.4s, #31"); - TEST_SINGLE(scvtf(SubRegSize::i64Bit, QReg::q30, 
QReg::q29, 1), "scvtf v30.2d, v29.2d, #1"); + TEST_SINGLE(scvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "scvtf v30.2d, v29.2d, #1"); TEST_SINGLE(scvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "scvtf v30.2d, v29.2d, #63"); - //TEST_SINGLE(scvtf(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "scvtf v30.8b, v29.8b, #1"); - //TEST_SINGLE(scvtf(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "scvtf v30.8b, v29.8b, #7"); - TEST_SINGLE(scvtf(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "scvtf v30.4h, v29.4h, #1"); + // TEST_SINGLE(scvtf(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "scvtf v30.8b, v29.8b, #1"); + // TEST_SINGLE(scvtf(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "scvtf v30.8b, v29.8b, #7"); + TEST_SINGLE(scvtf(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "scvtf v30.4h, v29.4h, #1"); TEST_SINGLE(scvtf(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "scvtf v30.4h, v29.4h, #15"); - TEST_SINGLE(scvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "scvtf v30.2s, v29.2s, #1"); + TEST_SINGLE(scvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "scvtf v30.2s, v29.2s, #1"); TEST_SINGLE(scvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "scvtf v30.2s, v29.2s, #31"); - //TEST_SINGLE(scvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "scvtf v30.1d, v29.1d, #1"); - //TEST_SINGLE(scvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "scvtf v30.1d, v29.1d, #63"); + // TEST_SINGLE(scvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "scvtf v30.1d, v29.1d, #1"); + // TEST_SINGLE(scvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "scvtf v30.1d, v29.1d, #63"); - //TEST_SINGLE(fcvtzs(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "fcvtzs v30.16b, v29.16b, #1"); - //TEST_SINGLE(fcvtzs(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "fcvtzs v30.16b, v29.16b, #7"); - TEST_SINGLE(fcvtzs(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "fcvtzs v30.8h, v29.8h, #1"); + // TEST_SINGLE(fcvtzs(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "fcvtzs v30.16b, v29.16b, #1"); + // 
TEST_SINGLE(fcvtzs(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "fcvtzs v30.16b, v29.16b, #7"); + TEST_SINGLE(fcvtzs(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "fcvtzs v30.8h, v29.8h, #1"); TEST_SINGLE(fcvtzs(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "fcvtzs v30.8h, v29.8h, #15"); - TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "fcvtzs v30.4s, v29.4s, #1"); + TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "fcvtzs v30.4s, v29.4s, #1"); TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "fcvtzs v30.4s, v29.4s, #31"); - TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "fcvtzs v30.2d, v29.2d, #1"); + TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "fcvtzs v30.2d, v29.2d, #1"); TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "fcvtzs v30.2d, v29.2d, #63"); - //TEST_SINGLE(fcvtzs(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "fcvtzs v30.8b, v29.8b, #1"); - //TEST_SINGLE(fcvtzs(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "fcvtzs v30.8b, v29.8b, #7"); - TEST_SINGLE(fcvtzs(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "fcvtzs v30.4h, v29.4h, #1"); + // TEST_SINGLE(fcvtzs(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "fcvtzs v30.8b, v29.8b, #1"); + // TEST_SINGLE(fcvtzs(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "fcvtzs v30.8b, v29.8b, #7"); + TEST_SINGLE(fcvtzs(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "fcvtzs v30.4h, v29.4h, #1"); TEST_SINGLE(fcvtzs(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "fcvtzs v30.4h, v29.4h, #15"); - TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "fcvtzs v30.2s, v29.2s, #1"); + TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "fcvtzs v30.2s, v29.2s, #1"); TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "fcvtzs v30.2s, v29.2s, #31"); - //TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "fcvtzs v30.1d, v29.1d, #1"); - //TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, 
DReg::d30, DReg::d29, 63), "fcvtzs v30.1d, v29.1d, #63"); + // TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "fcvtzs v30.1d, v29.1d, #1"); + // TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "fcvtzs v30.1d, v29.1d, #63"); - TEST_SINGLE(ushr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ushr v30.16b, v29.16b, #1"); - TEST_SINGLE(ushr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "ushr v30.16b, v29.16b, #7"); - TEST_SINGLE(ushr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ushr v30.8h, v29.8h, #1"); + TEST_SINGLE(ushr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ushr v30.16b, v29.16b, #1"); + TEST_SINGLE(ushr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "ushr v30.16b, v29.16b, #7"); + TEST_SINGLE(ushr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ushr v30.8h, v29.8h, #1"); TEST_SINGLE(ushr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "ushr v30.8h, v29.8h, #15"); - TEST_SINGLE(ushr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ushr v30.4s, v29.4s, #1"); + TEST_SINGLE(ushr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ushr v30.4s, v29.4s, #1"); TEST_SINGLE(ushr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "ushr v30.4s, v29.4s, #31"); - TEST_SINGLE(ushr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ushr v30.2d, v29.2d, #1"); + TEST_SINGLE(ushr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ushr v30.2d, v29.2d, #1"); TEST_SINGLE(ushr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "ushr v30.2d, v29.2d, #63"); - TEST_SINGLE(ushr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ushr v30.8b, v29.8b, #1"); - TEST_SINGLE(ushr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ushr v30.8b, v29.8b, #7"); - TEST_SINGLE(ushr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ushr v30.4h, v29.4h, #1"); + TEST_SINGLE(ushr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ushr v30.8b, v29.8b, #1"); + TEST_SINGLE(ushr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ushr v30.8b, v29.8b, #7"); + TEST_SINGLE(ushr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), 
"ushr v30.4h, v29.4h, #1"); TEST_SINGLE(ushr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "ushr v30.4h, v29.4h, #15"); - TEST_SINGLE(ushr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ushr v30.2s, v29.2s, #1"); + TEST_SINGLE(ushr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ushr v30.2s, v29.2s, #1"); TEST_SINGLE(ushr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "ushr v30.2s, v29.2s, #31"); - //TEST_SINGLE(ushr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "ushr v30.1d, v29.1d, #1"); - //TEST_SINGLE(ushr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "ushr v30.1d, v29.1d, #63"); + // TEST_SINGLE(ushr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "ushr v30.1d, v29.1d, #1"); + // TEST_SINGLE(ushr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "ushr v30.1d, v29.1d, #63"); - TEST_SINGLE(usra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "usra v30.16b, v29.16b, #1"); - TEST_SINGLE(usra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "usra v30.16b, v29.16b, #7"); - TEST_SINGLE(usra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "usra v30.8h, v29.8h, #1"); + TEST_SINGLE(usra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "usra v30.16b, v29.16b, #1"); + TEST_SINGLE(usra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "usra v30.16b, v29.16b, #7"); + TEST_SINGLE(usra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "usra v30.8h, v29.8h, #1"); TEST_SINGLE(usra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "usra v30.8h, v29.8h, #15"); - TEST_SINGLE(usra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "usra v30.4s, v29.4s, #1"); + TEST_SINGLE(usra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "usra v30.4s, v29.4s, #1"); TEST_SINGLE(usra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "usra v30.4s, v29.4s, #31"); - TEST_SINGLE(usra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "usra v30.2d, v29.2d, #1"); + TEST_SINGLE(usra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "usra v30.2d, v29.2d, #1"); TEST_SINGLE(usra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "usra v30.2d, v29.2d, #63"); 
- TEST_SINGLE(usra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "usra v30.8b, v29.8b, #1"); - TEST_SINGLE(usra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "usra v30.8b, v29.8b, #7"); - TEST_SINGLE(usra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "usra v30.4h, v29.4h, #1"); + TEST_SINGLE(usra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "usra v30.8b, v29.8b, #1"); + TEST_SINGLE(usra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "usra v30.8b, v29.8b, #7"); + TEST_SINGLE(usra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "usra v30.4h, v29.4h, #1"); TEST_SINGLE(usra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "usra v30.4h, v29.4h, #15"); - TEST_SINGLE(usra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "usra v30.2s, v29.2s, #1"); + TEST_SINGLE(usra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "usra v30.2s, v29.2s, #1"); TEST_SINGLE(usra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "usra v30.2s, v29.2s, #31"); - //TEST_SINGLE(usra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "usra v30.1d, v29.1d, #1"); - //TEST_SINGLE(usra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "usra v30.1d, v29.1d, #63"); + // TEST_SINGLE(usra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "usra v30.1d, v29.1d, #1"); + // TEST_SINGLE(usra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "usra v30.1d, v29.1d, #63"); - TEST_SINGLE(urshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "urshr v30.16b, v29.16b, #1"); - TEST_SINGLE(urshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "urshr v30.16b, v29.16b, #7"); - TEST_SINGLE(urshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "urshr v30.8h, v29.8h, #1"); + TEST_SINGLE(urshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "urshr v30.16b, v29.16b, #1"); + TEST_SINGLE(urshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "urshr v30.16b, v29.16b, #7"); + TEST_SINGLE(urshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "urshr v30.8h, v29.8h, #1"); TEST_SINGLE(urshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "urshr v30.8h, v29.8h, #15"); - 
TEST_SINGLE(urshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "urshr v30.4s, v29.4s, #1"); + TEST_SINGLE(urshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "urshr v30.4s, v29.4s, #1"); TEST_SINGLE(urshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "urshr v30.4s, v29.4s, #31"); - TEST_SINGLE(urshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "urshr v30.2d, v29.2d, #1"); + TEST_SINGLE(urshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "urshr v30.2d, v29.2d, #1"); TEST_SINGLE(urshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "urshr v30.2d, v29.2d, #63"); - TEST_SINGLE(urshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "urshr v30.8b, v29.8b, #1"); - TEST_SINGLE(urshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "urshr v30.8b, v29.8b, #7"); - TEST_SINGLE(urshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "urshr v30.4h, v29.4h, #1"); + TEST_SINGLE(urshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "urshr v30.8b, v29.8b, #1"); + TEST_SINGLE(urshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "urshr v30.8b, v29.8b, #7"); + TEST_SINGLE(urshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "urshr v30.4h, v29.4h, #1"); TEST_SINGLE(urshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "urshr v30.4h, v29.4h, #15"); - TEST_SINGLE(urshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "urshr v30.2s, v29.2s, #1"); + TEST_SINGLE(urshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "urshr v30.2s, v29.2s, #1"); TEST_SINGLE(urshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "urshr v30.2s, v29.2s, #31"); - //TEST_SINGLE(urshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "urshr v30.1d, v29.1d, #1"); - //TEST_SINGLE(urshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "urshr v30.1d, v29.1d, #63"); + // TEST_SINGLE(urshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "urshr v30.1d, v29.1d, #1"); + // TEST_SINGLE(urshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "urshr v30.1d, v29.1d, #63"); - TEST_SINGLE(ursra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ursra v30.16b, v29.16b, 
#1"); - TEST_SINGLE(ursra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "ursra v30.16b, v29.16b, #7"); - TEST_SINGLE(ursra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ursra v30.8h, v29.8h, #1"); + TEST_SINGLE(ursra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ursra v30.16b, v29.16b, #1"); + TEST_SINGLE(ursra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "ursra v30.16b, v29.16b, #7"); + TEST_SINGLE(ursra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ursra v30.8h, v29.8h, #1"); TEST_SINGLE(ursra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "ursra v30.8h, v29.8h, #15"); - TEST_SINGLE(ursra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ursra v30.4s, v29.4s, #1"); + TEST_SINGLE(ursra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ursra v30.4s, v29.4s, #1"); TEST_SINGLE(ursra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "ursra v30.4s, v29.4s, #31"); - TEST_SINGLE(ursra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ursra v30.2d, v29.2d, #1"); + TEST_SINGLE(ursra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ursra v30.2d, v29.2d, #1"); TEST_SINGLE(ursra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "ursra v30.2d, v29.2d, #63"); - TEST_SINGLE(ursra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ursra v30.8b, v29.8b, #1"); - TEST_SINGLE(ursra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ursra v30.8b, v29.8b, #7"); - TEST_SINGLE(ursra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ursra v30.4h, v29.4h, #1"); + TEST_SINGLE(ursra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ursra v30.8b, v29.8b, #1"); + TEST_SINGLE(ursra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ursra v30.8b, v29.8b, #7"); + TEST_SINGLE(ursra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ursra v30.4h, v29.4h, #1"); TEST_SINGLE(ursra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "ursra v30.4h, v29.4h, #15"); - TEST_SINGLE(ursra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ursra v30.2s, v29.2s, #1"); + TEST_SINGLE(ursra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ursra v30.2s, v29.2s, #1"); 
TEST_SINGLE(ursra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "ursra v30.2s, v29.2s, #31"); - //TEST_SINGLE(ursra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "ursra v30.1d, v29.1d, #1"); - //TEST_SINGLE(ursra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "ursra v30.1d, v29.1d, #63"); + // TEST_SINGLE(ursra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "ursra v30.1d, v29.1d, #1"); + // TEST_SINGLE(ursra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "ursra v30.1d, v29.1d, #63"); - TEST_SINGLE(sri(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sri v30.16b, v29.16b, #1"); - TEST_SINGLE(sri(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sri v30.16b, v29.16b, #7"); - TEST_SINGLE(sri(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sri v30.8h, v29.8h, #1"); + TEST_SINGLE(sri(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sri v30.16b, v29.16b, #1"); + TEST_SINGLE(sri(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sri v30.16b, v29.16b, #7"); + TEST_SINGLE(sri(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sri v30.8h, v29.8h, #1"); TEST_SINGLE(sri(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sri v30.8h, v29.8h, #15"); - TEST_SINGLE(sri(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sri v30.4s, v29.4s, #1"); + TEST_SINGLE(sri(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sri v30.4s, v29.4s, #1"); TEST_SINGLE(sri(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sri v30.4s, v29.4s, #31"); - TEST_SINGLE(sri(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sri v30.2d, v29.2d, #1"); + TEST_SINGLE(sri(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sri v30.2d, v29.2d, #1"); TEST_SINGLE(sri(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sri v30.2d, v29.2d, #63"); - TEST_SINGLE(sri(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sri v30.8b, v29.8b, #1"); - TEST_SINGLE(sri(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sri v30.8b, v29.8b, #7"); - TEST_SINGLE(sri(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sri v30.4h, v29.4h, #1"); + TEST_SINGLE(sri(SubRegSize::i8Bit, DReg::d30, 
DReg::d29, 1), "sri v30.8b, v29.8b, #1"); + TEST_SINGLE(sri(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sri v30.8b, v29.8b, #7"); + TEST_SINGLE(sri(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sri v30.4h, v29.4h, #1"); TEST_SINGLE(sri(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sri v30.4h, v29.4h, #15"); - TEST_SINGLE(sri(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sri v30.2s, v29.2s, #1"); + TEST_SINGLE(sri(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sri v30.2s, v29.2s, #1"); TEST_SINGLE(sri(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sri v30.2s, v29.2s, #31"); - //TEST_SINGLE(sri(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sri v30.1d, v29.1d, #1"); - //TEST_SINGLE(sri(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sri v30.1d, v29.1d, #63"); + // TEST_SINGLE(sri(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sri v30.1d, v29.1d, #1"); + // TEST_SINGLE(sri(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sri v30.1d, v29.1d, #63"); - TEST_SINGLE(sli(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sli v30.16b, v29.16b, #1"); - TEST_SINGLE(sli(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sli v30.16b, v29.16b, #7"); - TEST_SINGLE(sli(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sli v30.8h, v29.8h, #1"); + TEST_SINGLE(sli(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sli v30.16b, v29.16b, #1"); + TEST_SINGLE(sli(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sli v30.16b, v29.16b, #7"); + TEST_SINGLE(sli(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sli v30.8h, v29.8h, #1"); TEST_SINGLE(sli(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sli v30.8h, v29.8h, #15"); - TEST_SINGLE(sli(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sli v30.4s, v29.4s, #1"); + TEST_SINGLE(sli(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sli v30.4s, v29.4s, #1"); TEST_SINGLE(sli(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sli v30.4s, v29.4s, #31"); - TEST_SINGLE(sli(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sli v30.2d, v29.2d, #1"); + 
TEST_SINGLE(sli(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sli v30.2d, v29.2d, #1"); TEST_SINGLE(sli(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sli v30.2d, v29.2d, #63"); - TEST_SINGLE(sli(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sli v30.8b, v29.8b, #1"); - TEST_SINGLE(sli(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sli v30.8b, v29.8b, #7"); - TEST_SINGLE(sli(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sli v30.4h, v29.4h, #1"); + TEST_SINGLE(sli(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sli v30.8b, v29.8b, #1"); + TEST_SINGLE(sli(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sli v30.8b, v29.8b, #7"); + TEST_SINGLE(sli(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sli v30.4h, v29.4h, #1"); TEST_SINGLE(sli(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sli v30.4h, v29.4h, #15"); - TEST_SINGLE(sli(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sli v30.2s, v29.2s, #1"); + TEST_SINGLE(sli(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sli v30.2s, v29.2s, #1"); TEST_SINGLE(sli(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sli v30.2s, v29.2s, #31"); - //TEST_SINGLE(sli(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sli v30.1d, v29.1d, #1"); - //TEST_SINGLE(sli(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sli v30.1d, v29.1d, #63"); + // TEST_SINGLE(sli(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sli v30.1d, v29.1d, #1"); + // TEST_SINGLE(sli(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sli v30.1d, v29.1d, #63"); - TEST_SINGLE(sqshlu(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqshlu v30.16b, v29.16b, #1"); - TEST_SINGLE(sqshlu(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqshlu v30.16b, v29.16b, #7"); - TEST_SINGLE(sqshlu(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqshlu v30.8h, v29.8h, #1"); + TEST_SINGLE(sqshlu(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqshlu v30.16b, v29.16b, #1"); + TEST_SINGLE(sqshlu(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqshlu v30.16b, v29.16b, #7"); + TEST_SINGLE(sqshlu(SubRegSize::i16Bit, 
QReg::q30, QReg::q29, 1), "sqshlu v30.8h, v29.8h, #1"); TEST_SINGLE(sqshlu(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sqshlu v30.8h, v29.8h, #15"); - TEST_SINGLE(sqshlu(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqshlu v30.4s, v29.4s, #1"); + TEST_SINGLE(sqshlu(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqshlu v30.4s, v29.4s, #1"); TEST_SINGLE(sqshlu(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sqshlu v30.4s, v29.4s, #31"); - TEST_SINGLE(sqshlu(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqshlu v30.2d, v29.2d, #1"); + TEST_SINGLE(sqshlu(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqshlu v30.2d, v29.2d, #1"); TEST_SINGLE(sqshlu(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqshlu v30.2d, v29.2d, #63"); - TEST_SINGLE(sqshlu(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqshlu v30.8b, v29.8b, #1"); - TEST_SINGLE(sqshlu(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqshlu v30.8b, v29.8b, #7"); - TEST_SINGLE(sqshlu(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqshlu v30.4h, v29.4h, #1"); + TEST_SINGLE(sqshlu(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqshlu v30.8b, v29.8b, #1"); + TEST_SINGLE(sqshlu(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqshlu v30.8b, v29.8b, #7"); + TEST_SINGLE(sqshlu(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqshlu v30.4h, v29.4h, #1"); TEST_SINGLE(sqshlu(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sqshlu v30.4h, v29.4h, #15"); - TEST_SINGLE(sqshlu(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqshlu v30.2s, v29.2s, #1"); + TEST_SINGLE(sqshlu(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqshlu v30.2s, v29.2s, #1"); TEST_SINGLE(sqshlu(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sqshlu v30.2s, v29.2s, #31"); - //TEST_SINGLE(sqshlu(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sqshlu v30.1d, v29.1d, #1"); - //TEST_SINGLE(sqshlu(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqshlu v30.1d, v29.1d, #63"); + // TEST_SINGLE(sqshlu(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sqshlu v30.1d, v29.1d, 
#1"); + // TEST_SINGLE(sqshlu(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqshlu v30.1d, v29.1d, #63"); - TEST_SINGLE(uqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "uqshl v30.16b, v29.16b, #1"); - TEST_SINGLE(uqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "uqshl v30.16b, v29.16b, #7"); - TEST_SINGLE(uqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "uqshl v30.8h, v29.8h, #1"); + TEST_SINGLE(uqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "uqshl v30.16b, v29.16b, #1"); + TEST_SINGLE(uqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "uqshl v30.16b, v29.16b, #7"); + TEST_SINGLE(uqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "uqshl v30.8h, v29.8h, #1"); TEST_SINGLE(uqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "uqshl v30.8h, v29.8h, #15"); - TEST_SINGLE(uqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "uqshl v30.4s, v29.4s, #1"); + TEST_SINGLE(uqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "uqshl v30.4s, v29.4s, #1"); TEST_SINGLE(uqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "uqshl v30.4s, v29.4s, #31"); - TEST_SINGLE(uqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "uqshl v30.2d, v29.2d, #1"); + TEST_SINGLE(uqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "uqshl v30.2d, v29.2d, #1"); TEST_SINGLE(uqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "uqshl v30.2d, v29.2d, #63"); - TEST_SINGLE(uqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "uqshl v30.8b, v29.8b, #1"); - TEST_SINGLE(uqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "uqshl v30.8b, v29.8b, #7"); - TEST_SINGLE(uqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "uqshl v30.4h, v29.4h, #1"); + TEST_SINGLE(uqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "uqshl v30.8b, v29.8b, #1"); + TEST_SINGLE(uqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "uqshl v30.8b, v29.8b, #7"); + TEST_SINGLE(uqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "uqshl v30.4h, v29.4h, #1"); TEST_SINGLE(uqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "uqshl v30.4h, 
v29.4h, #15"); - TEST_SINGLE(uqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "uqshl v30.2s, v29.2s, #1"); + TEST_SINGLE(uqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "uqshl v30.2s, v29.2s, #1"); TEST_SINGLE(uqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "uqshl v30.2s, v29.2s, #31"); - //TEST_SINGLE(uqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "uqshl v30.1d, v29.1d, #1"); - //TEST_SINGLE(uqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "uqshl v30.1d, v29.1d, #63"); + // TEST_SINGLE(uqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "uqshl v30.1d, v29.1d, #1"); + // TEST_SINGLE(uqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "uqshl v30.1d, v29.1d, #63"); - TEST_SINGLE(sqshrun(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqshrun v30.8b, v29.8h, #1"); - TEST_SINGLE(sqshrun(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqshrun v30.8b, v29.8h, #7"); - TEST_SINGLE(sqshrun(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqshrun v30.4h, v29.4s, #1"); + TEST_SINGLE(sqshrun(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqshrun v30.8b, v29.8h, #1"); + TEST_SINGLE(sqshrun(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqshrun v30.8b, v29.8h, #7"); + TEST_SINGLE(sqshrun(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqshrun v30.4h, v29.4s, #1"); TEST_SINGLE(sqshrun(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sqshrun v30.4h, v29.4s, #15"); - TEST_SINGLE(sqshrun(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqshrun v30.2s, v29.2d, #1"); + TEST_SINGLE(sqshrun(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqshrun v30.2s, v29.2d, #1"); TEST_SINGLE(sqshrun(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sqshrun v30.2s, v29.2d, #31"); - //TEST_SINGLE(sqshrun(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sqshrun v30.1d, v29.1d, #1"); - //TEST_SINGLE(sqshrun(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqshrun v30.1d, v29.1d, #63"); + // TEST_SINGLE(sqshrun(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sqshrun v30.1d, v29.1d, #1"); + // 
TEST_SINGLE(sqshrun(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqshrun v30.1d, v29.1d, #63"); - TEST_SINGLE(sqshrun2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqshrun2 v30.16b, v29.8h, #1"); - TEST_SINGLE(sqshrun2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqshrun2 v30.16b, v29.8h, #7"); - TEST_SINGLE(sqshrun2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqshrun2 v30.8h, v29.4s, #1"); + TEST_SINGLE(sqshrun2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqshrun2 v30.16b, v29.8h, #1"); + TEST_SINGLE(sqshrun2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqshrun2 v30.16b, v29.8h, #7"); + TEST_SINGLE(sqshrun2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqshrun2 v30.8h, v29.4s, #1"); TEST_SINGLE(sqshrun2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sqshrun2 v30.8h, v29.4s, #15"); - TEST_SINGLE(sqshrun2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqshrun2 v30.4s, v29.2d, #1"); + TEST_SINGLE(sqshrun2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqshrun2 v30.4s, v29.2d, #1"); TEST_SINGLE(sqshrun2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sqshrun2 v30.4s, v29.2d, #31"); - //TEST_SINGLE(sqshrun2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqshrun2 v30.2d, v29.2d, #1"); - //TEST_SINGLE(sqshrun2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqshrun2 v30.2d, v29.2d, #63"); + // TEST_SINGLE(sqshrun2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqshrun2 v30.2d, v29.2d, #1"); + // TEST_SINGLE(sqshrun2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqshrun2 v30.2d, v29.2d, #63"); - TEST_SINGLE(sqrshrun(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqrshrun v30.8b, v29.8h, #1"); - TEST_SINGLE(sqrshrun(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqrshrun v30.8b, v29.8h, #7"); - TEST_SINGLE(sqrshrun(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqrshrun v30.4h, v29.4s, #1"); + TEST_SINGLE(sqrshrun(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqrshrun v30.8b, v29.8h, #1"); + TEST_SINGLE(sqrshrun(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), 
"sqrshrun v30.8b, v29.8h, #7"); + TEST_SINGLE(sqrshrun(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqrshrun v30.4h, v29.4s, #1"); TEST_SINGLE(sqrshrun(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sqrshrun v30.4h, v29.4s, #15"); - TEST_SINGLE(sqrshrun(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqrshrun v30.2s, v29.2d, #1"); + TEST_SINGLE(sqrshrun(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqrshrun v30.2s, v29.2d, #1"); TEST_SINGLE(sqrshrun(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sqrshrun v30.2s, v29.2d, #31"); - //TEST_SINGLE(sqrshrun(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sqrshrun v30.1d, v29.1d, #1"); - //TEST_SINGLE(sqrshrun(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqrshrun v30.1d, v29.1d, #63"); + // TEST_SINGLE(sqrshrun(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sqrshrun v30.1d, v29.1d, #1"); + // TEST_SINGLE(sqrshrun(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqrshrun v30.1d, v29.1d, #63"); - TEST_SINGLE(sqrshrun2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqrshrun2 v30.16b, v29.8h, #1"); - TEST_SINGLE(sqrshrun2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqrshrun2 v30.16b, v29.8h, #7"); - TEST_SINGLE(sqrshrun2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqrshrun2 v30.8h, v29.4s, #1"); + TEST_SINGLE(sqrshrun2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqrshrun2 v30.16b, v29.8h, #1"); + TEST_SINGLE(sqrshrun2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqrshrun2 v30.16b, v29.8h, #7"); + TEST_SINGLE(sqrshrun2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqrshrun2 v30.8h, v29.4s, #1"); TEST_SINGLE(sqrshrun2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sqrshrun2 v30.8h, v29.4s, #15"); - TEST_SINGLE(sqrshrun2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqrshrun2 v30.4s, v29.2d, #1"); + TEST_SINGLE(sqrshrun2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqrshrun2 v30.4s, v29.2d, #1"); TEST_SINGLE(sqrshrun2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sqrshrun2 v30.4s, v29.2d, #31"); - 
//TEST_SINGLE(sqrshrun2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqrshrun2 v30.2d, v29.2d, #1"); - //TEST_SINGLE(sqrshrun2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqrshrun2 v30.2d, v29.2d, #63"); + // TEST_SINGLE(sqrshrun2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqrshrun2 v30.2d, v29.2d, #1"); + // TEST_SINGLE(sqrshrun2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqrshrun2 v30.2d, v29.2d, #63"); - TEST_SINGLE(uqshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "uqshrn v30.8b, v29.8h, #1"); - TEST_SINGLE(uqshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "uqshrn v30.8b, v29.8h, #7"); - TEST_SINGLE(uqshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "uqshrn v30.4h, v29.4s, #1"); + TEST_SINGLE(uqshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "uqshrn v30.8b, v29.8h, #1"); + TEST_SINGLE(uqshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "uqshrn v30.8b, v29.8h, #7"); + TEST_SINGLE(uqshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "uqshrn v30.4h, v29.4s, #1"); TEST_SINGLE(uqshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "uqshrn v30.4h, v29.4s, #15"); - TEST_SINGLE(uqshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "uqshrn v30.2s, v29.2d, #1"); + TEST_SINGLE(uqshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "uqshrn v30.2s, v29.2d, #1"); TEST_SINGLE(uqshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "uqshrn v30.2s, v29.2d, #31"); - //TEST_SINGLE(uqshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "uqshrn v30.1d, v29.1d, #1"); - //TEST_SINGLE(uqshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "uqshrn v30.1d, v29.1d, #63"); + // TEST_SINGLE(uqshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "uqshrn v30.1d, v29.1d, #1"); + // TEST_SINGLE(uqshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "uqshrn v30.1d, v29.1d, #63"); - TEST_SINGLE(uqshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "uqshrn2 v30.16b, v29.8h, #1"); - TEST_SINGLE(uqshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "uqshrn2 v30.16b, v29.8h, #7"); - 
TEST_SINGLE(uqshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "uqshrn2 v30.8h, v29.4s, #1"); + TEST_SINGLE(uqshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "uqshrn2 v30.16b, v29.8h, #1"); + TEST_SINGLE(uqshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "uqshrn2 v30.16b, v29.8h, #7"); + TEST_SINGLE(uqshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "uqshrn2 v30.8h, v29.4s, #1"); TEST_SINGLE(uqshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "uqshrn2 v30.8h, v29.4s, #15"); - TEST_SINGLE(uqshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "uqshrn2 v30.4s, v29.2d, #1"); + TEST_SINGLE(uqshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "uqshrn2 v30.4s, v29.2d, #1"); TEST_SINGLE(uqshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "uqshrn2 v30.4s, v29.2d, #31"); - //TEST_SINGLE(uqshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "uqshrn2 v30.2d, v29.2d, #1"); - //TEST_SINGLE(uqshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "uqshrn2 v30.2d, v29.2d, #63"); + // TEST_SINGLE(uqshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "uqshrn2 v30.2d, v29.2d, #1"); + // TEST_SINGLE(uqshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "uqshrn2 v30.2d, v29.2d, #63"); - TEST_SINGLE(uqrshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "uqrshrn v30.8b, v29.8h, #1"); - TEST_SINGLE(uqrshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "uqrshrn v30.8b, v29.8h, #7"); - TEST_SINGLE(uqrshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "uqrshrn v30.4h, v29.4s, #1"); + TEST_SINGLE(uqrshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "uqrshrn v30.8b, v29.8h, #1"); + TEST_SINGLE(uqrshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "uqrshrn v30.8b, v29.8h, #7"); + TEST_SINGLE(uqrshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "uqrshrn v30.4h, v29.4s, #1"); TEST_SINGLE(uqrshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "uqrshrn v30.4h, v29.4s, #15"); - TEST_SINGLE(uqrshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "uqrshrn v30.2s, v29.2d, #1"); + 
TEST_SINGLE(uqrshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "uqrshrn v30.2s, v29.2d, #1"); TEST_SINGLE(uqrshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "uqrshrn v30.2s, v29.2d, #31"); - //TEST_SINGLE(uqrshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "uqrshrn v30.1d, v29.1d, #1"); - //TEST_SINGLE(uqrshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "uqrshrn v30.1d, v29.1d, #63"); + // TEST_SINGLE(uqrshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "uqrshrn v30.1d, v29.1d, #1"); + // TEST_SINGLE(uqrshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "uqrshrn v30.1d, v29.1d, #63"); - TEST_SINGLE(uqrshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "uqrshrn2 v30.16b, v29.8h, #1"); - TEST_SINGLE(uqrshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "uqrshrn2 v30.16b, v29.8h, #7"); - TEST_SINGLE(uqrshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "uqrshrn2 v30.8h, v29.4s, #1"); + TEST_SINGLE(uqrshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "uqrshrn2 v30.16b, v29.8h, #1"); + TEST_SINGLE(uqrshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "uqrshrn2 v30.16b, v29.8h, #7"); + TEST_SINGLE(uqrshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "uqrshrn2 v30.8h, v29.4s, #1"); TEST_SINGLE(uqrshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "uqrshrn2 v30.8h, v29.4s, #15"); - TEST_SINGLE(uqrshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "uqrshrn2 v30.4s, v29.2d, #1"); + TEST_SINGLE(uqrshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "uqrshrn2 v30.4s, v29.2d, #1"); TEST_SINGLE(uqrshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "uqrshrn2 v30.4s, v29.2d, #31"); - //TEST_SINGLE(uqrshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "uqrshrn2 v30.2d, v29.2d, #1"); - //TEST_SINGLE(uqrshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "uqrshrn2 v30.2d, v29.2d, #63"); + // TEST_SINGLE(uqrshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "uqrshrn2 v30.2d, v29.2d, #1"); + // TEST_SINGLE(uqrshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 
63), "uqrshrn2 v30.2d, v29.2d, #63"); - //TEST_SINGLE(ushll(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ushll v30.8b, v29.8h, #1"); - //TEST_SINGLE(ushll(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ushll v30.8b, v29.8h, #7"); - TEST_SINGLE(ushll(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ushll v30.8h, v29.8b, #1"); + // TEST_SINGLE(ushll(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ushll v30.8b, v29.8h, #1"); + // TEST_SINGLE(ushll(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ushll v30.8b, v29.8h, #7"); + TEST_SINGLE(ushll(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ushll v30.8h, v29.8b, #1"); TEST_SINGLE(ushll(SubRegSize::i16Bit, DReg::d30, DReg::d29, 7), "ushll v30.8h, v29.8b, #7"); - TEST_SINGLE(ushll(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ushll v30.4s, v29.4h, #1"); + TEST_SINGLE(ushll(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ushll v30.4s, v29.4h, #1"); TEST_SINGLE(ushll(SubRegSize::i32Bit, DReg::d30, DReg::d29, 15), "ushll v30.4s, v29.4h, #15"); - TEST_SINGLE(ushll(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "ushll v30.2d, v29.2s, #1"); + TEST_SINGLE(ushll(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "ushll v30.2d, v29.2s, #1"); TEST_SINGLE(ushll(SubRegSize::i64Bit, DReg::d30, DReg::d29, 31), "ushll v30.2d, v29.2s, #31"); - //TEST_SINGLE(ushll2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ushll2 v30.16b, v29.8h, #1"); - //TEST_SINGLE(ushll2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "ushll2 v30.16b, v29.8h, #7"); - TEST_SINGLE(ushll2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ushll2 v30.8h, v29.16b, #1"); + // TEST_SINGLE(ushll2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ushll2 v30.16b, v29.8h, #1"); + // TEST_SINGLE(ushll2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "ushll2 v30.16b, v29.8h, #7"); + TEST_SINGLE(ushll2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ushll2 v30.8h, v29.16b, #1"); TEST_SINGLE(ushll2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 7), "ushll2 v30.8h, v29.16b, #7"); - 
TEST_SINGLE(ushll2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ushll2 v30.4s, v29.8h, #1"); + TEST_SINGLE(ushll2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ushll2 v30.4s, v29.8h, #1"); TEST_SINGLE(ushll2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 15), "ushll2 v30.4s, v29.8h, #15"); - TEST_SINGLE(ushll2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ushll2 v30.2d, v29.4s, #1"); + TEST_SINGLE(ushll2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ushll2 v30.2d, v29.4s, #1"); TEST_SINGLE(ushll2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 31), "ushll2 v30.2d, v29.4s, #31"); - //TEST_SINGLE(uxtl(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uxtl v30.8b, v29.8h"); - TEST_SINGLE(uxtl(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uxtl v30.8h, v29.8b"); - TEST_SINGLE(uxtl(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uxtl v30.4s, v29.4h"); - TEST_SINGLE(uxtl(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uxtl v30.2d, v29.2s"); + // TEST_SINGLE(uxtl(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uxtl v30.8b, v29.8h"); + TEST_SINGLE(uxtl(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uxtl v30.8h, v29.8b"); + TEST_SINGLE(uxtl(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uxtl v30.4s, v29.4h"); + TEST_SINGLE(uxtl(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uxtl v30.2d, v29.2s"); - //TEST_SINGLE(uxtl2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uxtl2 v30.16b, v29.8h"); - TEST_SINGLE(uxtl2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uxtl2 v30.8h, v29.16b"); - TEST_SINGLE(uxtl2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uxtl2 v30.4s, v29.8h"); - TEST_SINGLE(uxtl2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uxtl2 v30.2d, v29.4s"); + // TEST_SINGLE(uxtl2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uxtl2 v30.16b, v29.8h"); + TEST_SINGLE(uxtl2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uxtl2 v30.8h, v29.16b"); + TEST_SINGLE(uxtl2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uxtl2 v30.4s, v29.8h"); + TEST_SINGLE(uxtl2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uxtl2 v30.2d, v29.4s"); - 
//TEST_SINGLE(ucvtf(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ucvtf v30.16b, v29.16b, #1"); - //TEST_SINGLE(ucvtf(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "ucvtf v30.16b, v29.16b, #7"); - TEST_SINGLE(ucvtf(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ucvtf v30.8h, v29.8h, #1"); + // TEST_SINGLE(ucvtf(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ucvtf v30.16b, v29.16b, #1"); + // TEST_SINGLE(ucvtf(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "ucvtf v30.16b, v29.16b, #7"); + TEST_SINGLE(ucvtf(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ucvtf v30.8h, v29.8h, #1"); TEST_SINGLE(ucvtf(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "ucvtf v30.8h, v29.8h, #15"); - TEST_SINGLE(ucvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ucvtf v30.4s, v29.4s, #1"); + TEST_SINGLE(ucvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ucvtf v30.4s, v29.4s, #1"); TEST_SINGLE(ucvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "ucvtf v30.4s, v29.4s, #31"); - TEST_SINGLE(ucvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ucvtf v30.2d, v29.2d, #1"); + TEST_SINGLE(ucvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ucvtf v30.2d, v29.2d, #1"); TEST_SINGLE(ucvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "ucvtf v30.2d, v29.2d, #63"); - //TEST_SINGLE(ucvtf(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ucvtf v30.8b, v29.8b, #1"); - //TEST_SINGLE(ucvtf(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ucvtf v30.8b, v29.8b, #7"); - TEST_SINGLE(ucvtf(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ucvtf v30.4h, v29.4h, #1"); + // TEST_SINGLE(ucvtf(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ucvtf v30.8b, v29.8b, #1"); + // TEST_SINGLE(ucvtf(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ucvtf v30.8b, v29.8b, #7"); + TEST_SINGLE(ucvtf(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ucvtf v30.4h, v29.4h, #1"); TEST_SINGLE(ucvtf(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "ucvtf v30.4h, v29.4h, #15"); - TEST_SINGLE(ucvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ucvtf v30.2s, 
v29.2s, #1"); + TEST_SINGLE(ucvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ucvtf v30.2s, v29.2s, #1"); TEST_SINGLE(ucvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "ucvtf v30.2s, v29.2s, #31"); - //TEST_SINGLE(ucvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "ucvtf v30.1d, v29.1d, #1"); - //TEST_SINGLE(ucvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "ucvtf v30.1d, v29.1d, #63"); + // TEST_SINGLE(ucvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "ucvtf v30.1d, v29.1d, #1"); + // TEST_SINGLE(ucvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "ucvtf v30.1d, v29.1d, #63"); - //TEST_SINGLE(fcvtzu(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "fcvtzu v30.16b, v29.16b, #1"); - //TEST_SINGLE(fcvtzu(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "fcvtzu v30.16b, v29.16b, #7"); - TEST_SINGLE(fcvtzu(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "fcvtzu v30.8h, v29.8h, #1"); + // TEST_SINGLE(fcvtzu(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "fcvtzu v30.16b, v29.16b, #1"); + // TEST_SINGLE(fcvtzu(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "fcvtzu v30.16b, v29.16b, #7"); + TEST_SINGLE(fcvtzu(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "fcvtzu v30.8h, v29.8h, #1"); TEST_SINGLE(fcvtzu(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "fcvtzu v30.8h, v29.8h, #15"); - TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "fcvtzu v30.4s, v29.4s, #1"); + TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "fcvtzu v30.4s, v29.4s, #1"); TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "fcvtzu v30.4s, v29.4s, #31"); - TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "fcvtzu v30.2d, v29.2d, #1"); + TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "fcvtzu v30.2d, v29.2d, #1"); TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "fcvtzu v30.2d, v29.2d, #63"); - //TEST_SINGLE(fcvtzu(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "fcvtzu v30.8b, v29.8b, #1"); - 
//TEST_SINGLE(fcvtzu(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "fcvtzu v30.8b, v29.8b, #7"); - TEST_SINGLE(fcvtzu(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "fcvtzu v30.4h, v29.4h, #1"); + // TEST_SINGLE(fcvtzu(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "fcvtzu v30.8b, v29.8b, #1"); + // TEST_SINGLE(fcvtzu(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "fcvtzu v30.8b, v29.8b, #7"); + TEST_SINGLE(fcvtzu(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "fcvtzu v30.4h, v29.4h, #1"); TEST_SINGLE(fcvtzu(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "fcvtzu v30.4h, v29.4h, #15"); - TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "fcvtzu v30.2s, v29.2s, #1"); + TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "fcvtzu v30.2s, v29.2s, #1"); TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "fcvtzu v30.2s, v29.2s, #31"); - //TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "fcvtzu v30.1d, v29.1d, #1"); - //TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "fcvtzu v30.1d, v29.1d, #63"); + // TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "fcvtzu v30.1d, v29.1d, #1"); + // TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "fcvtzu v30.1d, v29.1d, #63"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x indexed element") { @@ -2658,8 +2664,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(smlal(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "smlal v30.4s, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(smlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smlal v30.2d, v29.2s, v28.s[0]"); - //TEST_SINGLE(smlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smlal v30.2d, v29.2s, v28.s[3]"); + // TEST_SINGLE(smlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smlal v30.2d, 
v29.2s, v28.s[0]"); + // TEST_SINGLE(smlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smlal v30.2d, v29.2s, v28.s[3]"); TEST_SINGLE(smlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smlal v30.2d, v29.2s, v15.s[0]"); TEST_SINGLE(smlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "smlal v30.2d, v29.2s, v15.s[3]"); @@ -2668,8 +2674,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(smlal2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "smlal2 v30.4s, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(smlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smlal2 v30.2d, v29.4s, v28.s[0]"); - //TEST_SINGLE(smlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smlal2 v30.2d, v29.4s, v28.s[3]"); + // TEST_SINGLE(smlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smlal2 v30.2d, v29.4s, v28.s[0]"); + // TEST_SINGLE(smlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smlal2 v30.2d, v29.4s, v28.s[3]"); TEST_SINGLE(smlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smlal2 v30.2d, v29.4s, v15.s[0]"); TEST_SINGLE(smlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "smlal2 v30.2d, v29.4s, v15.s[3]"); @@ -2678,8 +2684,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqdmlal(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmlal v30.4s, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqdmlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlal v30.2d, v29.2s, v28.s[0]"); - //TEST_SINGLE(sqdmlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlal v30.2d, v29.2s, v28.s[3]"); + // TEST_SINGLE(sqdmlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlal v30.2d, v29.2s, 
v28.s[0]"); + // TEST_SINGLE(sqdmlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlal v30.2d, v29.2s, v28.s[3]"); TEST_SINGLE(sqdmlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmlal v30.2d, v29.2s, v15.s[0]"); TEST_SINGLE(sqdmlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "sqdmlal v30.2d, v29.2s, v15.s[3]"); @@ -2688,8 +2694,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqdmlal2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmlal2 v30.4s, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqdmlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlal2 v30.2d, v29.4s, v28.s[0]"); - //TEST_SINGLE(sqdmlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlal2 v30.2d, v29.4s, v28.s[3]"); + // TEST_SINGLE(sqdmlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlal2 v30.2d, v29.4s, v28.s[0]"); + // TEST_SINGLE(sqdmlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlal2 v30.2d, v29.4s, v28.s[3]"); TEST_SINGLE(sqdmlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmlal2 v30.2d, v29.4s, v15.s[0]"); TEST_SINGLE(sqdmlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "sqdmlal2 v30.2d, v29.4s, v15.s[3]"); @@ -2698,8 +2704,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(smlsl(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "smlsl v30.4s, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(smlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smlsl v30.2d, v29.2s, v28.s[0]"); - //TEST_SINGLE(smlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smlsl v30.2d, v29.2s, v28.s[3]"); + // TEST_SINGLE(smlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smlsl 
v30.2d, v29.2s, v28.s[0]"); + // TEST_SINGLE(smlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smlsl v30.2d, v29.2s, v28.s[3]"); TEST_SINGLE(smlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smlsl v30.2d, v29.2s, v15.s[0]"); TEST_SINGLE(smlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "smlsl v30.2d, v29.2s, v15.s[3]"); @@ -2708,8 +2714,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(smlsl2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "smlsl2 v30.4s, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(smlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smlsl2 v30.2d, v29.4s, v28.s[0]"); - //TEST_SINGLE(smlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smlsl2 v30.2d, v29.4s, v28.s[3]"); + // TEST_SINGLE(smlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smlsl2 v30.2d, v29.4s, v28.s[0]"); + // TEST_SINGLE(smlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smlsl2 v30.2d, v29.4s, v28.s[3]"); TEST_SINGLE(smlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smlsl2 v30.2d, v29.4s, v15.s[0]"); TEST_SINGLE(smlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "smlsl2 v30.2d, v29.4s, v15.s[3]"); @@ -2718,8 +2724,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqdmlsl(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmlsl v30.4s, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqdmlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlsl v30.2d, v29.2s, v28.s[0]"); - //TEST_SINGLE(sqdmlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlsl v30.2d, v29.2s, v28.s[3]"); + // TEST_SINGLE(sqdmlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlsl v30.2d, 
v29.2s, v28.s[0]"); + // TEST_SINGLE(sqdmlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlsl v30.2d, v29.2s, v28.s[3]"); TEST_SINGLE(sqdmlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmlsl v30.2d, v29.2s, v15.s[0]"); TEST_SINGLE(sqdmlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "sqdmlsl v30.2d, v29.2s, v15.s[3]"); @@ -2728,8 +2734,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqdmlsl2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmlsl2 v30.4s, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqdmlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlsl2 v30.2d, v29.4s, v28.s[0]"); - //TEST_SINGLE(sqdmlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlsl2 v30.2d, v29.4s, v28.s[3]"); + // TEST_SINGLE(sqdmlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlsl2 v30.2d, v29.4s, v28.s[0]"); + // TEST_SINGLE(sqdmlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlsl2 v30.2d, v29.4s, v28.s[3]"); TEST_SINGLE(sqdmlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmlsl2 v30.2d, v29.4s, v15.s[0]"); TEST_SINGLE(sqdmlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "sqdmlsl2 v30.2d, v29.4s, v15.s[3]"); @@ -2738,8 +2744,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(mul(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "mul v30.8h, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(mul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "mul v30.4s, v29.4s, v28.s[0]"); - //TEST_SINGLE(mul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "mul v30.4s, v29.4s, v28.s[3]"); + // TEST_SINGLE(mul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "mul v30.4s, 
v29.4s, v28.s[0]"); + // TEST_SINGLE(mul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "mul v30.4s, v29.4s, v28.s[3]"); TEST_SINGLE(mul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "mul v30.4s, v29.4s, v15.s[0]"); TEST_SINGLE(mul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "mul v30.4s, v29.4s, v15.s[3]"); @@ -2748,8 +2754,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(mul(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "mul v30.4h, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(mul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "mul v30.2s, v29.2s, v28.s[0]"); - //TEST_SINGLE(mul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "mul v30.2s, v29.2s, v28.s[3]"); + // TEST_SINGLE(mul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "mul v30.2s, v29.2s, v28.s[0]"); + // TEST_SINGLE(mul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "mul v30.2s, v29.2s, v28.s[3]"); TEST_SINGLE(mul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "mul v30.2s, v29.2s, v15.s[0]"); TEST_SINGLE(mul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "mul v30.2s, v29.2s, v15.s[3]"); @@ -2758,8 +2764,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(smull(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "smull v30.4s, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(smull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smull v30.2d, v29.2s, v28.s[0]"); - //TEST_SINGLE(smull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smull v30.2d, v29.2s, v28.s[3]"); + // TEST_SINGLE(smull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smull v30.2d, v29.2s, v28.s[0]"); + // TEST_SINGLE(smull(SubRegSize::i64Bit, VReg::v30, 
VReg::v29, VReg::v28, 3), "smull v30.2d, v29.2s, v28.s[3]"); TEST_SINGLE(smull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smull v30.2d, v29.2s, v15.s[0]"); TEST_SINGLE(smull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "smull v30.2d, v29.2s, v15.s[3]"); @@ -2768,8 +2774,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(smull2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "smull2 v30.4s, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(smull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smull2 v30.2d, v29.4s, v28.s[0]"); - //TEST_SINGLE(smull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smull2 v30.2d, v29.4s, v28.s[3]"); + // TEST_SINGLE(smull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smull2 v30.2d, v29.4s, v28.s[0]"); + // TEST_SINGLE(smull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smull2 v30.2d, v29.4s, v28.s[3]"); TEST_SINGLE(smull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smull2 v30.2d, v29.4s, v15.s[0]"); TEST_SINGLE(smull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "smull2 v30.2d, v29.4s, v15.s[3]"); @@ -2778,8 +2784,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqdmull(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmull v30.4s, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqdmull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmull v30.2d, v29.2s, v28.s[0]"); - //TEST_SINGLE(sqdmull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmull v30.2d, v29.2s, v28.s[3]"); + // TEST_SINGLE(sqdmull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmull v30.2d, v29.2s, v28.s[0]"); + // TEST_SINGLE(sqdmull(SubRegSize::i64Bit, VReg::v30, 
VReg::v29, VReg::v28, 3), "sqdmull v30.2d, v29.2s, v28.s[3]"); TEST_SINGLE(sqdmull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmull v30.2d, v29.2s, v15.s[0]"); TEST_SINGLE(sqdmull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "sqdmull v30.2d, v29.2s, v15.s[3]"); @@ -2788,8 +2794,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqdmull2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmull2 v30.4s, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqdmull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmull2 v30.2d, v29.4s, v28.s[0]"); - //TEST_SINGLE(sqdmull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmull2 v30.2d, v29.4s, v28.s[3]"); + // TEST_SINGLE(sqdmull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmull2 v30.2d, v29.4s, v28.s[0]"); + // TEST_SINGLE(sqdmull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmull2 v30.2d, v29.4s, v28.s[3]"); TEST_SINGLE(sqdmull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmull2 v30.2d, v29.4s, v15.s[0]"); TEST_SINGLE(sqdmull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "sqdmull2 v30.2d, v29.4s, v15.s[3]"); @@ -2798,8 +2804,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqdmulh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "sqdmulh v30.8h, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "sqdmulh v30.4s, v29.4s, v28.s[0]"); - //TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "sqdmulh v30.4s, v29.4s, v28.s[3]"); + // TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "sqdmulh v30.4s, v29.4s, v28.s[0]"); + // 
TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "sqdmulh v30.4s, v29.4s, v28.s[3]"); TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "sqdmulh v30.4s, v29.4s, v15.s[0]"); TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "sqdmulh v30.4s, v29.4s, v15.s[3]"); @@ -2808,8 +2814,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqdmulh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "sqdmulh v30.4h, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "sqdmulh v30.2s, v29.2s, v28.s[0]"); - //TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "sqdmulh v30.2s, v29.2s, v28.s[3]"); + // TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "sqdmulh v30.2s, v29.2s, v28.s[0]"); + // TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "sqdmulh v30.2s, v29.2s, v28.s[3]"); TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "sqdmulh v30.2s, v29.2s, v15.s[0]"); TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "sqdmulh v30.2s, v29.2s, v15.s[3]"); @@ -2818,8 +2824,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqrdmulh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "sqrdmulh v30.8h, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "sqrdmulh v30.4s, v29.4s, v28.s[0]"); - //TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "sqrdmulh v30.4s, v29.4s, v28.s[3]"); + // TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "sqrdmulh v30.4s, 
v29.4s, v28.s[0]"); + // TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "sqrdmulh v30.4s, v29.4s, v28.s[3]"); TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "sqrdmulh v30.4s, v29.4s, v15.s[0]"); TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "sqrdmulh v30.4s, v29.4s, v15.s[3]"); @@ -2828,8 +2834,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqrdmulh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "sqrdmulh v30.4h, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "sqrdmulh v30.2s, v29.2s, v28.s[0]"); - //TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "sqrdmulh v30.2s, v29.2s, v28.s[3]"); + // TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "sqrdmulh v30.2s, v29.2s, v28.s[0]"); + // TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "sqrdmulh v30.2s, v29.2s, v28.s[3]"); TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "sqrdmulh v30.2s, v29.2s, v15.s[0]"); TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "sqrdmulh v30.2s, v29.2s, v15.s[3]"); @@ -2877,17 +2883,17 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sudot(DReg::d30, DReg::d29, DReg::d15, 3), "sudot v30.2s, v29.8b, v15.4b[3]"); // Unimplemented in vixl disassembler - //TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q28, 0), "bfdot v30.4s, v29.8h, v28.2h[0]"); - //TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q28, 3), "bfdot v30.4s, v29.8h, v28.2h[3]"); + // TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q28, 0), "bfdot v30.4s, v29.8h, v28.2h[0]"); + // TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q28, 3), "bfdot v30.4s, v29.8h, 
v28.2h[3]"); - //TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q15, 0), "bfdot v30.4s, v29.8h, v15.2h[0]"); - //TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q15, 3), "bfdot v30.4s, v29.8h, v15.2h[3]"); + // TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q15, 0), "bfdot v30.4s, v29.8h, v15.2h[0]"); + // TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q15, 3), "bfdot v30.4s, v29.8h, v15.2h[3]"); - //TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d28, 0), "bfdot v30.2s, v29.4h, v28.2h[0]"); - //TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d28, 3), "bfdot v30.2s, v29.4h, v28.2h[3]"); + // TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d28, 0), "bfdot v30.2s, v29.4h, v28.2h[0]"); + // TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d28, 3), "bfdot v30.2s, v29.4h, v28.2h[3]"); - //TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d15, 0), "bfdot v30.2s, v29.4h, v15.2h[0]"); - //TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d15, 3), "bfdot v30.2s, v29.4h, v15.2h[3]"); + // TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d15, 0), "bfdot v30.2s, v29.4h, v15.2h[0]"); + // TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d15, 3), "bfdot v30.2s, v29.4h, v15.2h[3]"); TEST_SINGLE(fmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmla v30.4s, v29.4s, v15.s[0]"); TEST_SINGLE(fmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "fmla v30.4s, v29.4s, v15.s[3]"); @@ -2898,8 +2904,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(fmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmla v30.2d, v29.2d, v15.d[0]"); TEST_SINGLE(fmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q15, 1), "fmla v30.2d, v29.2d, v15.d[1]"); - //TEST_SINGLE(fmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmla v30.1d, v29.1d, v15.d[0]"); - //TEST_SINGLE(fmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 1), "fmla v30.1d, v29.1d, v15.d[1]"); + // TEST_SINGLE(fmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 0), 
"fmla v30.1d, v29.1d, v15.d[0]"); + // TEST_SINGLE(fmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 1), "fmla v30.1d, v29.1d, v15.d[1]"); TEST_SINGLE(fmls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmls v30.4s, v29.4s, v15.s[0]"); TEST_SINGLE(fmls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "fmls v30.4s, v29.4s, v15.s[3]"); @@ -2910,8 +2916,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(fmls(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmls v30.2d, v29.2d, v15.d[0]"); TEST_SINGLE(fmls(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q15, 1), "fmls v30.2d, v29.2d, v15.d[1]"); - //TEST_SINGLE(fmls(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmls v30.1d, v29.1d, v15.d[0]"); - //TEST_SINGLE(fmls(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 1), "fmls v30.1d, v29.1d, v15.d[1]"); + // TEST_SINGLE(fmls(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmls v30.1d, v29.1d, v15.d[0]"); + // TEST_SINGLE(fmls(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 1), "fmls v30.1d, v29.1d, v15.d[1]"); TEST_SINGLE(fmul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmul v30.4s, v29.4s, v15.s[0]"); TEST_SINGLE(fmul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "fmul v30.4s, v29.4s, v15.s[3]"); @@ -2922,8 +2928,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(fmul(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmul v30.2d, v29.2d, v15.d[0]"); TEST_SINGLE(fmul(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q15, 1), "fmul v30.2d, v29.2d, v15.d[1]"); - //TEST_SINGLE(fmul(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmul v30.1d, v29.1d, v15.d[0]"); - //TEST_SINGLE(fmul(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 1), "fmul v30.1d, v29.1d, v15.d[1]"); + // TEST_SINGLE(fmul(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmul v30.1d, v29.1d, 
v15.d[0]"); + // TEST_SINGLE(fmul(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 1), "fmul v30.1d, v29.1d, v15.d[1]"); TEST_SINGLE(fmlal(QReg::q30, QReg::q29, QReg::q15, 0), "fmlal v30.4s, v29.4h, v15.h[0]"); TEST_SINGLE(fmlal(QReg::q30, QReg::q29, QReg::q15, 7), "fmlal v30.4s, v29.4h, v15.h[7]"); @@ -2962,18 +2968,18 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(usdot(DReg::d30, DReg::d29, DReg::d15, 3), "usdot v30.2s, v29.8b, v15.4b[3]"); // Unimplemented in vixl disassembler - //TEST_SINGLE(bfmlalb(VReg::v30, VReg::v29, VReg::v15, 0), "bfmlalb v30.4s, v29.8h, v15.h[0]"); - //TEST_SINGLE(bfmlalb(VReg::v30, VReg::v29, VReg::v15, 7), "bfmlalb v30.4s, v29.8h, v15.h[7]"); + // TEST_SINGLE(bfmlalb(VReg::v30, VReg::v29, VReg::v15, 0), "bfmlalb v30.4s, v29.8h, v15.h[0]"); + // TEST_SINGLE(bfmlalb(VReg::v30, VReg::v29, VReg::v15, 7), "bfmlalb v30.4s, v29.8h, v15.h[7]"); - //TEST_SINGLE(bfmlalt(VReg::v30, VReg::v29, VReg::v15, 0), "bfmlalt v30.4s, v29.8h, v15.h[0]"); - //TEST_SINGLE(bfmlalt(VReg::v30, VReg::v29, VReg::v15, 7), "bfmlalt v30.4s, v29.8h, v15.h[7]"); + // TEST_SINGLE(bfmlalt(VReg::v30, VReg::v29, VReg::v15, 0), "bfmlalt v30.4s, v29.8h, v15.h[0]"); + // TEST_SINGLE(bfmlalt(VReg::v30, VReg::v29, VReg::v15, 7), "bfmlalt v30.4s, v29.8h, v15.h[7]"); TEST_SINGLE(mla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 0), "mla v30.8h, v29.8h, v15.h[0]"); TEST_SINGLE(mla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "mla v30.8h, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(mla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "mla v30.4s, v29.4s, v28.s[0]"); - //TEST_SINGLE(mla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "mla v30.4s, v29.4s, v28.s[3]"); + // TEST_SINGLE(mla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "mla v30.4s, v29.4s, v28.s[0]"); + // TEST_SINGLE(mla(SubRegSize::i32Bit, 
QReg::q30, QReg::q29, QReg::q28, 3), "mla v30.4s, v29.4s, v28.s[3]"); TEST_SINGLE(mla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "mla v30.4s, v29.4s, v15.s[0]"); TEST_SINGLE(mla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "mla v30.4s, v29.4s, v15.s[3]"); @@ -2982,8 +2988,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(mla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "mla v30.4h, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(mla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "mla v30.2s, v29.2s, v28.s[0]"); - //TEST_SINGLE(mla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "mla v30.2s, v29.2s, v28.s[3]"); + // TEST_SINGLE(mla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "mla v30.2s, v29.2s, v28.s[0]"); + // TEST_SINGLE(mla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "mla v30.2s, v29.2s, v28.s[3]"); TEST_SINGLE(mla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "mla v30.2s, v29.2s, v15.s[0]"); TEST_SINGLE(mla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "mla v30.2s, v29.2s, v15.s[3]"); @@ -2992,8 +2998,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(umlal(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "umlal v30.4s, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(umlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umlal v30.2d, v29.2s, v28.s[0]"); - //TEST_SINGLE(umlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umlal v30.2d, v29.2s, v28.s[3]"); + // TEST_SINGLE(umlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umlal v30.2d, v29.2s, v28.s[0]"); + // TEST_SINGLE(umlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umlal v30.2d, v29.2s, v28.s[3]"); 
TEST_SINGLE(umlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umlal v30.2d, v29.2s, v15.s[0]"); TEST_SINGLE(umlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "umlal v30.2d, v29.2s, v15.s[3]"); @@ -3002,8 +3008,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(umlal2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "umlal2 v30.4s, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(umlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umlal2 v30.2d, v29.4s, v28.s[0]"); - //TEST_SINGLE(umlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umlal2 v30.2d, v29.4s, v28.s[3]"); + // TEST_SINGLE(umlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umlal2 v30.2d, v29.4s, v28.s[0]"); + // TEST_SINGLE(umlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umlal2 v30.2d, v29.4s, v28.s[3]"); TEST_SINGLE(umlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umlal2 v30.2d, v29.4s, v15.s[0]"); TEST_SINGLE(umlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "umlal2 v30.2d, v29.4s, v15.s[3]"); @@ -3012,8 +3018,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(mls(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "mls v30.8h, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(mls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "mls v30.4s, v29.4s, v28.s[0]"); - //TEST_SINGLE(mls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "mls v30.4s, v29.4s, v28.s[3]"); + // TEST_SINGLE(mls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "mls v30.4s, v29.4s, v28.s[0]"); + // TEST_SINGLE(mls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "mls v30.4s, v29.4s, v28.s[3]"); TEST_SINGLE(mls(SubRegSize::i32Bit, QReg::q30, 
QReg::q29, QReg::q15, 0), "mls v30.4s, v29.4s, v15.s[0]"); TEST_SINGLE(mls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "mls v30.4s, v29.4s, v15.s[3]"); @@ -3022,8 +3028,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(mls(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "mls v30.4h, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(mls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "mls v30.2s, v29.2s, v28.s[0]"); - //TEST_SINGLE(mls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "mls v30.2s, v29.2s, v28.s[3]"); + // TEST_SINGLE(mls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "mls v30.2s, v29.2s, v28.s[0]"); + // TEST_SINGLE(mls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "mls v30.2s, v29.2s, v28.s[3]"); TEST_SINGLE(mls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "mls v30.2s, v29.2s, v15.s[0]"); TEST_SINGLE(mls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "mls v30.2s, v29.2s, v15.s[3]"); @@ -3032,8 +3038,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(umlsl(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "umlsl v30.4s, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(umlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umlsl v30.2d, v29.2s, v28.s[0]"); - //TEST_SINGLE(umlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umlsl v30.2d, v29.2s, v28.s[3]"); + // TEST_SINGLE(umlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umlsl v30.2d, v29.2s, v28.s[0]"); + // TEST_SINGLE(umlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umlsl v30.2d, v29.2s, v28.s[3]"); TEST_SINGLE(umlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umlsl v30.2d, v29.2s, v15.s[0]"); 
TEST_SINGLE(umlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "umlsl v30.2d, v29.2s, v15.s[3]"); @@ -3042,8 +3048,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(umlsl2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "umlsl2 v30.4s, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(umlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umlsl2 v30.2d, v29.4s, v28.s[0]"); - //TEST_SINGLE(umlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umlsl2 v30.2d, v29.4s, v28.s[3]"); + // TEST_SINGLE(umlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umlsl2 v30.2d, v29.4s, v28.s[0]"); + // TEST_SINGLE(umlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umlsl2 v30.2d, v29.4s, v28.s[3]"); TEST_SINGLE(umlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umlsl2 v30.2d, v29.4s, v15.s[0]"); TEST_SINGLE(umlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "umlsl2 v30.2d, v29.4s, v15.s[3]"); @@ -3052,8 +3058,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(umull(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "umull v30.4s, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(umull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umull v30.2d, v29.2s, v28.s[0]"); - //TEST_SINGLE(umull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umull v30.2d, v29.2s, v28.s[3]"); + // TEST_SINGLE(umull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umull v30.2d, v29.2s, v28.s[0]"); + // TEST_SINGLE(umull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umull v30.2d, v29.2s, v28.s[3]"); TEST_SINGLE(umull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umull v30.2d, v29.2s, v15.s[0]"); 
TEST_SINGLE(umull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "umull v30.2d, v29.2s, v15.s[3]"); @@ -3062,8 +3068,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(umull2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "umull2 v30.4s, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(umull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umull2 v30.2d, v29.4s, v28.s[0]"); - //TEST_SINGLE(umull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umull2 v30.2d, v29.4s, v28.s[3]"); + // TEST_SINGLE(umull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umull2 v30.2d, v29.4s, v28.s[0]"); + // TEST_SINGLE(umull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umull2 v30.2d, v29.4s, v28.s[3]"); TEST_SINGLE(umull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umull2 v30.2d, v29.4s, v15.s[0]"); TEST_SINGLE(umull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "umull2 v30.2d, v29.4s, v15.s[3]"); @@ -3072,8 +3078,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqrdmlah(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "sqrdmlah v30.8h, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "sqrdmlah v30.4s, v29.4s, v28.s[0]"); - //TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "sqrdmlah v30.4s, v29.4s, v28.s[3]"); + // TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "sqrdmlah v30.4s, v29.4s, v28.s[0]"); + // TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "sqrdmlah v30.4s, v29.4s, v28.s[3]"); TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "sqrdmlah v30.4s, v29.4s, 
v15.s[0]"); TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "sqrdmlah v30.4s, v29.4s, v15.s[3]"); @@ -3082,8 +3088,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqrdmlah(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "sqrdmlah v30.4h, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "sqrdmlah v30.2s, v29.2s, v28.s[0]"); - //TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "sqrdmlah v30.2s, v29.2s, v28.s[3]"); + // TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "sqrdmlah v30.2s, v29.2s, v28.s[0]"); + // TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "sqrdmlah v30.2s, v29.2s, v28.s[3]"); TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "sqrdmlah v30.2s, v29.2s, v15.s[0]"); TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "sqrdmlah v30.2s, v29.2s, v15.s[3]"); @@ -3104,8 +3110,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqrdmlsh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "sqrdmlsh v30.8h, v29.8h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "sqrdmlsh v30.4s, v29.4s, v28.s[0]"); - //TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "sqrdmlsh v30.4s, v29.4s, v28.s[3]"); + // TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "sqrdmlsh v30.4s, v29.4s, v28.s[0]"); + // TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "sqrdmlsh v30.4s, v29.4s, v28.s[3]"); TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, QReg::q30, 
QReg::q29, QReg::q15, 0), "sqrdmlsh v30.4s, v29.4s, v15.s[0]"); TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "sqrdmlsh v30.4s, v29.4s, v15.s[3]"); @@ -3114,8 +3120,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x index TEST_SINGLE(sqrdmlsh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "sqrdmlsh v30.4h, v29.4h, v15.h[7]"); // vixl has a disassembler bug where it doesn't decode rm correctly for registers >= 16 - //TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "sqrdmlsh v30.2s, v29.2s, v28.s[0]"); - //TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "sqrdmlsh v30.2s, v29.2s, v28.s[3]"); + // TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "sqrdmlsh v30.2s, v29.2s, v28.s[0]"); + // TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "sqrdmlsh v30.2s, v29.2s, v28.s[3]"); TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "sqrdmlsh v30.2s, v29.2s, v15.s[0]"); TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "sqrdmlsh v30.2s, v29.2s, v15.s[3]"); @@ -3133,60 +3139,60 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Cryptographic two-register S // TODO: Implement in emitter. 
} TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Conversion between floating-point and fixed-point") { - TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "scvtf h29, w30, #1"); + TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "scvtf h29, w30, #1"); TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i32Bit, Reg::r30, 32), "scvtf h29, w30, #32"); - TEST_SINGLE(scvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "scvtf s29, w30, #1"); + TEST_SINGLE(scvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "scvtf s29, w30, #1"); TEST_SINGLE(scvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i32Bit, Reg::r30, 32), "scvtf s29, w30, #32"); - TEST_SINGLE(scvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "scvtf d29, w30, #1"); + TEST_SINGLE(scvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "scvtf d29, w30, #1"); TEST_SINGLE(scvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i32Bit, Reg::r30, 32), "scvtf d29, w30, #32"); - TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "scvtf h29, x30, #1"); + TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "scvtf h29, x30, #1"); TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i64Bit, Reg::r30, 64), "scvtf h29, x30, #64"); - TEST_SINGLE(scvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "scvtf s29, x30, #1"); + TEST_SINGLE(scvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "scvtf s29, x30, #1"); TEST_SINGLE(scvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i64Bit, Reg::r30, 64), "scvtf s29, x30, #64"); - TEST_SINGLE(scvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "scvtf d29, x30, #1"); + TEST_SINGLE(scvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "scvtf d29, x30, #1"); TEST_SINGLE(scvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i64Bit, Reg::r30, 64), "scvtf d29, x30, #64"); - 
TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "ucvtf h29, w30, #1"); + TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "ucvtf h29, w30, #1"); TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i32Bit, Reg::r30, 32), "ucvtf h29, w30, #32"); - TEST_SINGLE(ucvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "ucvtf s29, w30, #1"); + TEST_SINGLE(ucvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "ucvtf s29, w30, #1"); TEST_SINGLE(ucvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i32Bit, Reg::r30, 32), "ucvtf s29, w30, #32"); - TEST_SINGLE(ucvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "ucvtf d29, w30, #1"); + TEST_SINGLE(ucvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "ucvtf d29, w30, #1"); TEST_SINGLE(ucvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i32Bit, Reg::r30, 32), "ucvtf d29, w30, #32"); - TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "ucvtf h29, x30, #1"); + TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "ucvtf h29, x30, #1"); TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i64Bit, Reg::r30, 64), "ucvtf h29, x30, #64"); - TEST_SINGLE(ucvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "ucvtf s29, x30, #1"); + TEST_SINGLE(ucvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "ucvtf s29, x30, #1"); TEST_SINGLE(ucvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i64Bit, Reg::r30, 64), "ucvtf s29, x30, #64"); - TEST_SINGLE(ucvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "ucvtf d29, x30, #1"); + TEST_SINGLE(ucvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "ucvtf d29, x30, #1"); TEST_SINGLE(ucvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i64Bit, Reg::r30, 64), "ucvtf d29, x30, #64"); - TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 1), "fcvtzs w30, h29, #1"); + 
TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 1), "fcvtzs w30, h29, #1"); TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 32), "fcvtzs w30, h29, #32"); - TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 1), "fcvtzs w30, s29, #1"); + TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 1), "fcvtzs w30, s29, #1"); TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 32), "fcvtzs w30, s29, #32"); - TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 1), "fcvtzs w30, d29, #1"); + TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 1), "fcvtzs w30, d29, #1"); TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 32), "fcvtzs w30, d29, #32"); - TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 1), "fcvtzs x30, h29, #1"); + TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 1), "fcvtzs x30, h29, #1"); TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 64), "fcvtzs x30, h29, #64"); - TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 1), "fcvtzs x30, s29, #1"); + TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 1), "fcvtzs x30, s29, #1"); TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 64), "fcvtzs x30, s29, #64"); - TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 1), "fcvtzs x30, d29, #1"); + TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 1), "fcvtzs x30, d29, #1"); TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 64), "fcvtzs x30, d29, #64"); - TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 1), "fcvtzu w30, h29, #1"); + TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i16Bit, 
VReg::v29, 1), "fcvtzu w30, h29, #1"); TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 32), "fcvtzu w30, h29, #32"); - TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 1), "fcvtzu w30, s29, #1"); + TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 1), "fcvtzu w30, s29, #1"); TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 32), "fcvtzu w30, s29, #32"); - TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 1), "fcvtzu w30, d29, #1"); + TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 1), "fcvtzu w30, d29, #1"); TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 32), "fcvtzu w30, d29, #32"); - TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 1), "fcvtzu x30, h29, #1"); + TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 1), "fcvtzu x30, h29, #1"); TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 64), "fcvtzu x30, h29, #64"); - TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 1), "fcvtzu x30, s29, #1"); + TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 1), "fcvtzu x30, s29, #1"); TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 64), "fcvtzu x30, s29, #64"); - TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 1), "fcvtzu x30, d29, #1"); + TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 1), "fcvtzu x30, d29, #1"); TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 64), "fcvtzu x30, d29, #64"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Conversion between floating-point and integer") { @@ -3235,27 +3241,27 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Conversion between floating- TEST_SINGLE(fmov(Size::i32Bit, 
Reg::r29, HReg::h30), "fmov w29, h30"); TEST_SINGLE(fmov(Size::i64Bit, Reg::r29, HReg::h30), "fmov x29, h30"); TEST_SINGLE(fmov(Size::i32Bit, Reg::r29, SReg::s30), "fmov w29, s30"); - //TEST_SINGLE(fmov(Size::i64Bit, Reg::r29, SReg::s30), "fmov x29, s30"); - //TEST_SINGLE(fmov(Size::i32Bit, Reg::r29, DReg::d30), "fmov w29, d30"); + // TEST_SINGLE(fmov(Size::i64Bit, Reg::r29, SReg::s30), "fmov x29, s30"); + // TEST_SINGLE(fmov(Size::i32Bit, Reg::r29, DReg::d30), "fmov w29, d30"); TEST_SINGLE(fmov(Size::i64Bit, Reg::r29, DReg::d30), "fmov x29, d30"); - //TEST_SINGLE(fmov(Size::i32Bit, Reg::r29, VReg::v30, false), "fmov w29, s30"); + // TEST_SINGLE(fmov(Size::i32Bit, Reg::r29, VReg::v30, false), "fmov w29, s30"); TEST_SINGLE(fmov(Size::i64Bit, Reg::r29, VReg::v30, false), "fmov x29, d30"); - //TEST_SINGLE(fmov(Size::i32Bit, Reg::r29, VReg::v30, true), "fmov w29, s30"); + // TEST_SINGLE(fmov(Size::i32Bit, Reg::r29, VReg::v30, true), "fmov w29, s30"); TEST_SINGLE(fmov(Size::i64Bit, Reg::r29, VReg::v30, true), "fmov x29, v30.D[1]"); TEST_SINGLE(fmov(Size::i32Bit, HReg::h30, Reg::r29), "fmov h30, w29"); TEST_SINGLE(fmov(Size::i64Bit, HReg::h30, Reg::r29), "fmov h30, x29"); TEST_SINGLE(fmov(Size::i32Bit, SReg::s30, Reg::r29), "fmov s30, w29"); - //TEST_SINGLE(fmov(Size::i64Bit, SReg::s30, Reg::r29), "fmov s30, x29"); - //TEST_SINGLE(fmov(Size::i32Bit, DReg::d30, Reg::r29), "fmov d30, w29"); + // TEST_SINGLE(fmov(Size::i64Bit, SReg::s30, Reg::r29), "fmov s30, x29"); + // TEST_SINGLE(fmov(Size::i32Bit, DReg::d30, Reg::r29), "fmov d30, w29"); TEST_SINGLE(fmov(Size::i64Bit, DReg::d30, Reg::r29), "fmov d30, x29"); - //TEST_SINGLE(fmov(Size::i32Bit, VReg::v30, Reg::r29, false), "fmov s30, w29"); + // TEST_SINGLE(fmov(Size::i32Bit, VReg::v30, Reg::r29, false), "fmov s30, w29"); TEST_SINGLE(fmov(Size::i64Bit, VReg::v30, Reg::r29, false), "fmov d30, x29"); - //TEST_SINGLE(fmov(Size::i32Bit, VReg::v30, Reg::r29, true), "fmov d30, x29"); + // TEST_SINGLE(fmov(Size::i32Bit, VReg::v30, 
Reg::r29, true), "fmov d30, x29"); TEST_SINGLE(fmov(Size::i64Bit, VReg::v30, Reg::r29, true), "fmov v30.D[1], x29"); TEST_SINGLE(fcvtps(Size::i32Bit, Reg::r29, HReg::h30), "fcvtps w29, h30"); @@ -3300,5 +3306,3 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Conversion between floating- TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r29, DReg::d30), "fcvtzu w29, d30"); TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r29, DReg::d30), "fcvtzu x29, d30"); } - - diff --git a/FEXCore/unittests/Emitter/Loadstore_Tests.cpp b/FEXCore/unittests/Emitter/Loadstore_Tests.cpp index 6af98eb7c8..b82994fcc2 100644 --- a/FEXCore/unittests/Emitter/Loadstore_Tests.cpp +++ b/FEXCore/unittests/Emitter/Loadstore_Tests.cpp @@ -31,8 +31,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld1(QReg::q26, Reg::r30), "ld1 {v26.2d}, [x30]"); TEST_SINGLE(ld1(DReg::d26, Reg::r30), "ld1 {v26.1d}, [x30]"); - TEST_SINGLE(ld1(QReg::q31, QReg::q0, Reg::r30), "ld1 {v31.16b, v0.16b}, [x30]"); - TEST_SINGLE(ld1(DReg::d31, DReg::d0, Reg::r30), "ld1 {v31.8b, v0.8b}, [x30]"); + TEST_SINGLE(ld1(QReg::q31, QReg::q0, Reg::r30), "ld1 {v31.16b, v0.16b}, [x30]"); + TEST_SINGLE(ld1(DReg::d31, DReg::d0, Reg::r30), "ld1 {v31.8b, v0.8b}, [x30]"); TEST_SINGLE(ld1(QReg::q26, QReg::q27, Reg::r30), "ld1 {v26.16b, v27.16b}, [x30]"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, Reg::r30), "ld1 {v26.8b, v27.8b}, [x30]"); @@ -45,8 +45,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld1(QReg::q26, QReg::q27, Reg::r30), "ld1 {v26.2d, v27.2d}, [x30]"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, Reg::r30), "ld1 {v26.1d, v27.1d}, [x30]"); - TEST_SINGLE(ld1(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "ld1 {v31.16b, v0.16b, v1.16b}, [x30]"); - TEST_SINGLE(ld1(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "ld1 {v31.8b, v0.8b, v1.8b}, [x30]"); + TEST_SINGLE(ld1(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "ld1 {v31.16b, v0.16b, v1.16b}, [x30]"); + 
TEST_SINGLE(ld1(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "ld1 {v31.8b, v0.8b, v1.8b}, [x30]"); TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld1 {v26.16b, v27.16b, v28.16b}, [x30]"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld1 {v26.8b, v27.8b, v28.8b}, [x30]"); @@ -59,9 +59,10 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld1 {v26.2d, v27.2d, v28.2d}, [x30]"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld1 {v26.1d, v27.1d, v28.1d}, [x30]"); - TEST_SINGLE(ld1(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "ld1 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]"); - TEST_SINGLE(ld1(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "ld1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]"); - TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld1 {v26.16b, v27.16b, v28.16b, v29.16b}, [x30]"); + TEST_SINGLE(ld1(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "ld1 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]"); + TEST_SINGLE(ld1(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "ld1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]"); + TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld1 {v26.16b, v27.16b, v28.16b, v29.16b}, " + "[x30]"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld1 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30]"); TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld1 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30]"); @@ -85,8 +86,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st1(QReg::q26, Reg::r30), "st1 {v26.2d}, [x30]"); TEST_SINGLE(st1(DReg::d26, Reg::r30), "st1 {v26.1d}, [x30]"); - TEST_SINGLE(st1(QReg::q31, QReg::q0, Reg::r30), "st1 {v31.16b, v0.16b}, [x30]"); - TEST_SINGLE(st1(DReg::d31, DReg::d0, Reg::r30), "st1 {v31.8b, v0.8b}, [x30]"); + TEST_SINGLE(st1(QReg::q31, QReg::q0, Reg::r30), 
"st1 {v31.16b, v0.16b}, [x30]"); + TEST_SINGLE(st1(DReg::d31, DReg::d0, Reg::r30), "st1 {v31.8b, v0.8b}, [x30]"); TEST_SINGLE(st1(QReg::q26, QReg::q27, Reg::r30), "st1 {v26.16b, v27.16b}, [x30]"); TEST_SINGLE(st1(DReg::d26, DReg::d27, Reg::r30), "st1 {v26.8b, v27.8b}, [x30]"); @@ -99,8 +100,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st1(QReg::q26, QReg::q27, Reg::r30), "st1 {v26.2d, v27.2d}, [x30]"); TEST_SINGLE(st1(DReg::d26, DReg::d27, Reg::r30), "st1 {v26.1d, v27.1d}, [x30]"); - TEST_SINGLE(st1(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "st1 {v31.16b, v0.16b, v1.16b}, [x30]"); - TEST_SINGLE(st1(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "st1 {v31.8b, v0.8b, v1.8b}, [x30]"); + TEST_SINGLE(st1(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "st1 {v31.16b, v0.16b, v1.16b}, [x30]"); + TEST_SINGLE(st1(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "st1 {v31.8b, v0.8b, v1.8b}, [x30]"); TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "st1 {v26.16b, v27.16b, v28.16b}, [x30]"); TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "st1 {v26.8b, v27.8b, v28.8b}, [x30]"); @@ -113,9 +114,10 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "st1 {v26.2d, v27.2d, v28.2d}, [x30]"); TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "st1 {v26.1d, v27.1d, v28.1d}, [x30]"); - TEST_SINGLE(st1(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "st1 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]"); - TEST_SINGLE(st1(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "st1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]"); - TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st1 {v26.16b, v27.16b, v28.16b, v29.16b}, [x30]"); + TEST_SINGLE(st1(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "st1 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]"); + TEST_SINGLE(st1(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "st1 
{v31.8b, v0.8b, v1.8b, v2.8b}, [x30]"); + TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st1 {v26.16b, v27.16b, v28.16b, v29.16b}, " + "[x30]"); TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "st1 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30]"); TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st1 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30]"); @@ -127,8 +129,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st1 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30]"); TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "st1 {v26.1d, v27.1d, v28.1d, v29.1d}, [x30]"); - TEST_SINGLE(ld2(QReg::q31, QReg::q0, Reg::r30), "ld2 {v31.16b, v0.16b}, [x30]"); - TEST_SINGLE(ld2(DReg::d31, DReg::d0, Reg::r30), "ld2 {v31.8b, v0.8b}, [x30]"); + TEST_SINGLE(ld2(QReg::q31, QReg::q0, Reg::r30), "ld2 {v31.16b, v0.16b}, [x30]"); + TEST_SINGLE(ld2(DReg::d31, DReg::d0, Reg::r30), "ld2 {v31.8b, v0.8b}, [x30]"); TEST_SINGLE(ld2(QReg::q26, QReg::q27, Reg::r30), "ld2 {v26.16b, v27.16b}, [x30]"); TEST_SINGLE(ld2(DReg::d26, DReg::d27, Reg::r30), "ld2 {v26.8b, v27.8b}, [x30]"); @@ -141,8 +143,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld2(QReg::q26, QReg::q27, Reg::r30), "ld2 {v26.2d, v27.2d}, [x30]"); TEST_SINGLE(ld2(DReg::d26, DReg::d27, Reg::r30), "unallocated (NEONLoadStoreMultiStruct)"); - TEST_SINGLE(st2(QReg::q31, QReg::q0, Reg::r30), "st2 {v31.16b, v0.16b}, [x30]"); - TEST_SINGLE(st2(DReg::d31, DReg::d0, Reg::r30), "st2 {v31.8b, v0.8b}, [x30]"); + TEST_SINGLE(st2(QReg::q31, QReg::q0, Reg::r30), "st2 {v31.16b, v0.16b}, [x30]"); + TEST_SINGLE(st2(DReg::d31, DReg::d0, Reg::r30), "st2 {v31.8b, v0.8b}, [x30]"); TEST_SINGLE(st2(QReg::q26, QReg::q27, Reg::r30), "st2 {v26.16b, v27.16b}, [x30]"); TEST_SINGLE(st2(DReg::d26, DReg::d27, Reg::r30), "st2 {v26.8b, v27.8b}, 
[x30]"); @@ -155,8 +157,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st2(QReg::q26, QReg::q27, Reg::r30), "st2 {v26.2d, v27.2d}, [x30]"); TEST_SINGLE(st2(DReg::d26, DReg::d27, Reg::r30), "unallocated (NEONLoadStoreMultiStruct)"); - TEST_SINGLE(ld3(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "ld3 {v31.16b, v0.16b, v1.16b}, [x30]"); - TEST_SINGLE(ld3(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "ld3 {v31.8b, v0.8b, v1.8b}, [x30]"); + TEST_SINGLE(ld3(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "ld3 {v31.16b, v0.16b, v1.16b}, [x30]"); + TEST_SINGLE(ld3(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "ld3 {v31.8b, v0.8b, v1.8b}, [x30]"); TEST_SINGLE(ld3(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3 {v26.16b, v27.16b, v28.16b}, [x30]"); TEST_SINGLE(ld3(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3 {v26.8b, v27.8b, v28.8b}, [x30]"); @@ -169,8 +171,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld3(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3 {v26.2d, v27.2d, v28.2d}, [x30]"); TEST_SINGLE(ld3(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "unallocated (NEONLoadStoreMultiStruct)"); - TEST_SINGLE(st3(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "st3 {v31.16b, v0.16b, v1.16b}, [x30]"); - TEST_SINGLE(st3(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "st3 {v31.8b, v0.8b, v1.8b}, [x30]"); + TEST_SINGLE(st3(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "st3 {v31.16b, v0.16b, v1.16b}, [x30]"); + TEST_SINGLE(st3(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "st3 {v31.8b, v0.8b, v1.8b}, [x30]"); TEST_SINGLE(st3(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "st3 {v26.16b, v27.16b, v28.16b}, [x30]"); TEST_SINGLE(st3(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "st3 {v26.8b, v27.8b, v28.8b}, [x30]"); @@ -183,9 +185,10 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st3(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "st3 {v26.2d, v27.2d, v28.2d}, [x30]"); 
TEST_SINGLE(st3(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "unallocated (NEONLoadStoreMultiStruct)"); - TEST_SINGLE(ld4(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "ld4 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]"); - TEST_SINGLE(ld4(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "ld4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]"); - TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4 {v26.16b, v27.16b, v28.16b, v29.16b}, [x30]"); + TEST_SINGLE(ld4(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "ld4 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]"); + TEST_SINGLE(ld4(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "ld4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]"); + TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4 {v26.16b, v27.16b, v28.16b, v29.16b}, " + "[x30]"); TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30]"); TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30]"); @@ -197,9 +200,10 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30]"); TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "unallocated (NEONLoadStoreMultiStruct)"); - TEST_SINGLE(st4(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "st4 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]"); - TEST_SINGLE(st4(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "st4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]"); - TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st4 {v26.16b, v27.16b, v28.16b, v29.16b}, [x30]"); + TEST_SINGLE(st4(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "st4 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]"); + TEST_SINGLE(st4(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "st4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]"); + 
TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st4 {v26.16b, v27.16b, v28.16b, v29.16b}, " + "[x30]"); TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "st4 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30]"); TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st4 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30]"); @@ -237,8 +241,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld1(QReg::q26, Reg::r30, 16), "ld1 {v26.2d}, [x30], #16"); TEST_SINGLE(ld1(DReg::d26, Reg::r30, 8), "ld1 {v26.1d}, [x30], #8"); - TEST_SINGLE(ld1(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "ld1 {v31.16b, v0.16b}, [x30], x29"); - TEST_SINGLE(ld1(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "ld1 {v31.8b, v0.8b}, [x30], x29"); + TEST_SINGLE(ld1(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "ld1 {v31.16b, v0.16b}, [x30], x29"); + TEST_SINGLE(ld1(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "ld1 {v31.8b, v0.8b}, [x30], x29"); TEST_SINGLE(ld1(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld1 {v26.16b, v27.16b}, [x30], x29"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld1 {v26.8b, v27.8b}, [x30], x29"); @@ -251,8 +255,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld1(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld1 {v26.2d, v27.2d}, [x30], x29"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld1 {v26.1d, v27.1d}, [x30], x29"); - TEST_SINGLE(ld1(QReg::q31, QReg::q0, Reg::r30, 32), "ld1 {v31.16b, v0.16b}, [x30], #32"); - TEST_SINGLE(ld1(DReg::d31, DReg::d0, Reg::r30, 16), "ld1 {v31.8b, v0.8b}, [x30], #16"); + TEST_SINGLE(ld1(QReg::q31, QReg::q0, Reg::r30, 32), "ld1 {v31.16b, v0.16b}, [x30], #32"); + TEST_SINGLE(ld1(DReg::d31, DReg::d0, Reg::r30, 16), "ld1 {v31.8b, v0.8b}, [x30], #16"); TEST_SINGLE(ld1(QReg::q26, QReg::q27, Reg::r30, 32), "ld1 {v26.16b, v27.16b}, [x30], #32"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, Reg::r30, 16), "ld1 
{v26.8b, v27.8b}, [x30], #16"); @@ -265,8 +269,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld1(QReg::q26, QReg::q27, Reg::r30, 32), "ld1 {v26.2d, v27.2d}, [x30], #32"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, Reg::r30, 16), "ld1 {v26.1d, v27.1d}, [x30], #16"); - TEST_SINGLE(ld1(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "ld1 {v31.16b, v0.16b, v1.16b}, [x30], x29"); - TEST_SINGLE(ld1(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "ld1 {v31.8b, v0.8b, v1.8b}, [x30], x29"); + TEST_SINGLE(ld1(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "ld1 {v31.16b, v0.16b, v1.16b}, [x30], x29"); + TEST_SINGLE(ld1(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "ld1 {v31.8b, v0.8b, v1.8b}, [x30], x29"); TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld1 {v26.16b, v27.16b, v28.16b}, [x30], x29"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld1 {v26.8b, v27.8b, v28.8b}, [x30], x29"); @@ -279,8 +283,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld1 {v26.2d, v27.2d, v28.2d}, [x30], x29"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld1 {v26.1d, v27.1d, v28.1d}, [x30], x29"); - TEST_SINGLE(ld1(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 48), "ld1 {v31.16b, v0.16b, v1.16b}, [x30], #48"); - TEST_SINGLE(ld1(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 24), "ld1 {v31.8b, v0.8b, v1.8b}, [x30], #24"); + TEST_SINGLE(ld1(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 48), "ld1 {v31.16b, v0.16b, v1.16b}, [x30], #48"); + TEST_SINGLE(ld1(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 24), "ld1 {v31.8b, v0.8b, v1.8b}, [x30], #24"); TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "ld1 {v26.16b, v27.16b, v28.16b}, [x30], #48"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "ld1 {v26.8b, v27.8b, v28.8b}, [x30], #24"); @@ 
-293,33 +297,53 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "ld1 {v26.2d, v27.2d, v28.2d}, [x30], #48"); TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "ld1 {v26.1d, v27.1d, v28.1d}, [x30], #24"); - TEST_SINGLE(ld1(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "ld1 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30], x29"); - TEST_SINGLE(ld1(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "ld1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], x29"); - TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld1 {v26.16b, v27.16b, v28.16b, v29.16b}, [x30], x29"); - TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld1 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30], x29"); - - TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld1 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30], x29"); - TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, [x30], x29"); - - TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld1 {v26.4s, v27.4s, v28.4s, v29.4s}, [x30], x29"); - TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld1 {v26.2s, v27.2s, v28.2s, v29.2s}, [x30], x29"); - - TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld1 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30], x29"); - TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld1 {v26.1d, v27.1d, v28.1d, v29.1d}, [x30], x29"); - - TEST_SINGLE(ld1(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 64), "ld1 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30], #64"); - TEST_SINGLE(ld1(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 32), "ld1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], #32"); - TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, 
Reg::r30, 64), "ld1 {v26.16b, v27.16b, v28.16b, v29.16b}, [x30], #64"); - TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld1 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30], #32"); - - TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld1 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30], #64"); - TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, [x30], #32"); - - TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld1 {v26.4s, v27.4s, v28.4s, v29.4s}, [x30], #64"); - TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld1 {v26.2s, v27.2s, v28.2s, v29.2s}, [x30], #32"); - - TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld1 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30], #64"); - TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld1 {v26.1d, v27.1d, v28.1d, v29.1d}, [x30], #32"); + TEST_SINGLE(ld1(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "ld1 {v31.16b, v0.16b, v1.16b, v2.16b}, " + "[x30], x29"); + TEST_SINGLE(ld1(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "ld1 {v31.8b, v0.8b, v1.8b, v2.8b}, " + "[x30], x29"); + TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld1 {v26.16b, v27.16b, v28.16b, " + "v29.16b}, [x30], x29"); + TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld1 {v26.8b, v27.8b, v28.8b, " + "v29.8b}, [x30], x29"); + + TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld1 {v26.8h, v27.8h, v28.8h, " + "v29.8h}, [x30], x29"); + TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld1 {v26.4h, v27.4h, v28.4h, " + "v29.4h}, [x30], x29"); + + TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld1 {v26.4s, v27.4s, v28.4s, " + "v29.4s}, [x30], x29"); + 
TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld1 {v26.2s, v27.2s, v28.2s, " + "v29.2s}, [x30], x29"); + + TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld1 {v26.2d, v27.2d, v28.2d, " + "v29.2d}, [x30], x29"); + TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld1 {v26.1d, v27.1d, v28.1d, " + "v29.1d}, [x30], x29"); + + TEST_SINGLE(ld1(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 64), "ld1 {v31.16b, v0.16b, v1.16b, v2.16b}, " + "[x30], #64"); + TEST_SINGLE(ld1(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 32), "ld1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], " + "#32"); + TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld1 {v26.16b, v27.16b, v28.16b, v29.16b}, " + "[x30], #64"); + TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld1 {v26.8b, v27.8b, v28.8b, v29.8b}, " + "[x30], #32"); + + TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld1 {v26.8h, v27.8h, v28.8h, v29.8h}, " + "[x30], #64"); + TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, " + "[x30], #32"); + + TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld1 {v26.4s, v27.4s, v28.4s, v29.4s}, " + "[x30], #64"); + TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld1 {v26.2s, v27.2s, v28.2s, v29.2s}, " + "[x30], #32"); + + TEST_SINGLE(ld1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld1 {v26.2d, v27.2d, v28.2d, v29.2d}, " + "[x30], #64"); + TEST_SINGLE(ld1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld1 {v26.1d, v27.1d, v28.1d, v29.1d}, " + "[x30], #32"); TEST_SINGLE(st1(QReg::q26, Reg::r30, Reg::r29), "st1 {v26.16b}, [x30], x29"); TEST_SINGLE(st1(DReg::d26, Reg::r30, Reg::r29), "st1 {v26.8b}, [x30], x29"); @@ -345,8 +369,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: 
Loadstore: Advanced SIMD load/store TEST_SINGLE(st1(QReg::q26, Reg::r30, 16), "st1 {v26.2d}, [x30], #16"); TEST_SINGLE(st1(DReg::d26, Reg::r30, 8), "st1 {v26.1d}, [x30], #8"); - TEST_SINGLE(st1(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "st1 {v31.16b, v0.16b}, [x30], x29"); - TEST_SINGLE(st1(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "st1 {v31.8b, v0.8b}, [x30], x29"); + TEST_SINGLE(st1(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "st1 {v31.16b, v0.16b}, [x30], x29"); + TEST_SINGLE(st1(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "st1 {v31.8b, v0.8b}, [x30], x29"); TEST_SINGLE(st1(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "st1 {v26.16b, v27.16b}, [x30], x29"); TEST_SINGLE(st1(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "st1 {v26.8b, v27.8b}, [x30], x29"); @@ -359,8 +383,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st1(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "st1 {v26.2d, v27.2d}, [x30], x29"); TEST_SINGLE(st1(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "st1 {v26.1d, v27.1d}, [x30], x29"); - TEST_SINGLE(st1(QReg::q31, QReg::q0, Reg::r30, 32), "st1 {v31.16b, v0.16b}, [x30], #32"); - TEST_SINGLE(st1(DReg::d31, DReg::d0, Reg::r30, 16), "st1 {v31.8b, v0.8b}, [x30], #16"); + TEST_SINGLE(st1(QReg::q31, QReg::q0, Reg::r30, 32), "st1 {v31.16b, v0.16b}, [x30], #32"); + TEST_SINGLE(st1(DReg::d31, DReg::d0, Reg::r30, 16), "st1 {v31.8b, v0.8b}, [x30], #16"); TEST_SINGLE(st1(QReg::q26, QReg::q27, Reg::r30, 32), "st1 {v26.16b, v27.16b}, [x30], #32"); TEST_SINGLE(st1(DReg::d26, DReg::d27, Reg::r30, 16), "st1 {v26.8b, v27.8b}, [x30], #16"); @@ -373,8 +397,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st1(QReg::q26, QReg::q27, Reg::r30, 32), "st1 {v26.2d, v27.2d}, [x30], #32"); TEST_SINGLE(st1(DReg::d26, DReg::d27, Reg::r30, 16), "st1 {v26.1d, v27.1d}, [x30], #16"); - TEST_SINGLE(st1(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "st1 {v31.16b, v0.16b, v1.16b}, [x30], x29"); - 
TEST_SINGLE(st1(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "st1 {v31.8b, v0.8b, v1.8b}, [x30], x29"); + TEST_SINGLE(st1(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "st1 {v31.16b, v0.16b, v1.16b}, [x30], x29"); + TEST_SINGLE(st1(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "st1 {v31.8b, v0.8b, v1.8b}, [x30], x29"); TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "st1 {v26.16b, v27.16b, v28.16b}, [x30], x29"); TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "st1 {v26.8b, v27.8b, v28.8b}, [x30], x29"); @@ -387,8 +411,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "st1 {v26.2d, v27.2d, v28.2d}, [x30], x29"); TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "st1 {v26.1d, v27.1d, v28.1d}, [x30], x29"); - TEST_SINGLE(st1(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 48), "st1 {v31.16b, v0.16b, v1.16b}, [x30], #48"); - TEST_SINGLE(st1(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 24), "st1 {v31.8b, v0.8b, v1.8b}, [x30], #24"); + TEST_SINGLE(st1(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 48), "st1 {v31.16b, v0.16b, v1.16b}, [x30], #48"); + TEST_SINGLE(st1(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 24), "st1 {v31.8b, v0.8b, v1.8b}, [x30], #24"); TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "st1 {v26.16b, v27.16b, v28.16b}, [x30], #48"); TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "st1 {v26.8b, v27.8b, v28.8b}, [x30], #24"); @@ -401,36 +425,56 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "st1 {v26.2d, v27.2d, v28.2d}, [x30], #48"); TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "st1 {v26.1d, v27.1d, v28.1d}, [x30], #24"); - TEST_SINGLE(st1(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "st1 {v31.16b, v0.16b, v1.16b, v2.16b}, 
[x30], x29"); - TEST_SINGLE(st1(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "st1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], x29"); - TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st1 {v26.16b, v27.16b, v28.16b, v29.16b}, [x30], x29"); - TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st1 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30], x29"); - - TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st1 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30], x29"); - TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st1 {v26.4h, v27.4h, v28.4h, v29.4h}, [x30], x29"); - - TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st1 {v26.4s, v27.4s, v28.4s, v29.4s}, [x30], x29"); - TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st1 {v26.2s, v27.2s, v28.2s, v29.2s}, [x30], x29"); - - TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st1 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30], x29"); - TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st1 {v26.1d, v27.1d, v28.1d, v29.1d}, [x30], x29"); - - TEST_SINGLE(st1(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 64), "st1 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30], #64"); - TEST_SINGLE(st1(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 32), "st1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], #32"); - TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st1 {v26.16b, v27.16b, v28.16b, v29.16b}, [x30], #64"); - TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st1 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30], #32"); - - TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st1 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30], #64"); - TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st1 {v26.4h, v27.4h, v28.4h, 
v29.4h}, [x30], #32"); - - TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st1 {v26.4s, v27.4s, v28.4s, v29.4s}, [x30], #64"); - TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st1 {v26.2s, v27.2s, v28.2s, v29.2s}, [x30], #32"); - - TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st1 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30], #64"); - TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st1 {v26.1d, v27.1d, v28.1d, v29.1d}, [x30], #32"); - - TEST_SINGLE(ld2(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "ld2 {v31.16b, v0.16b}, [x30], x29"); - TEST_SINGLE(ld2(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "ld2 {v31.8b, v0.8b}, [x30], x29"); + TEST_SINGLE(st1(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "st1 {v31.16b, v0.16b, v1.16b, v2.16b}, " + "[x30], x29"); + TEST_SINGLE(st1(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "st1 {v31.8b, v0.8b, v1.8b, v2.8b}, " + "[x30], x29"); + TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st1 {v26.16b, v27.16b, v28.16b, " + "v29.16b}, [x30], x29"); + TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st1 {v26.8b, v27.8b, v28.8b, " + "v29.8b}, [x30], x29"); + + TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st1 {v26.8h, v27.8h, v28.8h, " + "v29.8h}, [x30], x29"); + TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st1 {v26.4h, v27.4h, v28.4h, " + "v29.4h}, [x30], x29"); + + TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st1 {v26.4s, v27.4s, v28.4s, " + "v29.4s}, [x30], x29"); + TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st1 {v26.2s, v27.2s, v28.2s, " + "v29.2s}, [x30], x29"); + + TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st1 {v26.2d, v27.2d, v28.2d, " + "v29.2d}, 
[x30], x29"); + TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st1 {v26.1d, v27.1d, v28.1d, " + "v29.1d}, [x30], x29"); + + TEST_SINGLE(st1(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 64), "st1 {v31.16b, v0.16b, v1.16b, v2.16b}, " + "[x30], #64"); + TEST_SINGLE(st1(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 32), "st1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], " + "#32"); + TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st1 {v26.16b, v27.16b, v28.16b, v29.16b}, " + "[x30], #64"); + TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st1 {v26.8b, v27.8b, v28.8b, v29.8b}, " + "[x30], #32"); + + TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st1 {v26.8h, v27.8h, v28.8h, v29.8h}, " + "[x30], #64"); + TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st1 {v26.4h, v27.4h, v28.4h, v29.4h}, " + "[x30], #32"); + + TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st1 {v26.4s, v27.4s, v28.4s, v29.4s}, " + "[x30], #64"); + TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st1 {v26.2s, v27.2s, v28.2s, v29.2s}, " + "[x30], #32"); + + TEST_SINGLE(st1(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st1 {v26.2d, v27.2d, v28.2d, v29.2d}, " + "[x30], #64"); + TEST_SINGLE(st1(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st1 {v26.1d, v27.1d, v28.1d, v29.1d}, " + "[x30], #32"); + + TEST_SINGLE(ld2(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "ld2 {v31.16b, v0.16b}, [x30], x29"); + TEST_SINGLE(ld2(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "ld2 {v31.8b, v0.8b}, [x30], x29"); TEST_SINGLE(ld2(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2 {v26.16b, v27.16b}, [x30], x29"); TEST_SINGLE(ld2(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2 {v26.8b, v27.8b}, [x30], x29"); @@ -443,8 +487,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store 
TEST_SINGLE(ld2(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2 {v26.2d, v27.2d}, [x30], x29"); TEST_SINGLE(ld2(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "unallocated (NEONLoadStoreMultiStructPostIndex)"); - TEST_SINGLE(ld2(QReg::q31, QReg::q0, Reg::r30, 32), "ld2 {v31.16b, v0.16b}, [x30], #32"); - TEST_SINGLE(ld2(DReg::d31, DReg::d0, Reg::r30, 16), "ld2 {v31.8b, v0.8b}, [x30], #16"); + TEST_SINGLE(ld2(QReg::q31, QReg::q0, Reg::r30, 32), "ld2 {v31.16b, v0.16b}, [x30], #32"); + TEST_SINGLE(ld2(DReg::d31, DReg::d0, Reg::r30, 16), "ld2 {v31.8b, v0.8b}, [x30], #16"); TEST_SINGLE(ld2(QReg::q26, QReg::q27, Reg::r30, 32), "ld2 {v26.16b, v27.16b}, [x30], #32"); TEST_SINGLE(ld2(DReg::d26, DReg::d27, Reg::r30, 16), "ld2 {v26.8b, v27.8b}, [x30], #16"); @@ -457,8 +501,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld2(QReg::q26, QReg::q27, Reg::r30, 32), "ld2 {v26.2d, v27.2d}, [x30], #32"); TEST_SINGLE(ld2(DReg::d26, DReg::d27, Reg::r30, 16), "unallocated (NEONLoadStoreMultiStructPostIndex)"); - TEST_SINGLE(st2(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "st2 {v31.16b, v0.16b}, [x30], x29"); - TEST_SINGLE(st2(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "st2 {v31.8b, v0.8b}, [x30], x29"); + TEST_SINGLE(st2(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "st2 {v31.16b, v0.16b}, [x30], x29"); + TEST_SINGLE(st2(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "st2 {v31.8b, v0.8b}, [x30], x29"); TEST_SINGLE(st2(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "st2 {v26.16b, v27.16b}, [x30], x29"); TEST_SINGLE(st2(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "st2 {v26.8b, v27.8b}, [x30], x29"); @@ -471,8 +515,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st2(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "st2 {v26.2d, v27.2d}, [x30], x29"); TEST_SINGLE(st2(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "unallocated (NEONLoadStoreMultiStructPostIndex)"); - TEST_SINGLE(st2(QReg::q31, QReg::q0, Reg::r30, 32), "st2 
{v31.16b, v0.16b}, [x30], #32"); - TEST_SINGLE(st2(DReg::d31, DReg::d0, Reg::r30, 16), "st2 {v31.8b, v0.8b}, [x30], #16"); + TEST_SINGLE(st2(QReg::q31, QReg::q0, Reg::r30, 32), "st2 {v31.16b, v0.16b}, [x30], #32"); + TEST_SINGLE(st2(DReg::d31, DReg::d0, Reg::r30, 16), "st2 {v31.8b, v0.8b}, [x30], #16"); TEST_SINGLE(st2(QReg::q26, QReg::q27, Reg::r30, 32), "st2 {v26.16b, v27.16b}, [x30], #32"); TEST_SINGLE(st2(DReg::d26, DReg::d27, Reg::r30, 16), "st2 {v26.8b, v27.8b}, [x30], #16"); @@ -485,8 +529,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st2(QReg::q26, QReg::q27, Reg::r30, 32), "st2 {v26.2d, v27.2d}, [x30], #32"); TEST_SINGLE(st2(DReg::d26, DReg::d27, Reg::r30, 16), "unallocated (NEONLoadStoreMultiStructPostIndex)"); - TEST_SINGLE(ld3(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "ld3 {v31.16b, v0.16b, v1.16b}, [x30], x29"); - TEST_SINGLE(ld3(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "ld3 {v31.8b, v0.8b, v1.8b}, [x30], x29"); + TEST_SINGLE(ld3(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "ld3 {v31.16b, v0.16b, v1.16b}, [x30], x29"); + TEST_SINGLE(ld3(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "ld3 {v31.8b, v0.8b, v1.8b}, [x30], x29"); TEST_SINGLE(ld3(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3 {v26.16b, v27.16b, v28.16b}, [x30], x29"); TEST_SINGLE(ld3(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3 {v26.8b, v27.8b, v28.8b}, [x30], x29"); @@ -497,10 +541,11 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld3(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3 {v26.2s, v27.2s, v28.2s}, [x30], x29"); TEST_SINGLE(ld3(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3 {v26.2d, v27.2d, v28.2d}, [x30], x29"); - TEST_SINGLE(ld3(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "unallocated (NEONLoadStoreMultiStructPostIndex)"); + TEST_SINGLE(ld3(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 
Reg::r29), "unallocated " + "(NEONLoadStoreMultiStructPostIndex)"); - TEST_SINGLE(ld3(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 48), "ld3 {v31.16b, v0.16b, v1.16b}, [x30], #48"); - TEST_SINGLE(ld3(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 24), "ld3 {v31.8b, v0.8b, v1.8b}, [x30], #24"); + TEST_SINGLE(ld3(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 48), "ld3 {v31.16b, v0.16b, v1.16b}, [x30], #48"); + TEST_SINGLE(ld3(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 24), "ld3 {v31.8b, v0.8b, v1.8b}, [x30], #24"); TEST_SINGLE(ld3(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "ld3 {v26.16b, v27.16b, v28.16b}, [x30], #48"); TEST_SINGLE(ld3(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "ld3 {v26.8b, v27.8b, v28.8b}, [x30], #24"); @@ -513,8 +558,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld3(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "ld3 {v26.2d, v27.2d, v28.2d}, [x30], #48"); TEST_SINGLE(ld3(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "unallocated (NEONLoadStoreMultiStructPostIndex)"); - TEST_SINGLE(st3(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "st3 {v31.16b, v0.16b, v1.16b}, [x30], x29"); - TEST_SINGLE(st3(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "st3 {v31.8b, v0.8b, v1.8b}, [x30], x29"); + TEST_SINGLE(st3(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "st3 {v31.16b, v0.16b, v1.16b}, [x30], x29"); + TEST_SINGLE(st3(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "st3 {v31.8b, v0.8b, v1.8b}, [x30], x29"); TEST_SINGLE(st3(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "st3 {v26.16b, v27.16b, v28.16b}, [x30], x29"); TEST_SINGLE(st3(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "st3 {v26.8b, v27.8b, v28.8b}, [x30], x29"); @@ -525,10 +570,11 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st3(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "st3 {v26.2s, v27.2s, v28.2s}, [x30], x29"); TEST_SINGLE(st3(QReg::q26, QReg::q27, 
QReg::q28, Reg::r30, Reg::r29), "st3 {v26.2d, v27.2d, v28.2d}, [x30], x29"); - TEST_SINGLE(st3(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "unallocated (NEONLoadStoreMultiStructPostIndex)"); + TEST_SINGLE(st3(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "unallocated " + "(NEONLoadStoreMultiStructPostIndex)"); - TEST_SINGLE(st3(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 48), "st3 {v31.16b, v0.16b, v1.16b}, [x30], #48"); - TEST_SINGLE(st3(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 24), "st3 {v31.8b, v0.8b, v1.8b}, [x30], #24"); + TEST_SINGLE(st3(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 48), "st3 {v31.16b, v0.16b, v1.16b}, [x30], #48"); + TEST_SINGLE(st3(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 24), "st3 {v31.8b, v0.8b, v1.8b}, [x30], #24"); TEST_SINGLE(st3(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "st3 {v26.16b, v27.16b, v28.16b}, [x30], #48"); TEST_SINGLE(st3(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "st3 {v26.8b, v27.8b, v28.8b}, [x30], #24"); @@ -541,64 +587,106 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st3(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "st3 {v26.2d, v27.2d, v28.2d}, [x30], #48"); TEST_SINGLE(st3(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "unallocated (NEONLoadStoreMultiStructPostIndex)"); - TEST_SINGLE(ld4(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "ld4 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30], x29"); - TEST_SINGLE(ld4(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "ld4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], x29"); - TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4 {v26.16b, v27.16b, v28.16b, v29.16b}, [x30], x29"); - TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30], x29"); - - TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30], x29"); - 
TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4 {v26.4h, v27.4h, v28.4h, v29.4h}, [x30], x29"); - - TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4 {v26.4s, v27.4s, v28.4s, v29.4s}, [x30], x29"); - TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4 {v26.2s, v27.2s, v28.2s, v29.2s}, [x30], x29"); - - TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30], x29"); - TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "unallocated (NEONLoadStoreMultiStructPostIndex)"); - - TEST_SINGLE(ld4(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 64), "ld4 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30], #64"); - TEST_SINGLE(ld4(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 32), "ld4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], #32"); - TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld4 {v26.16b, v27.16b, v28.16b, v29.16b}, [x30], #64"); - TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld4 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30], #32"); - - TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld4 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30], #64"); - TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld4 {v26.4h, v27.4h, v28.4h, v29.4h}, [x30], #32"); - - TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld4 {v26.4s, v27.4s, v28.4s, v29.4s}, [x30], #64"); - TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld4 {v26.2s, v27.2s, v28.2s, v29.2s}, [x30], #32"); - - TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld4 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30], #64"); - TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "unallocated (NEONLoadStoreMultiStructPostIndex)"); - - 
TEST_SINGLE(st4(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "st4 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30], x29"); - TEST_SINGLE(st4(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "st4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], x29"); - TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st4 {v26.16b, v27.16b, v28.16b, v29.16b}, [x30], x29"); - TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st4 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30], x29"); - - TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st4 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30], x29"); - TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st4 {v26.4h, v27.4h, v28.4h, v29.4h}, [x30], x29"); - - TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st4 {v26.4s, v27.4s, v28.4s, v29.4s}, [x30], x29"); - TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st4 {v26.2s, v27.2s, v28.2s, v29.2s}, [x30], x29"); - - TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st4 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30], x29"); - TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "unallocated (NEONLoadStoreMultiStructPostIndex)"); - - TEST_SINGLE(st4(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 64), "st4 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30], #64"); - TEST_SINGLE(st4(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 32), "st4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], #32"); - TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st4 {v26.16b, v27.16b, v28.16b, v29.16b}, [x30], #64"); - TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st4 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30], #32"); - - TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st4 {v26.8h, v27.8h, v28.8h, v29.8h}, 
[x30], #64"); - TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st4 {v26.4h, v27.4h, v28.4h, v29.4h}, [x30], #32"); - - TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st4 {v26.4s, v27.4s, v28.4s, v29.4s}, [x30], #64"); - TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st4 {v26.2s, v27.2s, v28.2s, v29.2s}, [x30], #32"); - - TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st4 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30], #64"); - TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "unallocated (NEONLoadStoreMultiStructPostIndex)"); + TEST_SINGLE(ld4(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "ld4 {v31.16b, v0.16b, v1.16b, v2.16b}, " + "[x30], x29"); + TEST_SINGLE(ld4(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "ld4 {v31.8b, v0.8b, v1.8b, v2.8b}, " + "[x30], x29"); + TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4 {v26.16b, v27.16b, v28.16b, " + "v29.16b}, [x30], x29"); + TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4 {v26.8b, v27.8b, v28.8b, " + "v29.8b}, [x30], x29"); + + TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4 {v26.8h, v27.8h, v28.8h, " + "v29.8h}, [x30], x29"); + TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4 {v26.4h, v27.4h, v28.4h, " + "v29.4h}, [x30], x29"); + + TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4 {v26.4s, v27.4s, v28.4s, " + "v29.4s}, [x30], x29"); + TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4 {v26.2s, v27.2s, v28.2s, " + "v29.2s}, [x30], x29"); + + TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4 {v26.2d, v27.2d, v28.2d, " + "v29.2d}, [x30], x29"); + TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, 
Reg::r30, Reg::r29), "unallocated " + "(NEONLoadStoreMultiStructPostIndex" + ")"); + + TEST_SINGLE(ld4(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 64), "ld4 {v31.16b, v0.16b, v1.16b, v2.16b}, " + "[x30], #64"); + TEST_SINGLE(ld4(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 32), "ld4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], " + "#32"); + TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld4 {v26.16b, v27.16b, v28.16b, v29.16b}, " + "[x30], #64"); + TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld4 {v26.8b, v27.8b, v28.8b, v29.8b}, " + "[x30], #32"); + + TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld4 {v26.8h, v27.8h, v28.8h, v29.8h}, " + "[x30], #64"); + TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld4 {v26.4h, v27.4h, v28.4h, v29.4h}, " + "[x30], #32"); + + TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld4 {v26.4s, v27.4s, v28.4s, v29.4s}, " + "[x30], #64"); + TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld4 {v26.2s, v27.2s, v28.2s, v29.2s}, " + "[x30], #32"); + + TEST_SINGLE(ld4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld4 {v26.2d, v27.2d, v28.2d, v29.2d}, " + "[x30], #64"); + TEST_SINGLE(ld4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "unallocated " + "(NEONLoadStoreMultiStructPostIndex)"); + + TEST_SINGLE(st4(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "st4 {v31.16b, v0.16b, v1.16b, v2.16b}, " + "[x30], x29"); + TEST_SINGLE(st4(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "st4 {v31.8b, v0.8b, v1.8b, v2.8b}, " + "[x30], x29"); + TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st4 {v26.16b, v27.16b, v28.16b, " + "v29.16b}, [x30], x29"); + TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st4 {v26.8b, v27.8b, v28.8b, " + "v29.8b}, [x30], x29"); + 
+ TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st4 {v26.8h, v27.8h, v28.8h, " + "v29.8h}, [x30], x29"); + TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st4 {v26.4h, v27.4h, v28.4h, " + "v29.4h}, [x30], x29"); + + TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st4 {v26.4s, v27.4s, v28.4s, " + "v29.4s}, [x30], x29"); + TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st4 {v26.2s, v27.2s, v28.2s, " + "v29.2s}, [x30], x29"); + + TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st4 {v26.2d, v27.2d, v28.2d, " + "v29.2d}, [x30], x29"); + TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "unallocated " + "(NEONLoadStoreMultiStructPostIndex" + ")"); + + TEST_SINGLE(st4(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 64), "st4 {v31.16b, v0.16b, v1.16b, v2.16b}, " + "[x30], #64"); + TEST_SINGLE(st4(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 32), "st4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], " + "#32"); + TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st4 {v26.16b, v27.16b, v28.16b, v29.16b}, " + "[x30], #64"); + TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st4 {v26.8b, v27.8b, v28.8b, v29.8b}, " + "[x30], #32"); + + TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st4 {v26.8h, v27.8h, v28.8h, v29.8h}, " + "[x30], #64"); + TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st4 {v26.4h, v27.4h, v28.4h, v29.4h}, " + "[x30], #32"); + + TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st4 {v26.4s, v27.4s, v28.4s, v29.4s}, " + "[x30], #64"); + TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st4 {v26.2s, v27.2s, v28.2s, v29.2s}, " + "[x30], #32"); + + TEST_SINGLE(st4(QReg::q26, QReg::q27, QReg::q28, QReg::q29, 
Reg::r30, 64), "st4 {v26.2d, v27.2d, v28.2d, v29.2d}, " + "[x30], #64"); + TEST_SINGLE(st4(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "unallocated " + "(NEONLoadStoreMultiStructPostIndex)"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: ASIMD loadstore single") { - TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30), "ld1 {v26.b}[0], [x30]"); + TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30), "ld1 {v26.b}[0], [x30]"); TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30), "ld1 {v26.h}[0], [x30]"); TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30), "ld1 {v26.s}[0], [x30]"); TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30), "ld1 {v26.d}[0], [x30]"); @@ -608,17 +696,17 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: ASIMD loadstore single") TEST_SINGLE(ld1(VReg::v26, 3, Reg::r30), "ld1 {v26.s}[3], [x30]"); TEST_SINGLE(ld1(VReg::v26, 1, Reg::r30), "ld1 {v26.d}[1], [x30]"); - TEST_SINGLE(ld1r(DReg::d26, Reg::r30), "ld1r {v26.8b}, [x30]"); + TEST_SINGLE(ld1r(DReg::d26, Reg::r30), "ld1r {v26.8b}, [x30]"); TEST_SINGLE(ld1r(DReg::d26, Reg::r30), "ld1r {v26.4h}, [x30]"); TEST_SINGLE(ld1r(DReg::d26, Reg::r30), "ld1r {v26.2s}, [x30]"); TEST_SINGLE(ld1r(DReg::d26, Reg::r30), "ld1r {v26.1d}, [x30]"); - TEST_SINGLE(ld1r(QReg::q26, Reg::r30), "ld1r {v26.16b}, [x30]"); + TEST_SINGLE(ld1r(QReg::q26, Reg::r30), "ld1r {v26.16b}, [x30]"); TEST_SINGLE(ld1r(QReg::q26, Reg::r30), "ld1r {v26.8h}, [x30]"); TEST_SINGLE(ld1r(QReg::q26, Reg::r30), "ld1r {v26.4s}, [x30]"); TEST_SINGLE(ld1r(QReg::q26, Reg::r30), "ld1r {v26.2d}, [x30]"); - TEST_SINGLE(st1(VReg::v26, 0, Reg::r30), "st1 {v26.b}[0], [x30]"); + TEST_SINGLE(st1(VReg::v26, 0, Reg::r30), "st1 {v26.b}[0], [x30]"); TEST_SINGLE(st1(VReg::v26, 0, Reg::r30), "st1 {v26.h}[0], [x30]"); TEST_SINGLE(st1(VReg::v26, 0, Reg::r30), "st1 {v26.s}[0], [x30]"); TEST_SINGLE(st1(VReg::v26, 0, Reg::r30), "st1 {v26.d}[0], [x30]"); @@ -628,8 +716,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: ASIMD loadstore single") TEST_SINGLE(st1(VReg::v26, 3, Reg::r30), 
"st1 {v26.s}[3], [x30]"); TEST_SINGLE(st1(VReg::v26, 1, Reg::r30), "st1 {v26.d}[1], [x30]"); - TEST_SINGLE(ld2(VReg::v31, VReg::v0, 0, Reg::r30), "ld2 {v31.b, v0.b}[0], [x30]"); - TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30), "ld2 {v26.b, v27.b}[0], [x30]"); + TEST_SINGLE(ld2(VReg::v31, VReg::v0, 0, Reg::r30), "ld2 {v31.b, v0.b}[0], [x30]"); + TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30), "ld2 {v26.b, v27.b}[0], [x30]"); TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30), "ld2 {v26.h, v27.h}[0], [x30]"); TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30), "ld2 {v26.s, v27.s}[0], [x30]"); TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30), "ld2 {v26.d, v27.d}[0], [x30]"); @@ -639,20 +727,20 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: ASIMD loadstore single") TEST_SINGLE(ld2(VReg::v26, VReg::v27, 3, Reg::r30), "ld2 {v26.s, v27.s}[3], [x30]"); TEST_SINGLE(ld2(VReg::v26, VReg::v27, 1, Reg::r30), "ld2 {v26.d, v27.d}[1], [x30]"); - TEST_SINGLE(ld2r(DReg::d31, DReg::d0, Reg::r30), "ld2r {v31.8b, v0.8b}, [x30]"); - TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30), "ld2r {v26.8b, v27.8b}, [x30]"); + TEST_SINGLE(ld2r(DReg::d31, DReg::d0, Reg::r30), "ld2r {v31.8b, v0.8b}, [x30]"); + TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30), "ld2r {v26.8b, v27.8b}, [x30]"); TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30), "ld2r {v26.4h, v27.4h}, [x30]"); TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30), "ld2r {v26.2s, v27.2s}, [x30]"); TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30), "ld2r {v26.1d, v27.1d}, [x30]"); - TEST_SINGLE(ld2r(QReg::q31, QReg::q0, Reg::r30), "ld2r {v31.16b, v0.16b}, [x30]"); - TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30), "ld2r {v26.16b, v27.16b}, [x30]"); + TEST_SINGLE(ld2r(QReg::q31, QReg::q0, Reg::r30), "ld2r {v31.16b, v0.16b}, [x30]"); + TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30), "ld2r {v26.16b, v27.16b}, [x30]"); TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30), "ld2r {v26.8h, v27.8h}, [x30]"); 
TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30), "ld2r {v26.4s, v27.4s}, [x30]"); TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30), "ld2r {v26.2d, v27.2d}, [x30]"); - TEST_SINGLE(st2(VReg::v31, VReg::v0, 0, Reg::r30), "st2 {v31.b, v0.b}[0], [x30]"); - TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30), "st2 {v26.b, v27.b}[0], [x30]"); + TEST_SINGLE(st2(VReg::v31, VReg::v0, 0, Reg::r30), "st2 {v31.b, v0.b}[0], [x30]"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30), "st2 {v26.b, v27.b}[0], [x30]"); TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30), "st2 {v26.h, v27.h}[0], [x30]"); TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30), "st2 {v26.s, v27.s}[0], [x30]"); TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30), "st2 {v26.d, v27.d}[0], [x30]"); @@ -662,8 +750,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: ASIMD loadstore single") TEST_SINGLE(st2(VReg::v26, VReg::v27, 3, Reg::r30), "st2 {v26.s, v27.s}[3], [x30]"); TEST_SINGLE(st2(VReg::v26, VReg::v27, 1, Reg::r30), "st2 {v26.d, v27.d}[1], [x30]"); - TEST_SINGLE(ld3(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30), "ld3 {v31.b, v0.b, v1.b}[0], [x30]"); - TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "ld3 {v26.b, v27.b, v28.b}[0], [x30]"); + TEST_SINGLE(ld3(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30), "ld3 {v31.b, v0.b, v1.b}[0], [x30]"); + TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "ld3 {v26.b, v27.b, v28.b}[0], [x30]"); TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "ld3 {v26.h, v27.h, v28.h}[0], [x30]"); TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "ld3 {v26.s, v27.s, v28.s}[0], [x30]"); TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "ld3 {v26.d, v27.d, v28.d}[0], [x30]"); @@ -673,20 +761,20 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: ASIMD loadstore single") TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30), "ld3 {v26.s, v27.s, v28.s}[3], [x30]"); TEST_SINGLE(ld3(VReg::v26, 
VReg::v27, VReg::v28, 1, Reg::r30), "ld3 {v26.d, v27.d, v28.d}[1], [x30]"); - TEST_SINGLE(ld3r(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "ld3r {v31.8b, v0.8b, v1.8b}, [x30]"); - TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3r {v26.8b, v27.8b, v28.8b}, [x30]"); + TEST_SINGLE(ld3r(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "ld3r {v31.8b, v0.8b, v1.8b}, [x30]"); + TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3r {v26.8b, v27.8b, v28.8b}, [x30]"); TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3r {v26.4h, v27.4h, v28.4h}, [x30]"); TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3r {v26.2s, v27.2s, v28.2s}, [x30]"); TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3r {v26.1d, v27.1d, v28.1d}, [x30]"); - TEST_SINGLE(ld3r(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "ld3r {v31.16b, v0.16b, v1.16b}, [x30]"); - TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3r {v26.16b, v27.16b, v28.16b}, [x30]"); + TEST_SINGLE(ld3r(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "ld3r {v31.16b, v0.16b, v1.16b}, [x30]"); + TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3r {v26.16b, v27.16b, v28.16b}, [x30]"); TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3r {v26.8h, v27.8h, v28.8h}, [x30]"); TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3r {v26.4s, v27.4s, v28.4s}, [x30]"); TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3r {v26.2d, v27.2d, v28.2d}, [x30]"); - TEST_SINGLE(st3(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30), "st3 {v31.b, v0.b, v1.b}[0], [x30]"); - TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "st3 {v26.b, v27.b, v28.b}[0], [x30]"); + TEST_SINGLE(st3(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30), "st3 {v31.b, v0.b, v1.b}[0], [x30]"); + TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "st3 {v26.b, v27.b, v28.b}[0], [x30]"); TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "st3 
{v26.h, v27.h, v28.h}[0], [x30]"); TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "st3 {v26.s, v27.s, v28.s}[0], [x30]"); TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "st3 {v26.d, v27.d, v28.d}[0], [x30]"); @@ -696,42 +784,66 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: ASIMD loadstore single") TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30), "st3 {v26.s, v27.s, v28.s}[3], [x30]"); TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30), "st3 {v26.d, v27.d, v28.d}[1], [x30]"); - TEST_SINGLE(ld4(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30), "ld4 {v31.b, v0.b, v1.b, v2.b}[0], [x30]"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "ld4 {v26.b, v27.b, v28.b, v29.b}[0], [x30]"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "ld4 {v26.h, v27.h, v28.h, v29.h}[0], [x30]"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "ld4 {v26.s, v27.s, v28.s, v29.s}[0], [x30]"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "ld4 {v26.d, v27.d, v28.d, v29.d}[0], [x30]"); - - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30), "ld4 {v26.b, v27.b, v28.b, v29.b}[15], [x30]"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30), "ld4 {v26.h, v27.h, v28.h, v29.h}[7], [x30]"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30), "ld4 {v26.s, v27.s, v28.s, v29.s}[3], [x30]"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30), "ld4 {v26.d, v27.d, v28.d, v29.d}[1], [x30]"); - - TEST_SINGLE(ld4r(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "ld4r {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]"); - TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4r {v26.8b, v27.8b, v28.8b, v29.8b}, [x30]"); - TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4r {v26.4h, v27.4h, 
v28.4h, v29.4h}, [x30]"); - TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4r {v26.2s, v27.2s, v28.2s, v29.2s}, [x30]"); - TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4r {v26.1d, v27.1d, v28.1d, v29.1d}, [x30]"); - - TEST_SINGLE(ld4r(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "ld4r {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]"); - TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4r {v26.16b, v27.16b, v28.16b, v29.16b}, [x30]"); - TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4r {v26.8h, v27.8h, v28.8h, v29.8h}, [x30]"); - TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4r {v26.4s, v27.4s, v28.4s, v29.4s}, [x30]"); - TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4r {v26.2d, v27.2d, v28.2d, v29.2d}, [x30]"); - - TEST_SINGLE(st4(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30), "st4 {v31.b, v0.b, v1.b, v2.b}[0], [x30]"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "st4 {v26.b, v27.b, v28.b, v29.b}[0], [x30]"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "st4 {v26.h, v27.h, v28.h, v29.h}[0], [x30]"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "st4 {v26.s, v27.s, v28.s, v29.s}[0], [x30]"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "st4 {v26.d, v27.d, v28.d, v29.d}[0], [x30]"); - - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30), "st4 {v26.b, v27.b, v28.b, v29.b}[15], [x30]"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30), "st4 {v26.h, v27.h, v28.h, v29.h}[7], [x30]"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30), "st4 {v26.s, v27.s, v28.s, v29.s}[3], [x30]"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30), "st4 {v26.d, v27.d, v28.d, v29.d}[1], [x30]"); + 
TEST_SINGLE(ld4(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30), "ld4 {v31.b, v0.b, v1.b, v2.b}[0], [x30]"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "ld4 {v26.b, v27.b, v28.b, v29.b}[0], " + "[x30]"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "ld4 {v26.h, v27.h, v28.h, v29.h}[0], " + "[x30]"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "ld4 {v26.s, v27.s, v28.s, v29.s}[0], " + "[x30]"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "ld4 {v26.d, v27.d, v28.d, v29.d}[0], " + "[x30]"); + + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30), "ld4 {v26.b, v27.b, v28.b, v29.b}[15], " + "[x30]"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30), "ld4 {v26.h, v27.h, v28.h, v29.h}[7], " + "[x30]"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30), "ld4 {v26.s, v27.s, v28.s, v29.s}[3], " + "[x30]"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30), "ld4 {v26.d, v27.d, v28.d, v29.d}[1], " + "[x30]"); + + TEST_SINGLE(ld4r(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "ld4r {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]"); + TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4r {v26.8b, v27.8b, v28.8b, v29.8b}, " + "[x30]"); + TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4r {v26.4h, v27.4h, v28.4h, v29.4h}, " + "[x30]"); + TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4r {v26.2s, v27.2s, v28.2s, v29.2s}, " + "[x30]"); + TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4r {v26.1d, v27.1d, v28.1d, v29.1d}, " + "[x30]"); + + TEST_SINGLE(ld4r(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "ld4r {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]"); + TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4r {v26.16b, v27.16b, 
v28.16b, v29.16b}, " + "[x30]"); + TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4r {v26.8h, v27.8h, v28.8h, v29.8h}, " + "[x30]"); + TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4r {v26.4s, v27.4s, v28.4s, v29.4s}, " + "[x30]"); + TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4r {v26.2d, v27.2d, v28.2d, v29.2d}, " + "[x30]"); + + TEST_SINGLE(st4(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30), "st4 {v31.b, v0.b, v1.b, v2.b}[0], [x30]"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "st4 {v26.b, v27.b, v28.b, v29.b}[0], " + "[x30]"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "st4 {v26.h, v27.h, v28.h, v29.h}[0], " + "[x30]"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "st4 {v26.s, v27.s, v28.s, v29.s}[0], " + "[x30]"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "st4 {v26.d, v27.d, v28.d, v29.d}[0], " + "[x30]"); + + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30), "st4 {v26.b, v27.b, v28.b, v29.b}[15], " + "[x30]"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30), "st4 {v26.h, v27.h, v28.h, v29.h}[7], " + "[x30]"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30), "st4 {v26.s, v27.s, v28.s, v29.s}[3], " + "[x30]"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30), "st4 {v26.d, v27.d, v28.d, v29.d}[1], " + "[x30]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store single structure (post-indexed)") { - TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30, 1), "ld1 {v26.b}[0], [x30], #1"); + TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30, 1), "ld1 {v26.b}[0], [x30], #1"); TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30, 2), "ld1 {v26.h}[0], [x30], #2"); TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30, 4), "ld1 {v26.s}[0], [x30], #4"); 
TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30, 8), "ld1 {v26.d}[0], [x30], #8"); @@ -741,17 +853,17 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(ld1(VReg::v26, 3, Reg::r30, 4), "ld1 {v26.s}[3], [x30], #4"); TEST_SINGLE(ld1(VReg::v26, 1, Reg::r30, 8), "ld1 {v26.d}[1], [x30], #8"); - TEST_SINGLE(ld1r(DReg::d26, Reg::r30, 1), "ld1r {v26.8b}, [x30], #1"); + TEST_SINGLE(ld1r(DReg::d26, Reg::r30, 1), "ld1r {v26.8b}, [x30], #1"); TEST_SINGLE(ld1r(DReg::d26, Reg::r30, 2), "ld1r {v26.4h}, [x30], #2"); TEST_SINGLE(ld1r(DReg::d26, Reg::r30, 4), "ld1r {v26.2s}, [x30], #4"); TEST_SINGLE(ld1r(DReg::d26, Reg::r30, 8), "ld1r {v26.1d}, [x30], #8"); - TEST_SINGLE(ld1r(QReg::q26, Reg::r30, 1), "ld1r {v26.16b}, [x30], #1"); + TEST_SINGLE(ld1r(QReg::q26, Reg::r30, 1), "ld1r {v26.16b}, [x30], #1"); TEST_SINGLE(ld1r(QReg::q26, Reg::r30, 2), "ld1r {v26.8h}, [x30], #2"); TEST_SINGLE(ld1r(QReg::q26, Reg::r30, 4), "ld1r {v26.4s}, [x30], #4"); TEST_SINGLE(ld1r(QReg::q26, Reg::r30, 8), "ld1r {v26.2d}, [x30], #8"); - TEST_SINGLE(st1(VReg::v26, 0, Reg::r30, 1), "st1 {v26.b}[0], [x30], #1"); + TEST_SINGLE(st1(VReg::v26, 0, Reg::r30, 1), "st1 {v26.b}[0], [x30], #1"); TEST_SINGLE(st1(VReg::v26, 0, Reg::r30, 2), "st1 {v26.h}[0], [x30], #2"); TEST_SINGLE(st1(VReg::v26, 0, Reg::r30, 4), "st1 {v26.s}[0], [x30], #4"); TEST_SINGLE(st1(VReg::v26, 0, Reg::r30, 8), "st1 {v26.d}[0], [x30], #8"); @@ -761,239 +873,295 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store TEST_SINGLE(st1(VReg::v26, 3, Reg::r30, 4), "st1 {v26.s}[3], [x30], #4"); TEST_SINGLE(st1(VReg::v26, 1, Reg::r30, 8), "st1 {v26.d}[1], [x30], #8"); - TEST_SINGLE(ld2(VReg::v31, VReg::v0, 0, Reg::r30, 2), "ld2 {v31.b, v0.b}[0], [x30], #2"); - TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30, 2), "ld2 {v26.b, v27.b}[0], [x30], #2"); - TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30, 4), "ld2 {v26.h, v27.h}[0], [x30], #4"); - TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, 
Reg::r30, 8), "ld2 {v26.s, v27.s}[0], [x30], #8"); + TEST_SINGLE(ld2(VReg::v31, VReg::v0, 0, Reg::r30, 2), "ld2 {v31.b, v0.b}[0], [x30], #2"); + TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30, 2), "ld2 {v26.b, v27.b}[0], [x30], #2"); + TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30, 4), "ld2 {v26.h, v27.h}[0], [x30], #4"); + TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30, 8), "ld2 {v26.s, v27.s}[0], [x30], #8"); TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30, 16), "ld2 {v26.d, v27.d}[0], [x30], #16"); - TEST_SINGLE(ld2(VReg::v26, VReg::v27, 15, Reg::r30, 2), "ld2 {v26.b, v27.b}[15], [x30], #2"); - TEST_SINGLE(ld2(VReg::v26, VReg::v27, 7, Reg::r30, 4), "ld2 {v26.h, v27.h}[7], [x30], #4"); - TEST_SINGLE(ld2(VReg::v26, VReg::v27, 3, Reg::r30, 8), "ld2 {v26.s, v27.s}[3], [x30], #8"); + TEST_SINGLE(ld2(VReg::v26, VReg::v27, 15, Reg::r30, 2), "ld2 {v26.b, v27.b}[15], [x30], #2"); + TEST_SINGLE(ld2(VReg::v26, VReg::v27, 7, Reg::r30, 4), "ld2 {v26.h, v27.h}[7], [x30], #4"); + TEST_SINGLE(ld2(VReg::v26, VReg::v27, 3, Reg::r30, 8), "ld2 {v26.s, v27.s}[3], [x30], #8"); TEST_SINGLE(ld2(VReg::v26, VReg::v27, 1, Reg::r30, 16), "ld2 {v26.d, v27.d}[1], [x30], #16"); - TEST_SINGLE(ld2r(DReg::d31, DReg::d0, Reg::r30, 2), "ld2r {v31.8b, v0.8b}, [x30], #2"); - TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30, 2), "ld2r {v26.8b, v27.8b}, [x30], #2"); - TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30, 4), "ld2r {v26.4h, v27.4h}, [x30], #4"); - TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30, 8), "ld2r {v26.2s, v27.2s}, [x30], #8"); + TEST_SINGLE(ld2r(DReg::d31, DReg::d0, Reg::r30, 2), "ld2r {v31.8b, v0.8b}, [x30], #2"); + TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30, 2), "ld2r {v26.8b, v27.8b}, [x30], #2"); + TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30, 4), "ld2r {v26.4h, v27.4h}, [x30], #4"); + TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30, 8), "ld2r {v26.2s, v27.2s}, [x30], #8"); TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30, 16), "ld2r {v26.1d, v27.1d}, [x30], 
#16"); - TEST_SINGLE(ld2r(QReg::q31, QReg::q0, Reg::r30, 2), "ld2r {v31.16b, v0.16b}, [x30], #2"); - TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30, 2), "ld2r {v26.16b, v27.16b}, [x30], #2"); - TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30, 4), "ld2r {v26.8h, v27.8h}, [x30], #4"); - TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30, 8), "ld2r {v26.4s, v27.4s}, [x30], #8"); + TEST_SINGLE(ld2r(QReg::q31, QReg::q0, Reg::r30, 2), "ld2r {v31.16b, v0.16b}, [x30], #2"); + TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30, 2), "ld2r {v26.16b, v27.16b}, [x30], #2"); + TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30, 4), "ld2r {v26.8h, v27.8h}, [x30], #4"); + TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30, 8), "ld2r {v26.4s, v27.4s}, [x30], #8"); TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30, 16), "ld2r {v26.2d, v27.2d}, [x30], #16"); - TEST_SINGLE(st2(VReg::v31, VReg::v0, 0, Reg::r30, 2), "st2 {v31.b, v0.b}[0], [x30], #2"); - TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30, 2), "st2 {v26.b, v27.b}[0], [x30], #2"); - TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30, 4), "st2 {v26.h, v27.h}[0], [x30], #4"); - TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30, 8), "st2 {v26.s, v27.s}[0], [x30], #8"); + TEST_SINGLE(st2(VReg::v31, VReg::v0, 0, Reg::r30, 2), "st2 {v31.b, v0.b}[0], [x30], #2"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30, 2), "st2 {v26.b, v27.b}[0], [x30], #2"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30, 4), "st2 {v26.h, v27.h}[0], [x30], #4"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30, 8), "st2 {v26.s, v27.s}[0], [x30], #8"); TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30, 16), "st2 {v26.d, v27.d}[0], [x30], #16"); - TEST_SINGLE(st2(VReg::v26, VReg::v27, 15, Reg::r30, 2), "st2 {v26.b, v27.b}[15], [x30], #2"); - TEST_SINGLE(st2(VReg::v26, VReg::v27, 7, Reg::r30, 4), "st2 {v26.h, v27.h}[7], [x30], #4"); - TEST_SINGLE(st2(VReg::v26, VReg::v27, 3, Reg::r30, 8), "st2 {v26.s, v27.s}[3], [x30], #8"); - TEST_SINGLE(st2(VReg::v26, 
VReg::v27, 1, Reg::r30, 16), "st2 {v26.d, v27.d}[1], [x30], #16"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 15, Reg::r30, 2), "st2 {v26.b, v27.b}[15], [x30], #2"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 7, Reg::r30, 4), "st2 {v26.h, v27.h}[7], [x30], #4"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 3, Reg::r30, 8), "st2 {v26.s, v27.s}[3], [x30], #8"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 1, Reg::r30, 16), "st2 {v26.d, v27.d}[1], [x30], #16"); - TEST_SINGLE(ld3(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30, 3), "ld3 {v31.b, v0.b, v1.b}[0], [x30], #3"); - TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 3), "ld3 {v26.b, v27.b, v28.b}[0], [x30], #3"); - TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 6), "ld3 {v26.h, v27.h, v28.h}[0], [x30], #6"); + TEST_SINGLE(ld3(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30, 3), "ld3 {v31.b, v0.b, v1.b}[0], [x30], #3"); + TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 3), "ld3 {v26.b, v27.b, v28.b}[0], [x30], #3"); + TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 6), "ld3 {v26.h, v27.h, v28.h}[0], [x30], #6"); TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 12), "ld3 {v26.s, v27.s, v28.s}[0], [x30], #12"); TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 24), "ld3 {v26.d, v27.d, v28.d}[0], [x30], #24"); - TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30, 1), "ld3 {v26.b, v27.b, v28.b}[15], [x30], #3"); - TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30, 2), "ld3 {v26.h, v27.h, v28.h}[7], [x30], #6"); - TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30, 4), "ld3 {v26.s, v27.s, v28.s}[3], [x30], #12"); - TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30, 8), "ld3 {v26.d, v27.d, v28.d}[1], [x30], #24"); + TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30, 1), "ld3 {v26.b, v27.b, v28.b}[15], [x30], #3"); + TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30, 2), "ld3 {v26.h, v27.h, v28.h}[7], 
[x30], #6"); + TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30, 4), "ld3 {v26.s, v27.s, v28.s}[3], [x30], #12"); + TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30, 8), "ld3 {v26.d, v27.d, v28.d}[1], [x30], #24"); - TEST_SINGLE(ld3r(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 3), "ld3r {v31.8b, v0.8b, v1.8b}, [x30], #3"); - TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 3), "ld3r {v26.8b, v27.8b, v28.8b}, [x30], #3"); - TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 6), "ld3r {v26.4h, v27.4h, v28.4h}, [x30], #6"); + TEST_SINGLE(ld3r(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 3), "ld3r {v31.8b, v0.8b, v1.8b}, [x30], #3"); + TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 3), "ld3r {v26.8b, v27.8b, v28.8b}, [x30], #3"); + TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 6), "ld3r {v26.4h, v27.4h, v28.4h}, [x30], #6"); TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 12), "ld3r {v26.2s, v27.2s, v28.2s}, [x30], #12"); TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "ld3r {v26.1d, v27.1d, v28.1d}, [x30], #24"); - TEST_SINGLE(ld3r(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 3), "ld3r {v31.16b, v0.16b, v1.16b}, [x30], #3"); - TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 3), "ld3r {v26.16b, v27.16b, v28.16b}, [x30], #3"); - TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 6), "ld3r {v26.8h, v27.8h, v28.8h}, [x30], #6"); + TEST_SINGLE(ld3r(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 3), "ld3r {v31.16b, v0.16b, v1.16b}, [x30], #3"); + TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 3), "ld3r {v26.16b, v27.16b, v28.16b}, [x30], #3"); + TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 6), "ld3r {v26.8h, v27.8h, v28.8h}, [x30], #6"); TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 12), "ld3r {v26.4s, v27.4s, v28.4s}, [x30], #12"); TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 24), "ld3r {v26.2d, v27.2d, v28.2d}, 
[x30], #24"); - TEST_SINGLE(st3(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30, 3), "st3 {v31.b, v0.b, v1.b}[0], [x30], #3"); - TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 3), "st3 {v26.b, v27.b, v28.b}[0], [x30], #3"); - TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 6), "st3 {v26.h, v27.h, v28.h}[0], [x30], #6"); + TEST_SINGLE(st3(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30, 3), "st3 {v31.b, v0.b, v1.b}[0], [x30], #3"); + TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 3), "st3 {v26.b, v27.b, v28.b}[0], [x30], #3"); + TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 6), "st3 {v26.h, v27.h, v28.h}[0], [x30], #6"); TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 12), "st3 {v26.s, v27.s, v28.s}[0], [x30], #12"); TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 24), "st3 {v26.d, v27.d, v28.d}[0], [x30], #24"); - TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30, 3), "st3 {v26.b, v27.b, v28.b}[15], [x30], #3"); - TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30, 6), "st3 {v26.h, v27.h, v28.h}[7], [x30], #6"); - TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30, 12), "st3 {v26.s, v27.s, v28.s}[3], [x30], #12"); - TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30, 24), "st3 {v26.d, v27.d, v28.d}[1], [x30], #24"); - - TEST_SINGLE(ld4(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30, 4), "ld4 {v31.b, v0.b, v1.b, v2.b}[0], [x30], #4"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 4), "ld4 {v26.b, v27.b, v28.b, v29.b}[0], [x30], #4"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 8), "ld4 {v26.h, v27.h, v28.h, v29.h}[0], [x30], #8"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 16), "ld4 {v26.s, v27.s, v28.s, v29.s}[0], [x30], #16"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 32), "ld4 {v26.d, v27.d, v28.d, v29.d}[0], [x30], 
#32"); - - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30, 4), "ld4 {v26.b, v27.b, v28.b, v29.b}[15], [x30], #4"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30, 8), "ld4 {v26.h, v27.h, v28.h, v29.h}[7], [x30], #8"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30, 16), "ld4 {v26.s, v27.s, v28.s, v29.s}[3], [x30], #16"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30, 32), "ld4 {v26.d, v27.d, v28.d, v29.d}[1], [x30], #32"); - - TEST_SINGLE(ld4r(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 4), "ld4r {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], #4"); - TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 4), "ld4r {v26.8b, v27.8b, v28.8b, v29.8b}, [x30], #4"); - TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 8), "ld4r {v26.4h, v27.4h, v28.4h, v29.4h}, [x30], #8"); - TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 16), "ld4r {v26.2s, v27.2s, v28.2s, v29.2s}, [x30], #16"); - TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld4r {v26.1d, v27.1d, v28.1d, v29.1d}, [x30], #32"); - - TEST_SINGLE(ld4r(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 4), "ld4r {v31.16b, v0.16b, v1.16b, v2.16b}, [x30], #4"); - TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 4), "ld4r {v26.16b, v27.16b, v28.16b, v29.16b}, [x30], #4"); - TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 8), "ld4r {v26.8h, v27.8h, v28.8h, v29.8h}, [x30], #8"); - TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 16), "ld4r {v26.4s, v27.4s, v28.4s, v29.4s}, [x30], #16"); - TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 32), "ld4r {v26.2d, v27.2d, v28.2d, v29.2d}, [x30], #32"); - - TEST_SINGLE(st4(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30, 4), "st4 {v31.b, v0.b, v1.b, v2.b}[0], [x30], #4"); - TEST_SINGLE(st4(VReg::v26, 
VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 4), "st4 {v26.b, v27.b, v28.b, v29.b}[0], [x30], #4"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 8), "st4 {v26.h, v27.h, v28.h, v29.h}[0], [x30], #8"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 16), "st4 {v26.s, v27.s, v28.s, v29.s}[0], [x30], #16"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 32), "st4 {v26.d, v27.d, v28.d, v29.d}[0], [x30], #32"); - - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30, 4), "st4 {v26.b, v27.b, v28.b, v29.b}[15], [x30], #4"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30, 8), "st4 {v26.h, v27.h, v28.h, v29.h}[7], [x30], #8"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30, 16), "st4 {v26.s, v27.s, v28.s, v29.s}[3], [x30], #16"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30, 32), "st4 {v26.d, v27.d, v28.d, v29.d}[1], [x30], #32"); - - TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30, Reg::r29), "ld1 {v26.b}[0], [x30], x29"); + TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30, 3), "st3 {v26.b, v27.b, v28.b}[15], [x30], #3"); + TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30, 6), "st3 {v26.h, v27.h, v28.h}[7], [x30], #6"); + TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30, 12), "st3 {v26.s, v27.s, v28.s}[3], [x30], #12"); + TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30, 24), "st3 {v26.d, v27.d, v28.d}[1], [x30], #24"); + + TEST_SINGLE(ld4(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30, 4), "ld4 {v31.b, v0.b, v1.b, v2.b}[0], [x30], " + "#4"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 4), "ld4 {v26.b, v27.b, v28.b, v29.b}[0], " + "[x30], #4"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 8), "ld4 {v26.h, v27.h, v28.h, v29.h}[0], " + "[x30], #8"); + TEST_SINGLE(ld4(VReg::v26, 
VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 16), "ld4 {v26.s, v27.s, v28.s, v29.s}[0], " + "[x30], #16"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 32), "ld4 {v26.d, v27.d, v28.d, v29.d}[0], " + "[x30], #32"); + + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30, 4), "ld4 {v26.b, v27.b, v28.b, v29.b}[15], " + "[x30], #4"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30, 8), "ld4 {v26.h, v27.h, v28.h, v29.h}[7], " + "[x30], #8"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30, 16), "ld4 {v26.s, v27.s, v28.s, v29.s}[3], " + "[x30], #16"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30, 32), "ld4 {v26.d, v27.d, v28.d, v29.d}[1], " + "[x30], #32"); + + TEST_SINGLE(ld4r(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 4), "ld4r {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], " + "#4"); + TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 4), "ld4r {v26.8b, v27.8b, v28.8b, v29.8b}, " + "[x30], #4"); + TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 8), "ld4r {v26.4h, v27.4h, v28.4h, v29.4h}, " + "[x30], #8"); + TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 16), "ld4r {v26.2s, v27.2s, v28.2s, v29.2s}, " + "[x30], #16"); + TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld4r {v26.1d, v27.1d, v28.1d, v29.1d}, " + "[x30], #32"); + + TEST_SINGLE(ld4r(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 4), "ld4r {v31.16b, v0.16b, v1.16b, v2.16b}, " + "[x30], #4"); + TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 4), "ld4r {v26.16b, v27.16b, v28.16b, " + "v29.16b}, [x30], #4"); + TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 8), "ld4r {v26.8h, v27.8h, v28.8h, v29.8h}, " + "[x30], #8"); + TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 16), "ld4r {v26.4s, v27.4s, v28.4s, v29.4s}, " + 
"[x30], #16"); + TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 32), "ld4r {v26.2d, v27.2d, v28.2d, v29.2d}, " + "[x30], #32"); + + TEST_SINGLE(st4(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30, 4), "st4 {v31.b, v0.b, v1.b, v2.b}[0], [x30], " + "#4"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 4), "st4 {v26.b, v27.b, v28.b, v29.b}[0], " + "[x30], #4"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 8), "st4 {v26.h, v27.h, v28.h, v29.h}[0], " + "[x30], #8"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 16), "st4 {v26.s, v27.s, v28.s, v29.s}[0], " + "[x30], #16"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 32), "st4 {v26.d, v27.d, v28.d, v29.d}[0], " + "[x30], #32"); + + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30, 4), "st4 {v26.b, v27.b, v28.b, v29.b}[15], " + "[x30], #4"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30, 8), "st4 {v26.h, v27.h, v28.h, v29.h}[7], " + "[x30], #8"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30, 16), "st4 {v26.s, v27.s, v28.s, v29.s}[3], " + "[x30], #16"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30, 32), "st4 {v26.d, v27.d, v28.d, v29.d}[1], " + "[x30], #32"); + + TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30, Reg::r29), "ld1 {v26.b}[0], [x30], x29"); TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30, Reg::r29), "ld1 {v26.h}[0], [x30], x29"); TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30, Reg::r29), "ld1 {v26.s}[0], [x30], x29"); TEST_SINGLE(ld1(VReg::v26, 0, Reg::r30, Reg::r29), "ld1 {v26.d}[0], [x30], x29"); - TEST_SINGLE(ld1(VReg::v26, 15, Reg::r30, Reg::r29), "ld1 {v26.b}[15], [x30], x29"); + TEST_SINGLE(ld1(VReg::v26, 15, Reg::r30, Reg::r29), "ld1 {v26.b}[15], [x30], x29"); TEST_SINGLE(ld1(VReg::v26, 7, Reg::r30, Reg::r29), "ld1 {v26.h}[7], [x30], x29"); TEST_SINGLE(ld1(VReg::v26, 3, 
Reg::r30, Reg::r29), "ld1 {v26.s}[3], [x30], x29"); TEST_SINGLE(ld1(VReg::v26, 1, Reg::r30, Reg::r29), "ld1 {v26.d}[1], [x30], x29"); - TEST_SINGLE(ld1r(DReg::d26, Reg::r30, Reg::r29), "ld1r {v26.8b}, [x30], x29"); + TEST_SINGLE(ld1r(DReg::d26, Reg::r30, Reg::r29), "ld1r {v26.8b}, [x30], x29"); TEST_SINGLE(ld1r(DReg::d26, Reg::r30, Reg::r29), "ld1r {v26.4h}, [x30], x29"); TEST_SINGLE(ld1r(DReg::d26, Reg::r30, Reg::r29), "ld1r {v26.2s}, [x30], x29"); TEST_SINGLE(ld1r(DReg::d26, Reg::r30, Reg::r29), "ld1r {v26.1d}, [x30], x29"); - TEST_SINGLE(ld1r(QReg::q26, Reg::r30, Reg::r29), "ld1r {v26.16b}, [x30], x29"); + TEST_SINGLE(ld1r(QReg::q26, Reg::r30, Reg::r29), "ld1r {v26.16b}, [x30], x29"); TEST_SINGLE(ld1r(QReg::q26, Reg::r30, Reg::r29), "ld1r {v26.8h}, [x30], x29"); TEST_SINGLE(ld1r(QReg::q26, Reg::r30, Reg::r29), "ld1r {v26.4s}, [x30], x29"); TEST_SINGLE(ld1r(QReg::q26, Reg::r30, Reg::r29), "ld1r {v26.2d}, [x30], x29"); - TEST_SINGLE(st1(VReg::v26, 0, Reg::r30, Reg::r29), "st1 {v26.b}[0], [x30], x29"); + TEST_SINGLE(st1(VReg::v26, 0, Reg::r30, Reg::r29), "st1 {v26.b}[0], [x30], x29"); TEST_SINGLE(st1(VReg::v26, 0, Reg::r30, Reg::r29), "st1 {v26.h}[0], [x30], x29"); TEST_SINGLE(st1(VReg::v26, 0, Reg::r30, Reg::r29), "st1 {v26.s}[0], [x30], x29"); TEST_SINGLE(st1(VReg::v26, 0, Reg::r30, Reg::r29), "st1 {v26.d}[0], [x30], x29"); - TEST_SINGLE(st1(VReg::v26, 15, Reg::r30, Reg::r29), "st1 {v26.b}[15], [x30], x29"); + TEST_SINGLE(st1(VReg::v26, 15, Reg::r30, Reg::r29), "st1 {v26.b}[15], [x30], x29"); TEST_SINGLE(st1(VReg::v26, 7, Reg::r30, Reg::r29), "st1 {v26.h}[7], [x30], x29"); TEST_SINGLE(st1(VReg::v26, 3, Reg::r30, Reg::r29), "st1 {v26.s}[3], [x30], x29"); TEST_SINGLE(st1(VReg::v26, 1, Reg::r30, Reg::r29), "st1 {v26.d}[1], [x30], x29"); - TEST_SINGLE(ld2(VReg::v31, VReg::v0, 0, Reg::r30, Reg::r29), "ld2 {v31.b, v0.b}[0], [x30], x29"); - TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "ld2 {v26.b, v27.b}[0], [x30], x29"); + TEST_SINGLE(ld2(VReg::v31, 
VReg::v0, 0, Reg::r30, Reg::r29), "ld2 {v31.b, v0.b}[0], [x30], x29"); + TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "ld2 {v26.b, v27.b}[0], [x30], x29"); TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "ld2 {v26.h, v27.h}[0], [x30], x29"); TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "ld2 {v26.s, v27.s}[0], [x30], x29"); TEST_SINGLE(ld2(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "ld2 {v26.d, v27.d}[0], [x30], x29"); - TEST_SINGLE(ld2(VReg::v26, VReg::v27, 15, Reg::r30, Reg::r29), "ld2 {v26.b, v27.b}[15], [x30], x29"); - TEST_SINGLE(ld2(VReg::v26, VReg::v27, 7, Reg::r30, Reg::r29), "ld2 {v26.h, v27.h}[7], [x30], x29"); - TEST_SINGLE(ld2(VReg::v26, VReg::v27, 3, Reg::r30, Reg::r29), "ld2 {v26.s, v27.s}[3], [x30], x29"); - TEST_SINGLE(ld2(VReg::v26, VReg::v27, 1, Reg::r30, Reg::r29), "ld2 {v26.d, v27.d}[1], [x30], x29"); + TEST_SINGLE(ld2(VReg::v26, VReg::v27, 15, Reg::r30, Reg::r29), "ld2 {v26.b, v27.b}[15], [x30], x29"); + TEST_SINGLE(ld2(VReg::v26, VReg::v27, 7, Reg::r30, Reg::r29), "ld2 {v26.h, v27.h}[7], [x30], x29"); + TEST_SINGLE(ld2(VReg::v26, VReg::v27, 3, Reg::r30, Reg::r29), "ld2 {v26.s, v27.s}[3], [x30], x29"); + TEST_SINGLE(ld2(VReg::v26, VReg::v27, 1, Reg::r30, Reg::r29), "ld2 {v26.d, v27.d}[1], [x30], x29"); - TEST_SINGLE(ld2r(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "ld2r {v31.8b, v0.8b}, [x30], x29"); - TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2r {v26.8b, v27.8b}, [x30], x29"); + TEST_SINGLE(ld2r(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "ld2r {v31.8b, v0.8b}, [x30], x29"); + TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2r {v26.8b, v27.8b}, [x30], x29"); TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2r {v26.4h, v27.4h}, [x30], x29"); TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2r {v26.2s, v27.2s}, [x30], x29"); TEST_SINGLE(ld2r(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2r {v26.1d, v27.1d}, [x30], x29"); - 
TEST_SINGLE(ld2r(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "ld2r {v31.16b, v0.16b}, [x30], x29"); - TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2r {v26.16b, v27.16b}, [x30], x29"); + TEST_SINGLE(ld2r(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "ld2r {v31.16b, v0.16b}, [x30], x29"); + TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2r {v26.16b, v27.16b}, [x30], x29"); TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2r {v26.8h, v27.8h}, [x30], x29"); TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2r {v26.4s, v27.4s}, [x30], x29"); TEST_SINGLE(ld2r(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2r {v26.2d, v27.2d}, [x30], x29"); - TEST_SINGLE(st2(VReg::v31, VReg::v0, 0, Reg::r30, Reg::r29), "st2 {v31.b, v0.b}[0], [x30], x29"); - TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "st2 {v26.b, v27.b}[0], [x30], x29"); + TEST_SINGLE(st2(VReg::v31, VReg::v0, 0, Reg::r30, Reg::r29), "st2 {v31.b, v0.b}[0], [x30], x29"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "st2 {v26.b, v27.b}[0], [x30], x29"); TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "st2 {v26.h, v27.h}[0], [x30], x29"); TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "st2 {v26.s, v27.s}[0], [x30], x29"); TEST_SINGLE(st2(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "st2 {v26.d, v27.d}[0], [x30], x29"); - TEST_SINGLE(st2(VReg::v26, VReg::v27, 15, Reg::r30, Reg::r29), "st2 {v26.b, v27.b}[15], [x30], x29"); - TEST_SINGLE(st2(VReg::v26, VReg::v27, 7, Reg::r30, Reg::r29), "st2 {v26.h, v27.h}[7], [x30], x29"); - TEST_SINGLE(st2(VReg::v26, VReg::v27, 3, Reg::r30, Reg::r29), "st2 {v26.s, v27.s}[3], [x30], x29"); - TEST_SINGLE(st2(VReg::v26, VReg::v27, 1, Reg::r30, Reg::r29), "st2 {v26.d, v27.d}[1], [x30], x29"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 15, Reg::r30, Reg::r29), "st2 {v26.b, v27.b}[15], [x30], x29"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 7, Reg::r30, Reg::r29), "st2 {v26.h, 
v27.h}[7], [x30], x29"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 3, Reg::r30, Reg::r29), "st2 {v26.s, v27.s}[3], [x30], x29"); + TEST_SINGLE(st2(VReg::v26, VReg::v27, 1, Reg::r30, Reg::r29), "st2 {v26.d, v27.d}[1], [x30], x29"); - TEST_SINGLE(ld3(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30, Reg::r29), "ld3 {v31.b, v0.b, v1.b}[0], [x30], x29"); - TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "ld3 {v26.b, v27.b, v28.b}[0], [x30], x29"); + TEST_SINGLE(ld3(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30, Reg::r29), "ld3 {v31.b, v0.b, v1.b}[0], [x30], x29"); + TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "ld3 {v26.b, v27.b, v28.b}[0], [x30], x29"); TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "ld3 {v26.h, v27.h, v28.h}[0], [x30], x29"); TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "ld3 {v26.s, v27.s, v28.s}[0], [x30], x29"); TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "ld3 {v26.d, v27.d, v28.d}[0], [x30], x29"); - TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30, Reg::r29), "ld3 {v26.b, v27.b, v28.b}[15], [x30], x29"); - TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30, Reg::r29), "ld3 {v26.h, v27.h, v28.h}[7], [x30], x29"); - TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30, Reg::r29), "ld3 {v26.s, v27.s, v28.s}[3], [x30], x29"); - TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30, Reg::r29), "ld3 {v26.d, v27.d, v28.d}[1], [x30], x29"); + TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30, Reg::r29), "ld3 {v26.b, v27.b, v28.b}[15], [x30], x29"); + TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30, Reg::r29), "ld3 {v26.h, v27.h, v28.h}[7], [x30], x29"); + TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30, Reg::r29), "ld3 {v26.s, v27.s, v28.s}[3], [x30], x29"); + TEST_SINGLE(ld3(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30, Reg::r29), "ld3 {v26.d, v27.d, 
v28.d}[1], [x30], x29"); - TEST_SINGLE(ld3r(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "ld3r {v31.8b, v0.8b, v1.8b}, [x30], x29"); - TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3r {v26.8b, v27.8b, v28.8b}, [x30], x29"); + TEST_SINGLE(ld3r(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "ld3r {v31.8b, v0.8b, v1.8b}, [x30], x29"); + TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3r {v26.8b, v27.8b, v28.8b}, [x30], x29"); TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3r {v26.4h, v27.4h, v28.4h}, [x30], x29"); TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3r {v26.2s, v27.2s, v28.2s}, [x30], x29"); TEST_SINGLE(ld3r(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3r {v26.1d, v27.1d, v28.1d}, [x30], x29"); - TEST_SINGLE(ld3r(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "ld3r {v31.16b, v0.16b, v1.16b}, [x30], x29"); - TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3r {v26.16b, v27.16b, v28.16b}, [x30], x29"); + TEST_SINGLE(ld3r(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "ld3r {v31.16b, v0.16b, v1.16b}, [x30], x29"); + TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3r {v26.16b, v27.16b, v28.16b}, [x30], x29"); TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3r {v26.8h, v27.8h, v28.8h}, [x30], x29"); TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3r {v26.4s, v27.4s, v28.4s}, [x30], x29"); TEST_SINGLE(ld3r(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3r {v26.2d, v27.2d, v28.2d}, [x30], x29"); - TEST_SINGLE(st3(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30, Reg::r29), "st3 {v31.b, v0.b, v1.b}[0], [x30], x29"); - TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "st3 {v26.b, v27.b, v28.b}[0], [x30], x29"); + TEST_SINGLE(st3(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30, Reg::r29), "st3 {v31.b, 
v0.b, v1.b}[0], [x30], x29"); + TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "st3 {v26.b, v27.b, v28.b}[0], [x30], x29"); TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "st3 {v26.h, v27.h, v28.h}[0], [x30], x29"); TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "st3 {v26.s, v27.s, v28.s}[0], [x30], x29"); TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "st3 {v26.d, v27.d, v28.d}[0], [x30], x29"); - TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30, Reg::r29), "st3 {v26.b, v27.b, v28.b}[15], [x30], x29"); - TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30, Reg::r29), "st3 {v26.h, v27.h, v28.h}[7], [x30], x29"); - TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30, Reg::r29), "st3 {v26.s, v27.s, v28.s}[3], [x30], x29"); - TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30, Reg::r29), "st3 {v26.d, v27.d, v28.d}[1], [x30], x29"); - - TEST_SINGLE(ld4(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30, Reg::r29), "ld4 {v31.b, v0.b, v1.b, v2.b}[0], [x30], x29"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "ld4 {v26.b, v27.b, v28.b, v29.b}[0], [x30], x29"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "ld4 {v26.h, v27.h, v28.h, v29.h}[0], [x30], x29"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "ld4 {v26.s, v27.s, v28.s, v29.s}[0], [x30], x29"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "ld4 {v26.d, v27.d, v28.d, v29.d}[0], [x30], x29"); - - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30, Reg::r29), "ld4 {v26.b, v27.b, v28.b, v29.b}[15], [x30], x29"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30, Reg::r29), "ld4 {v26.h, v27.h, v28.h, v29.h}[7], [x30], x29"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, 
VReg::v28, VReg::v29, 3, Reg::r30, Reg::r29), "ld4 {v26.s, v27.s, v28.s, v29.s}[3], [x30], x29"); - TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30, Reg::r29), "ld4 {v26.d, v27.d, v28.d, v29.d}[1], [x30], x29"); - - TEST_SINGLE(ld4r(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "ld4r {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], x29"); - TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4r {v26.8b, v27.8b, v28.8b, v29.8b}, [x30], x29"); - TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4r {v26.4h, v27.4h, v28.4h, v29.4h}, [x30], x29"); - TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4r {v26.2s, v27.2s, v28.2s, v29.2s}, [x30], x29"); - TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4r {v26.1d, v27.1d, v28.1d, v29.1d}, [x30], x29"); - - TEST_SINGLE(ld4r(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "ld4r {v31.16b, v0.16b, v1.16b, v2.16b}, [x30], x29"); - TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4r {v26.16b, v27.16b, v28.16b, v29.16b}, [x30], x29"); - TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4r {v26.8h, v27.8h, v28.8h, v29.8h}, [x30], x29"); - TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4r {v26.4s, v27.4s, v28.4s, v29.4s}, [x30], x29"); - TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4r {v26.2d, v27.2d, v28.2d, v29.2d}, [x30], x29"); - - TEST_SINGLE(st4(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30, Reg::r29), "st4 {v31.b, v0.b, v1.b, v2.b}[0], [x30], x29"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "st4 {v26.b, v27.b, v28.b, v29.b}[0], [x30], x29"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "st4 {v26.h, v27.h, 
v28.h, v29.h}[0], [x30], x29"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "st4 {v26.s, v27.s, v28.s, v29.s}[0], [x30], x29"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "st4 {v26.d, v27.d, v28.d, v29.d}[0], [x30], x29"); - - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30, Reg::r29), "st4 {v26.b, v27.b, v28.b, v29.b}[15], [x30], x29"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30, Reg::r29), "st4 {v26.h, v27.h, v28.h, v29.h}[7], [x30], x29"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30, Reg::r29), "st4 {v26.s, v27.s, v28.s, v29.s}[3], [x30], x29"); - TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30, Reg::r29), "st4 {v26.d, v27.d, v28.d, v29.d}[1], [x30], x29"); + TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30, Reg::r29), "st3 {v26.b, v27.b, v28.b}[15], [x30], x29"); + TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30, Reg::r29), "st3 {v26.h, v27.h, v28.h}[7], [x30], x29"); + TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30, Reg::r29), "st3 {v26.s, v27.s, v28.s}[3], [x30], x29"); + TEST_SINGLE(st3(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30, Reg::r29), "st3 {v26.d, v27.d, v28.d}[1], [x30], x29"); + + TEST_SINGLE(ld4(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30, Reg::r29), "ld4 {v31.b, v0.b, v1.b, v2.b}[0], " + "[x30], x29"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "ld4 {v26.b, v27.b, v28.b, " + "v29.b}[0], [x30], x29"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "ld4 {v26.h, v27.h, v28.h, " + "v29.h}[0], [x30], x29"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "ld4 {v26.s, v27.s, v28.s, " + "v29.s}[0], [x30], x29"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), 
"ld4 {v26.d, v27.d, v28.d, " + "v29.d}[0], [x30], x29"); + + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30, Reg::r29), "ld4 {v26.b, v27.b, v28.b, " + "v29.b}[15], [x30], x29"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30, Reg::r29), "ld4 {v26.h, v27.h, v28.h, " + "v29.h}[7], [x30], x29"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30, Reg::r29), "ld4 {v26.s, v27.s, v28.s, " + "v29.s}[3], [x30], x29"); + TEST_SINGLE(ld4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30, Reg::r29), "ld4 {v26.d, v27.d, v28.d, " + "v29.d}[1], [x30], x29"); + + TEST_SINGLE(ld4r(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "ld4r {v31.8b, v0.8b, v1.8b, v2.8b}, " + "[x30], x29"); + TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4r {v26.8b, v27.8b, v28.8b, " + "v29.8b}, [x30], x29"); + TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4r {v26.4h, v27.4h, v28.4h, " + "v29.4h}, [x30], x29"); + TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4r {v26.2s, v27.2s, v28.2s, " + "v29.2s}, [x30], x29"); + TEST_SINGLE(ld4r(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4r {v26.1d, v27.1d, v28.1d, " + "v29.1d}, [x30], x29"); + + TEST_SINGLE(ld4r(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "ld4r {v31.16b, v0.16b, v1.16b, " + "v2.16b}, [x30], x29"); + TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4r {v26.16b, v27.16b, v28.16b, " + "v29.16b}, [x30], x29"); + TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4r {v26.8h, v27.8h, v28.8h, " + "v29.8h}, [x30], x29"); + TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4r {v26.4s, v27.4s, v28.4s, " + "v29.4s}, [x30], x29"); + TEST_SINGLE(ld4r(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 
Reg::r29), "ld4r {v26.2d, v27.2d, v28.2d, " + "v29.2d}, [x30], x29"); + + TEST_SINGLE(st4(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30, Reg::r29), "st4 {v31.b, v0.b, v1.b, v2.b}[0], " + "[x30], x29"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "st4 {v26.b, v27.b, v28.b, " + "v29.b}[0], [x30], x29"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "st4 {v26.h, v27.h, v28.h, " + "v29.h}[0], [x30], x29"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "st4 {v26.s, v27.s, v28.s, " + "v29.s}[0], [x30], x29"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "st4 {v26.d, v27.d, v28.d, " + "v29.d}[0], [x30], x29"); + + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30, Reg::r29), "st4 {v26.b, v27.b, v28.b, " + "v29.b}[15], [x30], x29"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30, Reg::r29), "st4 {v26.h, v27.h, v28.h, " + "v29.h}[7], [x30], x29"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30, Reg::r29), "st4 {v26.s, v27.s, v28.s, " + "v29.s}[3], [x30], x29"); + TEST_SINGLE(st4(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30, Reg::r29), "st4 {v26.d, v27.d, v28.d, " + "v29.d}[1], [x30], x29"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore exclusive pair") { TEST_SINGLE(stxp(Size::i32Bit, Reg::r28, Reg::r29, Reg::r30, Reg::r28), "stxp w28, w29, w30, [x28]"); @@ -1115,41 +1283,41 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Compare and swap") { } TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: LDAPR/STLR unscaled immediate") { TEST_SINGLE(stlurb(Reg::r30, Reg::r29, -256), "stlurb w30, [x29, #-256]"); - TEST_SINGLE(stlurb(Reg::r30, Reg::r29, 255), "stlurb w30, [x29, #255]"); + TEST_SINGLE(stlurb(Reg::r30, Reg::r29, 255), "stlurb w30, [x29, #255]"); TEST_SINGLE(ldapurb(Reg::r30, 
Reg::r29, -256), "ldapurb w30, [x29, #-256]"); - TEST_SINGLE(ldapurb(Reg::r30, Reg::r29, 255), "ldapurb w30, [x29, #255]"); + TEST_SINGLE(ldapurb(Reg::r30, Reg::r29, 255), "ldapurb w30, [x29, #255]"); TEST_SINGLE(ldapursb(WReg::w30, Reg::r29, -256), "ldapursb w30, [x29, #-256]"); - TEST_SINGLE(ldapursb(WReg::w30, Reg::r29, 255), "ldapursb w30, [x29, #255]"); + TEST_SINGLE(ldapursb(WReg::w30, Reg::r29, 255), "ldapursb w30, [x29, #255]"); TEST_SINGLE(ldapursb(XReg::x30, Reg::r29, -256), "ldapursb x30, [x29, #-256]"); - TEST_SINGLE(ldapursb(XReg::x30, Reg::r29, 255), "ldapursb x30, [x29, #255]"); + TEST_SINGLE(ldapursb(XReg::x30, Reg::r29, 255), "ldapursb x30, [x29, #255]"); TEST_SINGLE(stlurh(Reg::r30, Reg::r29, -256), "stlurh w30, [x29, #-256]"); - TEST_SINGLE(stlurh(Reg::r30, Reg::r29, 255), "stlurh w30, [x29, #255]"); + TEST_SINGLE(stlurh(Reg::r30, Reg::r29, 255), "stlurh w30, [x29, #255]"); TEST_SINGLE(ldapurh(Reg::r30, Reg::r29, -256), "ldapurh w30, [x29, #-256]"); - TEST_SINGLE(ldapurh(Reg::r30, Reg::r29, 255), "ldapurh w30, [x29, #255]"); + TEST_SINGLE(ldapurh(Reg::r30, Reg::r29, 255), "ldapurh w30, [x29, #255]"); TEST_SINGLE(ldapursh(WReg::w30, Reg::r29, -256), "ldapursh w30, [x29, #-256]"); - TEST_SINGLE(ldapursh(WReg::w30, Reg::r29, 255), "ldapursh w30, [x29, #255]"); + TEST_SINGLE(ldapursh(WReg::w30, Reg::r29, 255), "ldapursh w30, [x29, #255]"); TEST_SINGLE(ldapursh(XReg::x30, Reg::r29, -256), "ldapursh x30, [x29, #-256]"); - TEST_SINGLE(ldapursh(XReg::x30, Reg::r29, 255), "ldapursh x30, [x29, #255]"); + TEST_SINGLE(ldapursh(XReg::x30, Reg::r29, 255), "ldapursh x30, [x29, #255]"); TEST_SINGLE(stlur(WReg::w30, Reg::r29, -256), "stlur w30, [x29, #-256]"); - TEST_SINGLE(stlur(WReg::w30, Reg::r29, 255), "stlur w30, [x29, #255]"); + TEST_SINGLE(stlur(WReg::w30, Reg::r29, 255), "stlur w30, [x29, #255]"); TEST_SINGLE(ldapur(WReg::w30, Reg::r29, -256), "ldapur w30, [x29, #-256]"); - TEST_SINGLE(ldapur(WReg::w30, Reg::r29, 255), "ldapur w30, [x29, #255]"); + 
TEST_SINGLE(ldapur(WReg::w30, Reg::r29, 255), "ldapur w30, [x29, #255]"); TEST_SINGLE(ldapursw(XReg::x30, Reg::r29, -256), "ldapursw x30, [x29, #-256]"); - TEST_SINGLE(ldapursw(XReg::x30, Reg::r29, 255), "ldapursw x30, [x29, #255]"); + TEST_SINGLE(ldapursw(XReg::x30, Reg::r29, 255), "ldapursw x30, [x29, #255]"); TEST_SINGLE(stlur(XReg::x30, Reg::r29, -256), "stlur x30, [x29, #-256]"); - TEST_SINGLE(stlur(XReg::x30, Reg::r29, 255), "stlur x30, [x29, #255]"); + TEST_SINGLE(stlur(XReg::x30, Reg::r29, 255), "stlur x30, [x29, #255]"); TEST_SINGLE(ldapur(XReg::x30, Reg::r29, -256), "ldapur x30, [x29, #-256]"); - TEST_SINGLE(ldapur(XReg::x30, Reg::r29, 255), "ldapur x30, [x29, #255]"); + TEST_SINGLE(ldapur(XReg::x30, Reg::r29, 255), "ldapur x30, [x29, #255]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Load register literal") { { @@ -1283,34 +1451,34 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Memory copy/set") { } TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore no-allocate pair") { TEST_SINGLE(stnp(WReg::w30, WReg::w28, Reg::r29, -256), "stnp w30, w28, [x29, #-256]"); - TEST_SINGLE(stnp(WReg::w30, WReg::w28, Reg::r29, 252), "stnp w30, w28, [x29, #252]"); + TEST_SINGLE(stnp(WReg::w30, WReg::w28, Reg::r29, 252), "stnp w30, w28, [x29, #252]"); TEST_SINGLE(ldnp(WReg::w30, WReg::w28, Reg::r29, -256), "ldnp w30, w28, [x29, #-256]"); - TEST_SINGLE(ldnp(WReg::w30, WReg::w28, Reg::r29, 252), "ldnp w30, w28, [x29, #252]"); + TEST_SINGLE(ldnp(WReg::w30, WReg::w28, Reg::r29, 252), "ldnp w30, w28, [x29, #252]"); TEST_SINGLE(stnp(SReg::s30, SReg::s28, Reg::r29, -256), "stnp s30, s28, [x29, #-256]"); - TEST_SINGLE(stnp(SReg::s30, SReg::s28, Reg::r29, 252), "stnp s30, s28, [x29, #252]"); + TEST_SINGLE(stnp(SReg::s30, SReg::s28, Reg::r29, 252), "stnp s30, s28, [x29, #252]"); TEST_SINGLE(ldnp(SReg::s30, SReg::s28, Reg::r29, -256), "ldnp s30, s28, [x29, #-256]"); - TEST_SINGLE(ldnp(SReg::s30, SReg::s28, Reg::r29, 252), "ldnp s30, s28, [x29, 
#252]"); + TEST_SINGLE(ldnp(SReg::s30, SReg::s28, Reg::r29, 252), "ldnp s30, s28, [x29, #252]"); TEST_SINGLE(stnp(XReg::x30, XReg::x28, Reg::r29, -512), "stnp x30, x28, [x29, #-512]"); - TEST_SINGLE(stnp(XReg::x30, XReg::x28, Reg::r29, 504), "stnp x30, x28, [x29, #504]"); + TEST_SINGLE(stnp(XReg::x30, XReg::x28, Reg::r29, 504), "stnp x30, x28, [x29, #504]"); TEST_SINGLE(ldnp(XReg::x30, XReg::x28, Reg::r29, -512), "ldnp x30, x28, [x29, #-512]"); - TEST_SINGLE(ldnp(XReg::x30, XReg::x28, Reg::r29, 504), "ldnp x30, x28, [x29, #504]"); + TEST_SINGLE(ldnp(XReg::x30, XReg::x28, Reg::r29, 504), "ldnp x30, x28, [x29, #504]"); TEST_SINGLE(stnp(DReg::d30, DReg::d28, Reg::r29, -512), "stnp d30, d28, [x29, #-512]"); - TEST_SINGLE(stnp(DReg::d30, DReg::d28, Reg::r29, 504), "stnp d30, d28, [x29, #504]"); + TEST_SINGLE(stnp(DReg::d30, DReg::d28, Reg::r29, 504), "stnp d30, d28, [x29, #504]"); TEST_SINGLE(ldnp(DReg::d30, DReg::d28, Reg::r29, -512), "ldnp d30, d28, [x29, #-512]"); - TEST_SINGLE(ldnp(DReg::d30, DReg::d28, Reg::r29, 504), "ldnp d30, d28, [x29, #504]"); + TEST_SINGLE(ldnp(DReg::d30, DReg::d28, Reg::r29, 504), "ldnp d30, d28, [x29, #504]"); TEST_SINGLE(stnp(QReg::q30, QReg::q28, Reg::r29, -1024), "stnp q30, q28, [x29, #-1024]"); - TEST_SINGLE(stnp(QReg::q30, QReg::q28, Reg::r29, 1008), "stnp q30, q28, [x29, #1008]"); + TEST_SINGLE(stnp(QReg::q30, QReg::q28, Reg::r29, 1008), "stnp q30, q28, [x29, #1008]"); TEST_SINGLE(ldnp(QReg::q30, QReg::q28, Reg::r29, -1024), "ldnp q30, q28, [x29, #-1024]"); - TEST_SINGLE(ldnp(QReg::q30, QReg::q28, Reg::r29, 1008), "ldnp q30, q28, [x29, #1008]"); + TEST_SINGLE(ldnp(QReg::q30, QReg::q28, Reg::r29, 1008), "ldnp q30, q28, [x29, #1008]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore register pair post-indexed") { TEST_SINGLE(stp(WReg::w30, WReg::w28, Reg::r29, -256), "stp w30, w28, [x29], #-256"); @@ -1574,162 +1742,162 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore register unpri } } 
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Atomic memory operations") { - TEST_SINGLE(stadd(SubRegSize::i8Bit, Reg::r30, Reg::r29), "staddb w30, [x29]"); + TEST_SINGLE(stadd(SubRegSize::i8Bit, Reg::r30, Reg::r29), "staddb w30, [x29]"); TEST_SINGLE(stadd(SubRegSize::i16Bit, Reg::r30, Reg::r29), "staddh w30, [x29]"); TEST_SINGLE(stadd(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stadd w30, [x29]"); TEST_SINGLE(stadd(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stadd x30, [x29]"); - TEST_SINGLE(staddl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "staddlb w30, [x29]"); + TEST_SINGLE(staddl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "staddlb w30, [x29]"); TEST_SINGLE(staddl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "staddlh w30, [x29]"); TEST_SINGLE(staddl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "staddl w30, [x29]"); TEST_SINGLE(staddl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "staddl x30, [x29]"); - TEST_SINGLE(stadda(SubRegSize::i8Bit, Reg::r30, Reg::r29), "staddab w30, [x29]"); + TEST_SINGLE(stadda(SubRegSize::i8Bit, Reg::r30, Reg::r29), "staddab w30, [x29]"); TEST_SINGLE(stadda(SubRegSize::i16Bit, Reg::r30, Reg::r29), "staddah w30, [x29]"); TEST_SINGLE(stadda(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stadda w30, [x29]"); TEST_SINGLE(stadda(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stadda x30, [x29]"); - TEST_SINGLE(staddal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "staddalb w30, [x29]"); + TEST_SINGLE(staddal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "staddalb w30, [x29]"); TEST_SINGLE(staddal(SubRegSize::i16Bit, Reg::r30, Reg::r29), "staddalh w30, [x29]"); TEST_SINGLE(staddal(SubRegSize::i32Bit, Reg::r30, Reg::r29), "staddal w30, [x29]"); TEST_SINGLE(staddal(SubRegSize::i64Bit, Reg::r30, Reg::r29), "staddal x30, [x29]"); - TEST_SINGLE(stclr(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stclrb w30, [x29]"); + TEST_SINGLE(stclr(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stclrb w30, [x29]"); TEST_SINGLE(stclr(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stclrh w30, [x29]"); 
TEST_SINGLE(stclr(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stclr w30, [x29]"); TEST_SINGLE(stclr(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stclr x30, [x29]"); - TEST_SINGLE(stclrl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stclrlb w30, [x29]"); + TEST_SINGLE(stclrl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stclrlb w30, [x29]"); TEST_SINGLE(stclrl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stclrlh w30, [x29]"); TEST_SINGLE(stclrl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stclrl w30, [x29]"); TEST_SINGLE(stclrl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stclrl x30, [x29]"); - TEST_SINGLE(stclra(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stclrab w30, [x29]"); + TEST_SINGLE(stclra(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stclrab w30, [x29]"); TEST_SINGLE(stclra(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stclrah w30, [x29]"); TEST_SINGLE(stclra(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stclra w30, [x29]"); TEST_SINGLE(stclra(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stclra x30, [x29]"); - TEST_SINGLE(stclral(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stclralb w30, [x29]"); + TEST_SINGLE(stclral(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stclralb w30, [x29]"); TEST_SINGLE(stclral(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stclralh w30, [x29]"); TEST_SINGLE(stclral(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stclral w30, [x29]"); TEST_SINGLE(stclral(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stclral x30, [x29]"); - TEST_SINGLE(stset(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsetb w30, [x29]"); + TEST_SINGLE(stset(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsetb w30, [x29]"); TEST_SINGLE(stset(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stseth w30, [x29]"); TEST_SINGLE(stset(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stset w30, [x29]"); TEST_SINGLE(stset(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stset x30, [x29]"); - TEST_SINGLE(stsetl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsetlb w30, [x29]"); + TEST_SINGLE(stsetl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsetlb w30, [x29]"); 
TEST_SINGLE(stsetl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsetlh w30, [x29]"); TEST_SINGLE(stsetl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsetl w30, [x29]"); TEST_SINGLE(stsetl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsetl x30, [x29]"); - TEST_SINGLE(stseta(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsetab w30, [x29]"); + TEST_SINGLE(stseta(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsetab w30, [x29]"); TEST_SINGLE(stseta(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsetah w30, [x29]"); TEST_SINGLE(stseta(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stseta w30, [x29]"); TEST_SINGLE(stseta(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stseta x30, [x29]"); - TEST_SINGLE(stsetal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsetalb w30, [x29]"); + TEST_SINGLE(stsetal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsetalb w30, [x29]"); TEST_SINGLE(stsetal(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsetalh w30, [x29]"); TEST_SINGLE(stsetal(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsetal w30, [x29]"); TEST_SINGLE(stsetal(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsetal x30, [x29]"); - TEST_SINGLE(steor(SubRegSize::i8Bit, Reg::r30, Reg::r29), "steorb w30, [x29]"); + TEST_SINGLE(steor(SubRegSize::i8Bit, Reg::r30, Reg::r29), "steorb w30, [x29]"); TEST_SINGLE(steor(SubRegSize::i16Bit, Reg::r30, Reg::r29), "steorh w30, [x29]"); TEST_SINGLE(steor(SubRegSize::i32Bit, Reg::r30, Reg::r29), "steor w30, [x29]"); TEST_SINGLE(steor(SubRegSize::i64Bit, Reg::r30, Reg::r29), "steor x30, [x29]"); - TEST_SINGLE(steorl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "steorlb w30, [x29]"); + TEST_SINGLE(steorl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "steorlb w30, [x29]"); TEST_SINGLE(steorl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "steorlh w30, [x29]"); TEST_SINGLE(steorl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "steorl w30, [x29]"); TEST_SINGLE(steorl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "steorl x30, [x29]"); - TEST_SINGLE(steora(SubRegSize::i8Bit, Reg::r30, Reg::r29), "steorab w30, [x29]"); + 
TEST_SINGLE(steora(SubRegSize::i8Bit, Reg::r30, Reg::r29), "steorab w30, [x29]"); TEST_SINGLE(steora(SubRegSize::i16Bit, Reg::r30, Reg::r29), "steorah w30, [x29]"); TEST_SINGLE(steora(SubRegSize::i32Bit, Reg::r30, Reg::r29), "steora w30, [x29]"); TEST_SINGLE(steora(SubRegSize::i64Bit, Reg::r30, Reg::r29), "steora x30, [x29]"); - TEST_SINGLE(steoral(SubRegSize::i8Bit, Reg::r30, Reg::r29), "steoralb w30, [x29]"); + TEST_SINGLE(steoral(SubRegSize::i8Bit, Reg::r30, Reg::r29), "steoralb w30, [x29]"); TEST_SINGLE(steoral(SubRegSize::i16Bit, Reg::r30, Reg::r29), "steoralh w30, [x29]"); TEST_SINGLE(steoral(SubRegSize::i32Bit, Reg::r30, Reg::r29), "steoral w30, [x29]"); TEST_SINGLE(steoral(SubRegSize::i64Bit, Reg::r30, Reg::r29), "steoral x30, [x29]"); - TEST_SINGLE(stsmax(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsmaxb w30, [x29]"); + TEST_SINGLE(stsmax(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsmaxb w30, [x29]"); TEST_SINGLE(stsmax(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsmaxh w30, [x29]"); TEST_SINGLE(stsmax(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsmax w30, [x29]"); TEST_SINGLE(stsmax(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsmax x30, [x29]"); - TEST_SINGLE(stsmaxl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsmaxlb w30, [x29]"); + TEST_SINGLE(stsmaxl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsmaxlb w30, [x29]"); TEST_SINGLE(stsmaxl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsmaxlh w30, [x29]"); TEST_SINGLE(stsmaxl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsmaxl w30, [x29]"); TEST_SINGLE(stsmaxl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsmaxl x30, [x29]"); - TEST_SINGLE(stsmaxa(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsmaxab w30, [x29]"); + TEST_SINGLE(stsmaxa(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsmaxab w30, [x29]"); TEST_SINGLE(stsmaxa(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsmaxah w30, [x29]"); TEST_SINGLE(stsmaxa(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsmaxa w30, [x29]"); TEST_SINGLE(stsmaxa(SubRegSize::i64Bit, Reg::r30, Reg::r29), 
"stsmaxa x30, [x29]"); - TEST_SINGLE(stsmaxal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsmaxalb w30, [x29]"); + TEST_SINGLE(stsmaxal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsmaxalb w30, [x29]"); TEST_SINGLE(stsmaxal(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsmaxalh w30, [x29]"); TEST_SINGLE(stsmaxal(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsmaxal w30, [x29]"); TEST_SINGLE(stsmaxal(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsmaxal x30, [x29]"); - TEST_SINGLE(stsmin(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsminb w30, [x29]"); + TEST_SINGLE(stsmin(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsminb w30, [x29]"); TEST_SINGLE(stsmin(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsminh w30, [x29]"); TEST_SINGLE(stsmin(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsmin w30, [x29]"); TEST_SINGLE(stsmin(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsmin x30, [x29]"); - TEST_SINGLE(stsminl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsminlb w30, [x29]"); + TEST_SINGLE(stsminl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsminlb w30, [x29]"); TEST_SINGLE(stsminl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsminlh w30, [x29]"); TEST_SINGLE(stsminl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsminl w30, [x29]"); TEST_SINGLE(stsminl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsminl x30, [x29]"); - TEST_SINGLE(stsmina(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsminab w30, [x29]"); + TEST_SINGLE(stsmina(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsminab w30, [x29]"); TEST_SINGLE(stsmina(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsminah w30, [x29]"); TEST_SINGLE(stsmina(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsmina w30, [x29]"); TEST_SINGLE(stsmina(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsmina x30, [x29]"); - TEST_SINGLE(stsminal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsminalb w30, [x29]"); + TEST_SINGLE(stsminal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsminalb w30, [x29]"); TEST_SINGLE(stsminal(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsminalh w30, [x29]"); 
TEST_SINGLE(stsminal(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsminal w30, [x29]"); TEST_SINGLE(stsminal(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsminal x30, [x29]"); - TEST_SINGLE(stumax(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stumaxb w30, [x29]"); + TEST_SINGLE(stumax(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stumaxb w30, [x29]"); TEST_SINGLE(stumax(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stumaxh w30, [x29]"); TEST_SINGLE(stumax(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stumax w30, [x29]"); TEST_SINGLE(stumax(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stumax x30, [x29]"); - TEST_SINGLE(stumaxl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stumaxlb w30, [x29]"); + TEST_SINGLE(stumaxl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stumaxlb w30, [x29]"); TEST_SINGLE(stumaxl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stumaxlh w30, [x29]"); TEST_SINGLE(stumaxl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stumaxl w30, [x29]"); TEST_SINGLE(stumaxl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stumaxl x30, [x29]"); - TEST_SINGLE(stumaxa(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stumaxab w30, [x29]"); + TEST_SINGLE(stumaxa(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stumaxab w30, [x29]"); TEST_SINGLE(stumaxa(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stumaxah w30, [x29]"); TEST_SINGLE(stumaxa(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stumaxa w30, [x29]"); TEST_SINGLE(stumaxa(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stumaxa x30, [x29]"); - TEST_SINGLE(stumaxal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stumaxalb w30, [x29]"); + TEST_SINGLE(stumaxal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stumaxalb w30, [x29]"); TEST_SINGLE(stumaxal(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stumaxalh w30, [x29]"); TEST_SINGLE(stumaxal(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stumaxal w30, [x29]"); TEST_SINGLE(stumaxal(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stumaxal x30, [x29]"); - TEST_SINGLE(stumin(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stuminb w30, [x29]"); + TEST_SINGLE(stumin(SubRegSize::i8Bit, 
Reg::r30, Reg::r29), "stuminb w30, [x29]"); TEST_SINGLE(stumin(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stuminh w30, [x29]"); TEST_SINGLE(stumin(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stumin w30, [x29]"); TEST_SINGLE(stumin(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stumin x30, [x29]"); - TEST_SINGLE(stuminl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stuminlb w30, [x29]"); + TEST_SINGLE(stuminl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stuminlb w30, [x29]"); TEST_SINGLE(stuminl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stuminlh w30, [x29]"); TEST_SINGLE(stuminl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stuminl w30, [x29]"); TEST_SINGLE(stuminl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stuminl x30, [x29]"); - TEST_SINGLE(stumina(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stuminab w30, [x29]"); + TEST_SINGLE(stumina(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stuminab w30, [x29]"); TEST_SINGLE(stumina(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stuminah w30, [x29]"); TEST_SINGLE(stumina(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stumina w30, [x29]"); TEST_SINGLE(stumina(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stumina x30, [x29]"); - TEST_SINGLE(stuminal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stuminalb w30, [x29]"); + TEST_SINGLE(stuminal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stuminalb w30, [x29]"); TEST_SINGLE(stuminal(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stuminalh w30, [x29]"); TEST_SINGLE(stuminal(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stuminal w30, [x29]"); TEST_SINGLE(stuminal(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stuminal x30, [x29]"); diff --git a/FEXCore/unittests/Emitter/SVE_Tests.cpp b/FEXCore/unittests/Emitter/SVE_Tests.cpp index 0f2ea5fbdc..60e901907f 100644 --- a/FEXCore/unittests/Emitter/SVE_Tests.cpp +++ b/FEXCore/unittests/Emitter/SVE_Tests.cpp @@ -26,54 +26,54 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: Base Encodings") { TEST_SINGLE(dup(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, 1), "mov z30.q, z29.q[1]"); 
TEST_SINGLE(dup(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, 3), "mov z30.q, z29.q[3]"); - TEST_SINGLE(sel(SubRegSize::i8Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.b, p6, z29.b, z28.b"); - TEST_SINGLE(sel(SubRegSize::i16Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.h, p6, z29.h, z28.h"); - TEST_SINGLE(sel(SubRegSize::i32Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.s, p6, z29.s, z28.s"); - TEST_SINGLE(sel(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.d, p6, z29.d, z28.d"); - //TEST_SINGLE(sel(SubRegSize::i128Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.q, p6, z29.q, z28.q"); - - TEST_SINGLE(mov(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.b, p6/m, z29.b"); - TEST_SINGLE(mov(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.h, p6/m, z29.h"); - TEST_SINGLE(mov(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.s, p6/m, z29.s"); - TEST_SINGLE(mov(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.d, p6/m, z29.d"); - //TEST_SINGLE(mov(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.q, p6/m, z29.q"); + TEST_SINGLE(sel(SubRegSize::i8Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.b, p6, z29.b, z28.b"); + TEST_SINGLE(sel(SubRegSize::i16Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.h, p6, z29.h, z28.h"); + TEST_SINGLE(sel(SubRegSize::i32Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.s, p6, z29.s, z28.s"); + TEST_SINGLE(sel(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.d, p6, z29.d, z28.d"); + // TEST_SINGLE(sel(SubRegSize::i128Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.q, p6, z29.q, z28.q"); + + TEST_SINGLE(mov(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.b, p6/m, z29.b"); + TEST_SINGLE(mov(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.h, p6/m, z29.h"); + 
TEST_SINGLE(mov(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.s, p6/m, z29.s"); + TEST_SINGLE(mov(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.d, p6/m, z29.d"); + // TEST_SINGLE(mov(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.q, p6/m, z29.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer add/subtract vectors (unpredicated)") { - TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.b, z29.b, z28.b"); - TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.h, z29.h, z28.h"); - TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.s, z29.s, z28.s"); - TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.d, z29.d, z28.d"); - //TEST_SINGLE(add(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.q, z29.q, z28.q"); - - TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.b, z29.b, z28.b"); - TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.h, z29.h, z28.h"); - TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.s, z29.s, z28.s"); - TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.d, z29.d, z28.d"); - //TEST_SINGLE(sub(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.q, z29.q, z28.q"); - - TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.b, z29.b, z28.b"); - TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.h, z29.h, z28.h"); - TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.s, z29.s, z28.s"); - TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.d, z29.d, z28.d"); - //TEST_SINGLE(sqadd(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.q, z29.q, z28.q"); - - 
TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.b, z29.b, z28.b"); - TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.h, z29.h, z28.h"); - TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.s, z29.s, z28.s"); - TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.d, z29.d, z28.d"); - //TEST_SINGLE(uqadd(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.q, z29.q, z28.q"); - - TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.b, z29.b, z28.b"); - TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.h, z29.h, z28.h"); - TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.s, z29.s, z28.s"); - TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.d, z29.d, z28.d"); - //TEST_SINGLE(sqsub(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.q, z29.q, z28.q"); - - TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.b, z29.b, z28.b"); - TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.h, z29.h, z28.h"); - TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.s, z29.s, z28.s"); - TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.d, z29.d, z28.d"); - //TEST_SINGLE(uqsub(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.q, z29.q, z28.q"); + TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.b, z29.b, z28.b"); + TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.h, z29.h, z28.h"); + TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.s, z29.s, z28.s"); + TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.d, z29.d, z28.d"); + // 
TEST_SINGLE(add(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.q, z29.q, z28.q"); + + TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.b, z29.b, z28.b"); + TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.h, z29.h, z28.h"); + TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.s, z29.s, z28.s"); + TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.d, z29.d, z28.d"); + // TEST_SINGLE(sub(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.q, z29.q, z28.q"); + + TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.b, z29.b, z28.b"); + TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.h, z29.h, z28.h"); + TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.s, z29.s, z28.s"); + TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.d, z29.d, z28.d"); + // TEST_SINGLE(sqadd(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.q, z29.q, z28.q"); + + TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.b, z29.b, z28.b"); + TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.h, z29.h, z28.h"); + TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.s, z29.s, z28.s"); + TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.d, z29.d, z28.d"); + // TEST_SINGLE(uqadd(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.q, z29.q, z28.q"); + + TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.b, z29.b, z28.b"); + TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.h, z29.h, z28.h"); + TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.s, z29.s, z28.s"); + 
TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.d, z29.d, z28.d"); + // TEST_SINGLE(sqsub(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.q, z29.q, z28.q"); + + TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.b, z29.b, z28.b"); + TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.h, z29.h, z28.h"); + TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.s, z29.s, z28.s"); + TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.d, z29.d, z28.d"); + // TEST_SINGLE(uqsub(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.q, z29.q, z28.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE address generation") { TEST_SINGLE(adr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z31), "adr z30.s, [z29.s, z31.s]"); @@ -97,146 +97,145 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE address generation") { TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_SXTW, 3), "adr z30.d, [z29.d, z31.d, sxtw #3]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE table lookup (three sources)") { - TEST_SINGLE(tbl(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.b, {z29.b}, z28.b"); - TEST_SINGLE(tbl(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.h, {z29.h}, z28.h"); - TEST_SINGLE(tbl(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.s, {z29.s}, z28.s"); - TEST_SINGLE(tbl(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.d, {z29.d}, z28.d"); - //TEST_SINGLE(tbl(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.q, {z29.q}, z28.q"); + TEST_SINGLE(tbl(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.b, {z29.b}, z28.b"); + TEST_SINGLE(tbl(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.h, {z29.h}, z28.h"); + TEST_SINGLE(tbl(SubRegSize::i32Bit, ZReg::z30, 
ZReg::z29, ZReg::z28), "tbl z30.s, {z29.s}, z28.s"); + TEST_SINGLE(tbl(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.d, {z29.d}, z28.d"); + // TEST_SINGLE(tbl(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.q, {z29.q}, z28.q"); - TEST_SINGLE(tbl(SubRegSize::i8Bit, ZReg::z31, ZReg::z29, ZReg::z30, ZReg::z28), "tbl z31.b, {z29.b, z30.b}, z28.b"); + TEST_SINGLE(tbl(SubRegSize::i8Bit, ZReg::z31, ZReg::z29, ZReg::z30, ZReg::z28), "tbl z31.b, {z29.b, z30.b}, z28.b"); TEST_SINGLE(tbl(SubRegSize::i16Bit, ZReg::z31, ZReg::z29, ZReg::z30, ZReg::z28), "tbl z31.h, {z29.h, z30.h}, z28.h"); TEST_SINGLE(tbl(SubRegSize::i32Bit, ZReg::z31, ZReg::z29, ZReg::z30, ZReg::z28), "tbl z31.s, {z29.s, z30.s}, z28.s"); TEST_SINGLE(tbl(SubRegSize::i64Bit, ZReg::z31, ZReg::z29, ZReg::z30, ZReg::z28), "tbl z31.d, {z29.d, z30.d}, z28.d"); - TEST_SINGLE(tbx(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.b, z29.b, z28.b"); - TEST_SINGLE(tbx(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.h, z29.h, z28.h"); - TEST_SINGLE(tbx(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.s, z29.s, z28.s"); - TEST_SINGLE(tbx(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.d, z29.d, z28.d"); - //TEST_SINGLE(tbx(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.q, z29.q, z28.q"); - + TEST_SINGLE(tbx(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.b, z29.b, z28.b"); + TEST_SINGLE(tbx(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.h, z29.h, z28.h"); + TEST_SINGLE(tbx(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.s, z29.s, z28.s"); + TEST_SINGLE(tbx(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.d, z29.d, z28.d"); + // TEST_SINGLE(tbx(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.q, z29.q, z28.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE permute vector elements") { - TEST_SINGLE(zip1(SubRegSize::i8Bit, ZReg::z30, 
ZReg::z29, ZReg::z28), "zip1 z30.b, z29.b, z28.b"); + TEST_SINGLE(zip1(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip1 z30.b, z29.b, z28.b"); TEST_SINGLE(zip1(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip1 z30.h, z29.h, z28.h"); TEST_SINGLE(zip1(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip1 z30.s, z29.s, z28.s"); TEST_SINGLE(zip1(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip1 z30.d, z29.d, z28.d"); - TEST_SINGLE(zip2(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip2 z30.b, z29.b, z28.b"); + TEST_SINGLE(zip2(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip2 z30.b, z29.b, z28.b"); TEST_SINGLE(zip2(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip2 z30.h, z29.h, z28.h"); TEST_SINGLE(zip2(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip2 z30.s, z29.s, z28.s"); TEST_SINGLE(zip2(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip2 z30.d, z29.d, z28.d"); - TEST_SINGLE(uzp1(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp1 z30.b, z29.b, z28.b"); + TEST_SINGLE(uzp1(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp1 z30.b, z29.b, z28.b"); TEST_SINGLE(uzp1(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp1 z30.h, z29.h, z28.h"); TEST_SINGLE(uzp1(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp1 z30.s, z29.s, z28.s"); TEST_SINGLE(uzp1(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp1 z30.d, z29.d, z28.d"); - TEST_SINGLE(uzp2(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp2 z30.b, z29.b, z28.b"); + TEST_SINGLE(uzp2(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp2 z30.b, z29.b, z28.b"); TEST_SINGLE(uzp2(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp2 z30.h, z29.h, z28.h"); TEST_SINGLE(uzp2(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp2 z30.s, z29.s, z28.s"); TEST_SINGLE(uzp2(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp2 z30.d, z29.d, z28.d"); - TEST_SINGLE(trn1(SubRegSize::i8Bit, 
ZReg::z30, ZReg::z29, ZReg::z28), "trn1 z30.b, z29.b, z28.b"); + TEST_SINGLE(trn1(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn1 z30.b, z29.b, z28.b"); TEST_SINGLE(trn1(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn1 z30.h, z29.h, z28.h"); TEST_SINGLE(trn1(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn1 z30.s, z29.s, z28.s"); TEST_SINGLE(trn1(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn1 z30.d, z29.d, z28.d"); - TEST_SINGLE(trn2(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn2 z30.b, z29.b, z28.b"); + TEST_SINGLE(trn2(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn2 z30.b, z29.b, z28.b"); TEST_SINGLE(trn2(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn2 z30.h, z29.h, z28.h"); TEST_SINGLE(trn2(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn2 z30.s, z29.s, z28.s"); TEST_SINGLE(trn2(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn2 z30.d, z29.d, z28.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer compare with unsigned immediate") { - TEST_SINGLE(cmphi(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphi p6.b, p5/z, z30.b, #0"); + TEST_SINGLE(cmphi(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphi p6.b, p5/z, z30.b, #0"); TEST_SINGLE(cmphi(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphi p6.h, p5/z, z30.h, #0"); TEST_SINGLE(cmphi(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphi p6.s, p5/z, z30.s, #0"); TEST_SINGLE(cmphi(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphi p6.d, p5/z, z30.d, #0"); - TEST_SINGLE(cmphi(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphi p6.b, p5/z, z30.b, #127"); + TEST_SINGLE(cmphi(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphi p6.b, p5/z, z30.b, #127"); TEST_SINGLE(cmphi(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphi p6.h, p5/z, z30.h, 
#127"); TEST_SINGLE(cmphi(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphi p6.s, p5/z, z30.s, #127"); TEST_SINGLE(cmphi(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphi p6.d, p5/z, z30.d, #127"); - TEST_SINGLE(cmphs(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphs p6.b, p5/z, z30.b, #0"); + TEST_SINGLE(cmphs(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphs p6.b, p5/z, z30.b, #0"); TEST_SINGLE(cmphs(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphs p6.h, p5/z, z30.h, #0"); TEST_SINGLE(cmphs(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphs p6.s, p5/z, z30.s, #0"); TEST_SINGLE(cmphs(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphs p6.d, p5/z, z30.d, #0"); - TEST_SINGLE(cmphs(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphs p6.b, p5/z, z30.b, #127"); + TEST_SINGLE(cmphs(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphs p6.b, p5/z, z30.b, #127"); TEST_SINGLE(cmphs(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphs p6.h, p5/z, z30.h, #127"); TEST_SINGLE(cmphs(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphs p6.s, p5/z, z30.s, #127"); TEST_SINGLE(cmphs(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphs p6.d, p5/z, z30.d, #127"); - TEST_SINGLE(cmplo(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmplo p6.b, p5/z, z30.b, #0"); + TEST_SINGLE(cmplo(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmplo p6.b, p5/z, z30.b, #0"); TEST_SINGLE(cmplo(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmplo p6.h, p5/z, z30.h, #0"); TEST_SINGLE(cmplo(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmplo p6.s, p5/z, z30.s, #0"); TEST_SINGLE(cmplo(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmplo p6.d, p5/z, z30.d, #0"); - 
TEST_SINGLE(cmplo(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmplo p6.b, p5/z, z30.b, #127"); + TEST_SINGLE(cmplo(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmplo p6.b, p5/z, z30.b, #127"); TEST_SINGLE(cmplo(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmplo p6.h, p5/z, z30.h, #127"); TEST_SINGLE(cmplo(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmplo p6.s, p5/z, z30.s, #127"); TEST_SINGLE(cmplo(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmplo p6.d, p5/z, z30.d, #127"); - TEST_SINGLE(cmpls(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmpls p6.b, p5/z, z30.b, #0"); + TEST_SINGLE(cmpls(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmpls p6.b, p5/z, z30.b, #0"); TEST_SINGLE(cmpls(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmpls p6.h, p5/z, z30.h, #0"); TEST_SINGLE(cmpls(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmpls p6.s, p5/z, z30.s, #0"); TEST_SINGLE(cmpls(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmpls p6.d, p5/z, z30.d, #0"); - TEST_SINGLE(cmpls(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmpls p6.b, p5/z, z30.b, #127"); + TEST_SINGLE(cmpls(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmpls p6.b, p5/z, z30.b, #127"); TEST_SINGLE(cmpls(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmpls p6.h, p5/z, z30.h, #127"); TEST_SINGLE(cmpls(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmpls p6.s, p5/z, z30.s, #127"); TEST_SINGLE(cmpls(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmpls p6.d, p5/z, z30.d, #127"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer compare with signed immediate") { - TEST_SINGLE(cmpeq(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpeq p6.b, p5/z, z30.b, #-16"); + 
TEST_SINGLE(cmpeq(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpeq p6.b, p5/z, z30.b, #-16"); TEST_SINGLE(cmpeq(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpeq p6.h, p5/z, z30.h, #-16"); TEST_SINGLE(cmpeq(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpeq p6.s, p5/z, z30.s, #-16"); TEST_SINGLE(cmpeq(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpeq p6.d, p5/z, z30.d, #-16"); - TEST_SINGLE(cmpeq(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpeq p6.b, p5/z, z30.b, #15"); - TEST_SINGLE(cmpeq(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpeq p6.h, p5/z, z30.h, #15"); - TEST_SINGLE(cmpeq(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpeq p6.s, p5/z, z30.s, #15"); - TEST_SINGLE(cmpeq(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpeq p6.d, p5/z, z30.d, #15"); + TEST_SINGLE(cmpeq(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpeq p6.b, p5/z, z30.b, #15"); + TEST_SINGLE(cmpeq(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpeq p6.h, p5/z, z30.h, #15"); + TEST_SINGLE(cmpeq(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpeq p6.s, p5/z, z30.s, #15"); + TEST_SINGLE(cmpeq(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpeq p6.d, p5/z, z30.d, #15"); - TEST_SINGLE(cmpgt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpgt p6.b, p5/z, z30.b, #-16"); + TEST_SINGLE(cmpgt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpgt p6.b, p5/z, z30.b, #-16"); TEST_SINGLE(cmpgt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpgt p6.h, p5/z, z30.h, #-16"); TEST_SINGLE(cmpgt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpgt p6.s, p5/z, z30.s, #-16"); TEST_SINGLE(cmpgt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpgt 
p6.d, p5/z, z30.d, #-16"); - TEST_SINGLE(cmpgt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpgt p6.b, p5/z, z30.b, #15"); - TEST_SINGLE(cmpgt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpgt p6.h, p5/z, z30.h, #15"); - TEST_SINGLE(cmpgt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpgt p6.s, p5/z, z30.s, #15"); - TEST_SINGLE(cmpgt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpgt p6.d, p5/z, z30.d, #15"); + TEST_SINGLE(cmpgt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpgt p6.b, p5/z, z30.b, #15"); + TEST_SINGLE(cmpgt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpgt p6.h, p5/z, z30.h, #15"); + TEST_SINGLE(cmpgt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpgt p6.s, p5/z, z30.s, #15"); + TEST_SINGLE(cmpgt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpgt p6.d, p5/z, z30.d, #15"); - TEST_SINGLE(cmpge(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpge p6.b, p5/z, z30.b, #-16"); + TEST_SINGLE(cmpge(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpge p6.b, p5/z, z30.b, #-16"); TEST_SINGLE(cmpge(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpge p6.h, p5/z, z30.h, #-16"); TEST_SINGLE(cmpge(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpge p6.s, p5/z, z30.s, #-16"); TEST_SINGLE(cmpge(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpge p6.d, p5/z, z30.d, #-16"); - TEST_SINGLE(cmpge(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpge p6.b, p5/z, z30.b, #15"); - TEST_SINGLE(cmpge(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpge p6.h, p5/z, z30.h, #15"); - TEST_SINGLE(cmpge(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpge p6.s, p5/z, z30.s, #15"); - TEST_SINGLE(cmpge(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), 
ZReg::z30, 15), "cmpge p6.d, p5/z, z30.d, #15"); + TEST_SINGLE(cmpge(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpge p6.b, p5/z, z30.b, #15"); + TEST_SINGLE(cmpge(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpge p6.h, p5/z, z30.h, #15"); + TEST_SINGLE(cmpge(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpge p6.s, p5/z, z30.s, #15"); + TEST_SINGLE(cmpge(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpge p6.d, p5/z, z30.d, #15"); - TEST_SINGLE(cmplt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmplt p6.b, p5/z, z30.b, #-16"); + TEST_SINGLE(cmplt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmplt p6.b, p5/z, z30.b, #-16"); TEST_SINGLE(cmplt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmplt p6.h, p5/z, z30.h, #-16"); TEST_SINGLE(cmplt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmplt p6.s, p5/z, z30.s, #-16"); TEST_SINGLE(cmplt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmplt p6.d, p5/z, z30.d, #-16"); - TEST_SINGLE(cmplt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmplt p6.b, p5/z, z30.b, #15"); - TEST_SINGLE(cmplt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmplt p6.h, p5/z, z30.h, #15"); - TEST_SINGLE(cmplt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmplt p6.s, p5/z, z30.s, #15"); - TEST_SINGLE(cmplt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmplt p6.d, p5/z, z30.d, #15"); + TEST_SINGLE(cmplt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmplt p6.b, p5/z, z30.b, #15"); + TEST_SINGLE(cmplt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmplt p6.h, p5/z, z30.h, #15"); + TEST_SINGLE(cmplt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmplt p6.s, p5/z, z30.s, #15"); + TEST_SINGLE(cmplt(SubRegSize::i64Bit, 
PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmplt p6.d, p5/z, z30.d, #15"); - TEST_SINGLE(cmple(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmple p6.b, p5/z, z30.b, #-16"); + TEST_SINGLE(cmple(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmple p6.b, p5/z, z30.b, #-16"); TEST_SINGLE(cmple(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmple p6.h, p5/z, z30.h, #-16"); TEST_SINGLE(cmple(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmple p6.s, p5/z, z30.s, #-16"); TEST_SINGLE(cmple(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmple p6.d, p5/z, z30.d, #-16"); - TEST_SINGLE(cmple(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmple p6.b, p5/z, z30.b, #15"); - TEST_SINGLE(cmple(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmple p6.h, p5/z, z30.h, #15"); - TEST_SINGLE(cmple(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmple p6.s, p5/z, z30.s, #15"); - TEST_SINGLE(cmple(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmple p6.d, p5/z, z30.d, #15"); + TEST_SINGLE(cmple(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmple p6.b, p5/z, z30.b, #15"); + TEST_SINGLE(cmple(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmple p6.h, p5/z, z30.h, #15"); + TEST_SINGLE(cmple(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmple p6.s, p5/z, z30.s, #15"); + TEST_SINGLE(cmple(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmple p6.d, p5/z, z30.d, #15"); - TEST_SINGLE(cmpne(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpne p6.b, p5/z, z30.b, #-16"); + TEST_SINGLE(cmpne(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpne p6.b, p5/z, z30.b, #-16"); TEST_SINGLE(cmpne(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpne p6.h, p5/z, z30.h, #-16"); 
TEST_SINGLE(cmpne(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpne p6.s, p5/z, z30.s, #-16"); TEST_SINGLE(cmpne(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpne p6.d, p5/z, z30.d, #-16"); - TEST_SINGLE(cmpne(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpne p6.b, p5/z, z30.b, #15"); - TEST_SINGLE(cmpne(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpne p6.h, p5/z, z30.h, #15"); - TEST_SINGLE(cmpne(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpne p6.s, p5/z, z30.s, #15"); - TEST_SINGLE(cmpne(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpne p6.d, p5/z, z30.d, #15"); + TEST_SINGLE(cmpne(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpne p6.b, p5/z, z30.b, #15"); + TEST_SINGLE(cmpne(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpne p6.h, p5/z, z30.h, #15"); + TEST_SINGLE(cmpne(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpne p6.s, p5/z, z30.s, #15"); + TEST_SINGLE(cmpne(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpne p6.d, p5/z, z30.d, #15"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate logical operations") { TEST_SINGLE(and_(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "and p6.b, p5/z, p4.b, p3.b"); @@ -271,142 +270,130 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer clamp") { // TODO: Implement in emitter. 
} TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 character match") { - TEST_SINGLE(match(SubRegSize::i8Bit, PReg::p8, PReg::p6.Zeroing(), ZReg::z30, ZReg::z29), - "match p8.b, p6/z, z30.b, z29.b"); - TEST_SINGLE(match(SubRegSize::i16Bit, PReg::p8, PReg::p6.Zeroing(), ZReg::z30, ZReg::z29), - "match p8.h, p6/z, z30.h, z29.h"); + TEST_SINGLE(match(SubRegSize::i8Bit, PReg::p8, PReg::p6.Zeroing(), ZReg::z30, ZReg::z29), "match p8.b, p6/z, z30.b, z29.b"); + TEST_SINGLE(match(SubRegSize::i16Bit, PReg::p8, PReg::p6.Zeroing(), ZReg::z30, ZReg::z29), "match p8.h, p6/z, z30.h, z29.h"); - TEST_SINGLE(nmatch(SubRegSize::i8Bit, PReg::p8, PReg::p6.Zeroing(), ZReg::z30, ZReg::z29), - "nmatch p8.b, p6/z, z30.b, z29.b"); - TEST_SINGLE(nmatch(SubRegSize::i16Bit, PReg::p8, PReg::p6.Zeroing(), ZReg::z30, ZReg::z29), - "nmatch p8.h, p6/z, z30.h, z29.h"); + TEST_SINGLE(nmatch(SubRegSize::i8Bit, PReg::p8, PReg::p6.Zeroing(), ZReg::z30, ZReg::z29), "nmatch p8.b, p6/z, z30.b, z29.b"); + TEST_SINGLE(nmatch(SubRegSize::i16Bit, PReg::p8, PReg::p6.Zeroing(), ZReg::z30, ZReg::z29), "nmatch p8.h, p6/z, z30.h, z29.h"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point convert precision odd elements") { TEST_SINGLE(fcvtxnt(ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtxnt z30.s, p6/m, z29.d"); TEST_SINGLE(fcvtnt(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtnt z30.h, p6/m, z29.s"); TEST_SINGLE(fcvtnt(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtnt z30.s, p6/m, z29.d"); - //TEST_SINGLE(fcvtnt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtnt z30.d, p6/m, z29.d"); + // TEST_SINGLE(fcvtnt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtnt z30.d, p6/m, z29.d"); - //TEST_SINGLE(fcvtlt(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtlt z30.h, p6/m, z29.b"); + // TEST_SINGLE(fcvtlt(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtlt z30.h, p6/m, 
z29.b"); TEST_SINGLE(fcvtlt(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtlt z30.s, p6/m, z29.h"); TEST_SINGLE(fcvtlt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtlt z30.d, p6/m, z29.s"); - //void fcvtxnt(FEXCore::ARMEmitter::ZRegister zd, FEXCore::ARMEmitter::PRegister pg, FEXCore::ARMEmitter::ZRegister zn) { + // void fcvtxnt(FEXCore::ARMEmitter::ZRegister zd, FEXCore::ARMEmitter::PRegister pg, FEXCore::ARMEmitter::ZRegister zn) { /////< Size is destination size - //void fcvtnt(FEXCore::ARMEmitter::SubRegSize size, FEXCore::ARMEmitter::ZRegister zd, FEXCore::ARMEmitter::PRegister pg, FEXCore::ARMEmitter::ZRegister zn) { + // void fcvtnt(FEXCore::ARMEmitter::SubRegSize size, FEXCore::ARMEmitter::ZRegister zd, FEXCore::ARMEmitter::PRegister pg, FEXCore::ARMEmitter::ZRegister zn) { /////< Size is destination size - //void fcvtlt(FEXCore::ARMEmitter::SubRegSize size, FEXCore::ARMEmitter::ZRegister zd, FEXCore::ARMEmitter::PRegister pg, FEXCore::ARMEmitter::ZRegister zn) { + // void fcvtlt(FEXCore::ARMEmitter::SubRegSize size, FEXCore::ARMEmitter::ZRegister zd, FEXCore::ARMEmitter::PRegister pg, FEXCore::ARMEmitter::ZRegister zn) { // XXX: BFCVTNT } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 floating-point pairwise operations") { - //TEST_SINGLE(faddp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(faddp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(faddp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(faddp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(faddp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.q, p6/m, z30.q, z28.q"); - - 
//TEST_SINGLE(fmaxnmp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fmaxnmp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fmaxnmp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fmaxnmp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fmaxnmp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fminnmp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fminnmp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fminnmp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fminnmp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fminnmp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fmax(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fmax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fmax(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fmax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fmax(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.q, p6/m, z30.q, z28.q"); - - 
//TEST_SINGLE(fmin(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fmin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fmin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fmin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fmin(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.q, p6/m, z30.q, z28.q"); + // TEST_SINGLE(faddp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(faddp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(faddp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(faddp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(faddp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fmaxnmp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fmaxnmp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fmaxnmp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fmaxnmp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fmaxnmp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.q, p6/m, z30.q, z28.q"); + + // 
TEST_SINGLE(fminnmp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fminnmp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fminnmp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fminnmp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fminnmp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fmax(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fmax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fmax(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fmax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fmax(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fmin(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fmin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fmin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fmin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fmin(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.q, p6/m, z30.q, z28.q"); } 
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point complex add") { - TEST_SINGLE(fcadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_90), - "fcadd z30.h, p6/m, z30.h, z28.h, #90"); - TEST_SINGLE(fcadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_90), - "fcadd z30.s, p6/m, z30.s, z28.s, #90"); - TEST_SINGLE(fcadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_90), - "fcadd z30.d, p6/m, z30.d, z28.d, #90"); - - TEST_SINGLE(fcadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_270), - "fcadd z30.h, p6/m, z30.h, z28.h, #270"); - TEST_SINGLE(fcadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_270), - "fcadd z30.s, p6/m, z30.s, z28.s, #270"); - TEST_SINGLE(fcadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_270), - "fcadd z30.d, p6/m, z30.d, z28.d, #270"); + TEST_SINGLE(fcadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_90), "fcadd z30.h, p6/m, " + "z30.h, z28.h, #90"); + TEST_SINGLE(fcadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_90), "fcadd z30.s, p6/m, " + "z30.s, z28.s, #90"); + TEST_SINGLE(fcadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_90), "fcadd z30.d, p6/m, " + "z30.d, z28.d, #90"); + + TEST_SINGLE(fcadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_270), "fcadd z30.h, p6/m, " + "z30.h, z28.h, #270"); + TEST_SINGLE(fcadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_270), "fcadd z30.s, p6/m, " + "z30.s, z28.s, #270"); + TEST_SINGLE(fcadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_270), "fcadd z30.d, p6/m, " + "z30.d, 
z28.d, #270"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point multiply-add (vector)") { - TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_0), - "fcmla z30.h, p6/m, z10.h, z28.h, #0"); - TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_0), - "fcmla z30.s, p6/m, z10.s, z28.s, #0"); - TEST_SINGLE(fcmla(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_0), - "fcmla z30.d, p6/m, z10.d, z28.d, #0"); - - TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_90), - "fcmla z30.h, p6/m, z10.h, z28.h, #90"); - TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_90), - "fcmla z30.s, p6/m, z10.s, z28.s, #90"); - TEST_SINGLE(fcmla(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_90), - "fcmla z30.d, p6/m, z10.d, z28.d, #90"); - - TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_180), - "fcmla z30.h, p6/m, z10.h, z28.h, #180"); - TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_180), - "fcmla z30.s, p6/m, z10.s, z28.s, #180"); - TEST_SINGLE(fcmla(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_180), - "fcmla z30.d, p6/m, z10.d, z28.d, #180"); - - TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_270), - "fcmla z30.h, p6/m, z10.h, z28.h, #270"); - TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_270), - "fcmla z30.s, p6/m, z10.s, z28.s, #270"); - TEST_SINGLE(fcmla(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_270), - "fcmla z30.d, p6/m, 
z10.d, z28.d, #270"); + TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_0), "fcmla z30.h, p6/m, " + "z10.h, z28.h, #0"); + TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_0), "fcmla z30.s, p6/m, " + "z10.s, z28.s, #0"); + TEST_SINGLE(fcmla(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_0), "fcmla z30.d, p6/m, " + "z10.d, z28.d, #0"); + + TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_90), "fcmla z30.h, p6/m, " + "z10.h, z28.h, #90"); + TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_90), "fcmla z30.s, p6/m, " + "z10.s, z28.s, #90"); + TEST_SINGLE(fcmla(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_90), "fcmla z30.d, p6/m, " + "z10.d, z28.d, #90"); + + TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_180), "fcmla z30.h, p6/m, " + "z10.h, z28.h, #180"); + TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_180), "fcmla z30.s, p6/m, " + "z10.s, z28.s, #180"); + TEST_SINGLE(fcmla(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_180), "fcmla z30.d, p6/m, " + "z10.d, z28.d, #180"); + + TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_270), "fcmla z30.h, p6/m, " + "z10.h, z28.h, #270"); + TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_270), "fcmla z30.s, p6/m, " + "z10.s, z28.s, #270"); + TEST_SINGLE(fcmla(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_270), "fcmla z30.d, p6/m, " + "z10.d, z28.d, #270"); } 
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point multiply-add (indexed)") { - TEST_SINGLE(fmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmla z30.h, z29.h, z7.h[7]"); - TEST_SINGLE(fmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmla z30.s, z29.s, z7.s[3]"); + TEST_SINGLE(fmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmla z30.h, z29.h, z7.h[7]"); + TEST_SINGLE(fmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmla z30.s, z29.s, z7.s[3]"); TEST_SINGLE(fmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z15, 1), "fmla z30.d, z29.d, z15.d[1]"); - TEST_SINGLE(fmls(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmls z30.h, z29.h, z7.h[7]"); - TEST_SINGLE(fmls(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmls z30.s, z29.s, z7.s[3]"); + TEST_SINGLE(fmls(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmls z30.h, z29.h, z7.h[7]"); + TEST_SINGLE(fmls(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmls z30.s, z29.s, z7.s[3]"); TEST_SINGLE(fmls(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z15, 1), "fmls z30.d, z29.d, z15.d[1]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point complex multiply-add (indexed)") { - TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z10, ZReg::z7, 0, Rotation::ROTATE_0), - "fcmla z30.h, z10.h, z7.h[0], #0"); - TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z10, ZReg::z15, 0, Rotation::ROTATE_0), - "fcmla z30.s, z10.s, z15.s[0], #0"); - - TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z10, ZReg::z7, 1, Rotation::ROTATE_90), - "fcmla z30.h, z10.h, z7.h[1], #90"); - TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z10, ZReg::z15, 1, Rotation::ROTATE_90), - "fcmla z30.s, z10.s, z15.s[1], #90"); - TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z10, ZReg::z15, 1, Rotation::ROTATE_180), - "fcmla z30.s, z10.s, z15.s[1], #180"); - TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, 
ZReg::z10, ZReg::z15, 1, Rotation::ROTATE_270), - "fcmla z30.s, z10.s, z15.s[1], #270"); - - TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z10, ZReg::z7, 2, Rotation::ROTATE_180), - "fcmla z30.h, z10.h, z7.h[2], #180"); - TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z10, ZReg::z7, 3, Rotation::ROTATE_270), - "fcmla z30.h, z10.h, z7.h[3], #270"); + TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z10, ZReg::z7, 0, Rotation::ROTATE_0), "fcmla z30.h, z10.h, z7.h[0], #0"); + TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z10, ZReg::z15, 0, Rotation::ROTATE_0), "fcmla z30.s, z10.s, z15.s[0], #0"); + + TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z10, ZReg::z7, 1, Rotation::ROTATE_90), "fcmla z30.h, z10.h, z7.h[1], #90"); + TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z10, ZReg::z15, 1, Rotation::ROTATE_90), "fcmla z30.s, z10.s, z15.s[1], #90"); + TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z10, ZReg::z15, 1, Rotation::ROTATE_180), "fcmla z30.s, z10.s, z15.s[1], #180"); + TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z10, ZReg::z15, 1, Rotation::ROTATE_270), "fcmla z30.s, z10.s, z15.s[1], #270"); + + TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z10, ZReg::z7, 2, Rotation::ROTATE_180), "fcmla z30.h, z10.h, z7.h[2], #180"); + TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z10, ZReg::z7, 3, Rotation::ROTATE_270), "fcmla z30.h, z10.h, z7.h[3], #270"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point multiply (indexed)") { - TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmul z30.h, z29.h, z7.h[7]"); - TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmul z30.s, z29.s, z7.s[3]"); + TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmul z30.h, z29.h, z7.h[7]"); + TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmul z30.s, z29.s, z7.s[3]"); 
TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z15, 1), "fmul z30.d, z29.d, z15.d[1]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating point matrix multiply accumulate") { @@ -451,41 +438,41 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point compare vec TEST_SINGLE(faclt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facgt p6.d, p5/z, z29.d, z30.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point arithmetic (unpredicated)") { - //TEST_SINGLE(fadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.b, z29.b, z28.b"); - TEST_SINGLE(fadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.h, z29.h, z28.h"); - TEST_SINGLE(fadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.s, z29.s, z28.s"); - TEST_SINGLE(fadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.d, z29.d, z28.d"); - //TEST_SINGLE(fadd(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.q, z29.q, z28.q"); - - //TEST_SINGLE(fsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.b, z29.b, z28.b"); - TEST_SINGLE(fsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.h, z29.h, z28.h"); - TEST_SINGLE(fsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.s, z29.s, z28.s"); - TEST_SINGLE(fsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.d, z29.d, z28.d"); - //TEST_SINGLE(fsub(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.q, z29.q, z28.q"); - - //TEST_SINGLE(fmul(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.b, z29.b, z28.b"); - TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.h, z29.h, z28.h"); - TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.s, z29.s, z28.s"); - TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.d, z29.d, z28.d"); - 
//TEST_SINGLE(fmul(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.q, z29.q, z28.q"); - - //TEST_SINGLE(ftsmul(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.b, z29.b, z28.b"); - TEST_SINGLE(ftsmul(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.h, z29.h, z28.h"); - TEST_SINGLE(ftsmul(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.s, z29.s, z28.s"); - TEST_SINGLE(ftsmul(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.d, z29.d, z28.d"); - //TEST_SINGLE(ftsmul(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.q, z29.q, z28.q"); - - //TEST_SINGLE(frecps(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.b, z29.b, z28.b"); - TEST_SINGLE(frecps(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.h, z29.h, z28.h"); - TEST_SINGLE(frecps(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.s, z29.s, z28.s"); - TEST_SINGLE(frecps(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.d, z29.d, z28.d"); - //TEST_SINGLE(frecps(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.q, z29.q, z28.q"); - - //TEST_SINGLE(frsqrts(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.b, z29.b, z28.b"); - TEST_SINGLE(frsqrts(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.h, z29.h, z28.h"); - TEST_SINGLE(frsqrts(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.s, z29.s, z28.s"); - TEST_SINGLE(frsqrts(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.d, z29.d, z28.d"); - //TEST_SINGLE(frsqrts(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.q, z29.q, z28.q"); + // TEST_SINGLE(fadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.b, z29.b, z28.b"); + TEST_SINGLE(fadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.h, z29.h, z28.h"); + TEST_SINGLE(fadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), 
"fadd z30.s, z29.s, z28.s"); + TEST_SINGLE(fadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.d, z29.d, z28.d"); + // TEST_SINGLE(fadd(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.q, z29.q, z28.q"); + + // TEST_SINGLE(fsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.b, z29.b, z28.b"); + TEST_SINGLE(fsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.h, z29.h, z28.h"); + TEST_SINGLE(fsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.s, z29.s, z28.s"); + TEST_SINGLE(fsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.d, z29.d, z28.d"); + // TEST_SINGLE(fsub(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.q, z29.q, z28.q"); + + // TEST_SINGLE(fmul(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.b, z29.b, z28.b"); + TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.h, z29.h, z28.h"); + TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.s, z29.s, z28.s"); + TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.d, z29.d, z28.d"); + // TEST_SINGLE(fmul(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.q, z29.q, z28.q"); + + // TEST_SINGLE(ftsmul(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.b, z29.b, z28.b"); + TEST_SINGLE(ftsmul(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.h, z29.h, z28.h"); + TEST_SINGLE(ftsmul(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.s, z29.s, z28.s"); + TEST_SINGLE(ftsmul(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.d, z29.d, z28.d"); + // TEST_SINGLE(ftsmul(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.q, z29.q, z28.q"); + + // TEST_SINGLE(frecps(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.b, z29.b, z28.b"); + TEST_SINGLE(frecps(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), 
"frecps z30.h, z29.h, z28.h"); + TEST_SINGLE(frecps(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.s, z29.s, z28.s"); + TEST_SINGLE(frecps(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.d, z29.d, z28.d"); + // TEST_SINGLE(frecps(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.q, z29.q, z28.q"); + + // TEST_SINGLE(frsqrts(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.b, z29.b, z28.b"); + TEST_SINGLE(frsqrts(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.h, z29.h, z28.h"); + TEST_SINGLE(frsqrts(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.s, z29.s, z28.s"); + TEST_SINGLE(frsqrts(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.d, z29.d, z28.d"); + // TEST_SINGLE(frsqrts(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.q, z29.q, z28.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point recursive reduction") { TEST_SINGLE(faddv(SubRegSize::i16Bit, VReg::v30, PReg::p7, ZReg::z28), "faddv h30, p7, z28.h"); @@ -510,41 +497,41 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point recursive r } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer multiply-accumulate writing addend (predicated)") { - TEST_SINGLE(mla(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mla z30.b, p7/m, z28.b, z29.b"); + TEST_SINGLE(mla(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mla z30.b, p7/m, z28.b, z29.b"); TEST_SINGLE(mla(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mla z30.h, p7/m, z28.h, z29.h"); TEST_SINGLE(mla(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mla z30.s, p7/m, z28.s, z29.s"); TEST_SINGLE(mla(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mla z30.d, p7/m, z28.d, z29.d"); - TEST_SINGLE(mls(SubRegSize::i8Bit, ZReg::z30, 
PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mls z30.b, p7/m, z28.b, z29.b"); + TEST_SINGLE(mls(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mls z30.b, p7/m, z28.b, z29.b"); TEST_SINGLE(mls(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mls z30.h, p7/m, z28.h, z29.h"); TEST_SINGLE(mls(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mls z30.s, p7/m, z28.s, z29.s"); TEST_SINGLE(mls(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mls z30.d, p7/m, z28.d, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer multiply-add writing multiplicand (predicated)") { - TEST_SINGLE(mad(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mad z30.b, p7/m, z28.b, z29.b"); + TEST_SINGLE(mad(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mad z30.b, p7/m, z28.b, z29.b"); TEST_SINGLE(mad(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mad z30.h, p7/m, z28.h, z29.h"); TEST_SINGLE(mad(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mad z30.s, p7/m, z28.s, z29.s"); TEST_SINGLE(mad(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mad z30.d, p7/m, z28.d, z29.d"); - TEST_SINGLE(msb(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "msb z30.b, p7/m, z28.b, z29.b"); + TEST_SINGLE(msb(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "msb z30.b, p7/m, z28.b, z29.b"); TEST_SINGLE(msb(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "msb z30.h, p7/m, z28.h, z29.h"); TEST_SINGLE(msb(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "msb z30.s, p7/m, z28.s, z29.s"); TEST_SINGLE(msb(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "msb z30.d, p7/m, z28.d, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer 
add/subtract vectors (predicated)") { - TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "add z30.b, p7/m, z30.b, z28.b"); + TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "add z30.b, p7/m, z30.b, z28.b"); TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "add z30.h, p7/m, z30.h, z28.h"); TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "add z30.s, p7/m, z30.s, z28.s"); TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "add z30.d, p7/m, z30.d, z28.d"); - TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sub z30.b, p7/m, z30.b, z28.b"); + TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sub z30.b, p7/m, z30.b, z28.b"); TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sub z30.h, p7/m, z30.h, z28.h"); TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sub z30.s, p7/m, z30.s, z28.s"); TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sub z30.d, p7/m, z30.d, z28.d"); - TEST_SINGLE(subr(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "subr z30.b, p7/m, z30.b, z28.b"); + TEST_SINGLE(subr(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "subr z30.b, p7/m, z30.b, z28.b"); TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "subr z30.h, p7/m, z30.h, z28.h"); TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "subr z30.s, p7/m, z30.s, z28.s"); TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "subr z30.d, p7/m, z30.d, z28.d"); @@ -583,136 +570,116 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer 
min/max/difference } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer multiply vectors (predicated)") { - TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "mul z30.b, p7/m, z30.b, z29.b"); - TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "mul z30.h, p7/m, z30.h, z29.h"); - TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "mul z30.s, p7/m, z30.s, z29.s"); - TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "mul z30.d, p7/m, z30.d, z29.d"); - - TEST_SINGLE(smulh(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "smulh z30.b, p7/m, z30.b, z29.b"); - TEST_SINGLE(smulh(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "smulh z30.h, p7/m, z30.h, z29.h"); - TEST_SINGLE(smulh(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "smulh z30.s, p7/m, z30.s, z29.s"); - TEST_SINGLE(smulh(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "smulh z30.d, p7/m, z30.d, z29.d"); - - TEST_SINGLE(umulh(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "umulh z30.b, p7/m, z30.b, z29.b"); - TEST_SINGLE(umulh(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "umulh z30.h, p7/m, z30.h, z29.h"); - TEST_SINGLE(umulh(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "umulh z30.s, p7/m, z30.s, z29.s"); - TEST_SINGLE(umulh(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "umulh z30.d, p7/m, z30.d, z29.d"); + TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "mul z30.b, p7/m, z30.b, z29.b"); + TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "mul z30.h, p7/m, z30.h, z29.h"); + TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, 
PReg::p7.Merging(), ZReg::z30, ZReg::z29), "mul z30.s, p7/m, z30.s, z29.s"); + TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "mul z30.d, p7/m, z30.d, z29.d"); + + TEST_SINGLE(smulh(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "smulh z30.b, p7/m, z30.b, z29.b"); + TEST_SINGLE(smulh(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "smulh z30.h, p7/m, z30.h, z29.h"); + TEST_SINGLE(smulh(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "smulh z30.s, p7/m, z30.s, z29.s"); + TEST_SINGLE(smulh(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "smulh z30.d, p7/m, z30.d, z29.d"); + + TEST_SINGLE(umulh(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "umulh z30.b, p7/m, z30.b, z29.b"); + TEST_SINGLE(umulh(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "umulh z30.h, p7/m, z30.h, z29.h"); + TEST_SINGLE(umulh(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "umulh z30.s, p7/m, z30.s, z29.s"); + TEST_SINGLE(umulh(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "umulh z30.d, p7/m, z30.d, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer divide vectors (predicated)") { - TEST_SINGLE(sdiv(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "sdiv z30.s, p7/m, z30.s, z29.s"); - TEST_SINGLE(sdiv(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "sdiv z30.d, p7/m, z30.d, z29.d"); + TEST_SINGLE(sdiv(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "sdiv z30.s, p7/m, z30.s, z29.s"); + TEST_SINGLE(sdiv(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "sdiv z30.d, p7/m, z30.d, z29.d"); - TEST_SINGLE(udiv(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "udiv z30.s, p7/m, z30.s, z29.s"); - 
TEST_SINGLE(udiv(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "udiv z30.d, p7/m, z30.d, z29.d"); + TEST_SINGLE(udiv(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "udiv z30.s, p7/m, z30.s, z29.s"); + TEST_SINGLE(udiv(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "udiv z30.d, p7/m, z30.d, z29.d"); - TEST_SINGLE(sdivr(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "sdivr z30.s, p7/m, z30.s, z29.s"); - TEST_SINGLE(sdivr(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "sdivr z30.d, p7/m, z30.d, z29.d"); + TEST_SINGLE(sdivr(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "sdivr z30.s, p7/m, z30.s, z29.s"); + TEST_SINGLE(sdivr(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "sdivr z30.d, p7/m, z30.d, z29.d"); - TEST_SINGLE(udivr(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "udivr z30.s, p7/m, z30.s, z29.s"); - TEST_SINGLE(udivr(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), - "udivr z30.d, p7/m, z30.d, z29.d"); + TEST_SINGLE(udivr(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "udivr z30.s, p7/m, z30.s, z29.s"); + TEST_SINGLE(udivr(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "udivr z30.d, p7/m, z30.d, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise logical operations (predicated)") { - TEST_SINGLE(orr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(orr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(orr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(orr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 
ZReg::z29), "orr z30.d, p6/m, z30.d, z29.d"); - //TEST_SINGLE(orr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.q, p6/m, z30.q, z29.q"); - - TEST_SINGLE(eor(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(eor(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(eor(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(eor(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.d, p6/m, z30.d, z29.d"); - //TEST_SINGLE(eor(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.q, p6/m, z30.q, z29.q"); - - TEST_SINGLE(and_(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(and_(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(and_(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(and_(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.d, p6/m, z30.d, z29.d"); - //TEST_SINGLE(and_(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.q, p6/m, z30.q, z29.q"); - - TEST_SINGLE(bic(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(bic(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(bic(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(bic(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.d, p6/m, z30.d, z29.d"); - //TEST_SINGLE(bic(SubRegSize::i128Bit, 
ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.q, p6/m, z30.q, z29.q"); + TEST_SINGLE(orr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(orr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(orr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(orr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.d, p6/m, z30.d, z29.d"); + // TEST_SINGLE(orr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.q, p6/m, z30.q, z29.q"); + + TEST_SINGLE(eor(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(eor(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(eor(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(eor(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.d, p6/m, z30.d, z29.d"); + // TEST_SINGLE(eor(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.q, p6/m, z30.q, z29.q"); + + TEST_SINGLE(and_(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(and_(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(and_(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(and_(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.d, p6/m, z30.d, z29.d"); + // TEST_SINGLE(and_(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.q, p6/m, z30.q, z29.q"); + + 
TEST_SINGLE(bic(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(bic(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(bic(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(bic(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.d, p6/m, z30.d, z29.d"); + // TEST_SINGLE(bic(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.q, p6/m, z30.q, z29.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer add reduction (predicated)") { - TEST_SINGLE(saddv(SubRegSize::i8Bit, DReg::d30, PReg::p7, ZReg::z29), "saddv d30, p7, z29.b"); + TEST_SINGLE(saddv(SubRegSize::i8Bit, DReg::d30, PReg::p7, ZReg::z29), "saddv d30, p7, z29.b"); TEST_SINGLE(saddv(SubRegSize::i16Bit, DReg::d30, PReg::p7, ZReg::z29), "saddv d30, p7, z29.h"); TEST_SINGLE(saddv(SubRegSize::i32Bit, DReg::d30, PReg::p7, ZReg::z29), "saddv d30, p7, z29.s"); - TEST_SINGLE(uaddv(SubRegSize::i8Bit, DReg::d30, PReg::p7, ZReg::z29), "uaddv d30, p7, z29.b"); + TEST_SINGLE(uaddv(SubRegSize::i8Bit, DReg::d30, PReg::p7, ZReg::z29), "uaddv d30, p7, z29.b"); TEST_SINGLE(uaddv(SubRegSize::i16Bit, DReg::d30, PReg::p7, ZReg::z29), "uaddv d30, p7, z29.h"); TEST_SINGLE(uaddv(SubRegSize::i32Bit, DReg::d30, PReg::p7, ZReg::z29), "uaddv d30, p7, z29.s"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer min/max reduction (predicated)") { - TEST_SINGLE(smaxv(SubRegSize::i8Bit, VReg::v30, PReg::p6, ZReg::z29), "smaxv b30, p6, z29.b"); - TEST_SINGLE(smaxv(SubRegSize::i16Bit, VReg::v30, PReg::p6, ZReg::z29), "smaxv h30, p6, z29.h"); - TEST_SINGLE(smaxv(SubRegSize::i32Bit, VReg::v30, PReg::p6, ZReg::z29), "smaxv s30, p6, z29.s"); - TEST_SINGLE(smaxv(SubRegSize::i64Bit, VReg::v30, PReg::p6, ZReg::z29), "smaxv d30, p6, z29.d"); - - 
TEST_SINGLE(umaxv(SubRegSize::i8Bit, VReg::v30, PReg::p6, ZReg::z29), "umaxv b30, p6, z29.b"); - TEST_SINGLE(umaxv(SubRegSize::i16Bit, VReg::v30, PReg::p6, ZReg::z29), "umaxv h30, p6, z29.h"); - TEST_SINGLE(umaxv(SubRegSize::i32Bit, VReg::v30, PReg::p6, ZReg::z29), "umaxv s30, p6, z29.s"); - TEST_SINGLE(umaxv(SubRegSize::i64Bit, VReg::v30, PReg::p6, ZReg::z29), "umaxv d30, p6, z29.d"); - - TEST_SINGLE(sminv(SubRegSize::i8Bit, VReg::v30, PReg::p6, ZReg::z29), "sminv b30, p6, z29.b"); - TEST_SINGLE(sminv(SubRegSize::i16Bit, VReg::v30, PReg::p6, ZReg::z29), "sminv h30, p6, z29.h"); - TEST_SINGLE(sminv(SubRegSize::i32Bit, VReg::v30, PReg::p6, ZReg::z29), "sminv s30, p6, z29.s"); - TEST_SINGLE(sminv(SubRegSize::i64Bit, VReg::v30, PReg::p6, ZReg::z29), "sminv d30, p6, z29.d"); - - TEST_SINGLE(uminv(SubRegSize::i8Bit, VReg::v30, PReg::p6, ZReg::z29), "uminv b30, p6, z29.b"); - TEST_SINGLE(uminv(SubRegSize::i16Bit, VReg::v30, PReg::p6, ZReg::z29), "uminv h30, p6, z29.h"); - TEST_SINGLE(uminv(SubRegSize::i32Bit, VReg::v30, PReg::p6, ZReg::z29), "uminv s30, p6, z29.s"); - TEST_SINGLE(uminv(SubRegSize::i64Bit, VReg::v30, PReg::p6, ZReg::z29), "uminv d30, p6, z29.d"); + TEST_SINGLE(smaxv(SubRegSize::i8Bit, VReg::v30, PReg::p6, ZReg::z29), "smaxv b30, p6, z29.b"); + TEST_SINGLE(smaxv(SubRegSize::i16Bit, VReg::v30, PReg::p6, ZReg::z29), "smaxv h30, p6, z29.h"); + TEST_SINGLE(smaxv(SubRegSize::i32Bit, VReg::v30, PReg::p6, ZReg::z29), "smaxv s30, p6, z29.s"); + TEST_SINGLE(smaxv(SubRegSize::i64Bit, VReg::v30, PReg::p6, ZReg::z29), "smaxv d30, p6, z29.d"); + + TEST_SINGLE(umaxv(SubRegSize::i8Bit, VReg::v30, PReg::p6, ZReg::z29), "umaxv b30, p6, z29.b"); + TEST_SINGLE(umaxv(SubRegSize::i16Bit, VReg::v30, PReg::p6, ZReg::z29), "umaxv h30, p6, z29.h"); + TEST_SINGLE(umaxv(SubRegSize::i32Bit, VReg::v30, PReg::p6, ZReg::z29), "umaxv s30, p6, z29.s"); + TEST_SINGLE(umaxv(SubRegSize::i64Bit, VReg::v30, PReg::p6, ZReg::z29), "umaxv d30, p6, z29.d"); + + TEST_SINGLE(sminv(SubRegSize::i8Bit, 
VReg::v30, PReg::p6, ZReg::z29), "sminv b30, p6, z29.b"); + TEST_SINGLE(sminv(SubRegSize::i16Bit, VReg::v30, PReg::p6, ZReg::z29), "sminv h30, p6, z29.h"); + TEST_SINGLE(sminv(SubRegSize::i32Bit, VReg::v30, PReg::p6, ZReg::z29), "sminv s30, p6, z29.s"); + TEST_SINGLE(sminv(SubRegSize::i64Bit, VReg::v30, PReg::p6, ZReg::z29), "sminv d30, p6, z29.d"); + + TEST_SINGLE(uminv(SubRegSize::i8Bit, VReg::v30, PReg::p6, ZReg::z29), "uminv b30, p6, z29.b"); + TEST_SINGLE(uminv(SubRegSize::i16Bit, VReg::v30, PReg::p6, ZReg::z29), "uminv h30, p6, z29.h"); + TEST_SINGLE(uminv(SubRegSize::i32Bit, VReg::v30, PReg::p6, ZReg::z29), "uminv s30, p6, z29.s"); + TEST_SINGLE(uminv(SubRegSize::i64Bit, VReg::v30, PReg::p6, ZReg::z29), "uminv d30, p6, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE constructive prefix (predicated)") { - TEST_SINGLE(movprfx(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.b, p6/m, z29.b"); - TEST_SINGLE(movprfx(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.h, p6/m, z29.h"); - TEST_SINGLE(movprfx(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.s, p6/m, z29.s"); - TEST_SINGLE(movprfx(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.d, p6/m, z29.d"); - //TEST_SINGLE(movprfx(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.q, p6/m, z29.q"); - TEST_SINGLE(movprfx(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.b, p6/z, z29.b"); - TEST_SINGLE(movprfx(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.h, p6/z, z29.h"); - TEST_SINGLE(movprfx(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.s, p6/z, z29.s"); - TEST_SINGLE(movprfx(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.d, p6/z, z29.d"); - //TEST_SINGLE(movprfx(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.q, p6/z, 
z29.q"); + TEST_SINGLE(movprfx(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.b, p6/m, z29.b"); + TEST_SINGLE(movprfx(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.h, p6/m, z29.h"); + TEST_SINGLE(movprfx(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.s, p6/m, z29.s"); + TEST_SINGLE(movprfx(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.d, p6/m, z29.d"); + // TEST_SINGLE(movprfx(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.q, p6/m, z29.q"); + TEST_SINGLE(movprfx(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.b, p6/z, z29.b"); + TEST_SINGLE(movprfx(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.h, p6/z, z29.h"); + TEST_SINGLE(movprfx(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.s, p6/z, z29.s"); + TEST_SINGLE(movprfx(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.d, p6/z, z29.d"); + // TEST_SINGLE(movprfx(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.q, p6/z, z29.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise logical reduction (predicated)") { - TEST_SINGLE(orv(SubRegSize::i8Bit, VReg::v30, PReg::p7, ZReg::z29), "orv b30, p7, z29.b"); + TEST_SINGLE(orv(SubRegSize::i8Bit, VReg::v30, PReg::p7, ZReg::z29), "orv b30, p7, z29.b"); TEST_SINGLE(orv(SubRegSize::i16Bit, VReg::v30, PReg::p7, ZReg::z29), "orv h30, p7, z29.h"); TEST_SINGLE(orv(SubRegSize::i32Bit, VReg::v30, PReg::p7, ZReg::z29), "orv s30, p7, z29.s"); TEST_SINGLE(orv(SubRegSize::i64Bit, VReg::v30, PReg::p7, ZReg::z29), "orv d30, p7, z29.d"); - TEST_SINGLE(eorv(SubRegSize::i8Bit, VReg::v30, PReg::p7, ZReg::z29), "eorv b30, p7, z29.b"); + TEST_SINGLE(eorv(SubRegSize::i8Bit, VReg::v30, PReg::p7, ZReg::z29), "eorv b30, p7, z29.b"); TEST_SINGLE(eorv(SubRegSize::i16Bit, VReg::v30, PReg::p7, ZReg::z29), 
"eorv h30, p7, z29.h"); TEST_SINGLE(eorv(SubRegSize::i32Bit, VReg::v30, PReg::p7, ZReg::z29), "eorv s30, p7, z29.s"); TEST_SINGLE(eorv(SubRegSize::i64Bit, VReg::v30, PReg::p7, ZReg::z29), "eorv d30, p7, z29.d"); - TEST_SINGLE(andv(SubRegSize::i8Bit, VReg::v30, PReg::p7, ZReg::z29), "andv b30, p7, z29.b"); + TEST_SINGLE(andv(SubRegSize::i8Bit, VReg::v30, PReg::p7, ZReg::z29), "andv b30, p7, z29.b"); TEST_SINGLE(andv(SubRegSize::i16Bit, VReg::v30, PReg::p7, ZReg::z29), "andv h30, p7, z29.h"); TEST_SINGLE(andv(SubRegSize::i32Bit, VReg::v30, PReg::p7, ZReg::z29), "andv s30, p7, z29.s"); TEST_SINGLE(andv(SubRegSize::i64Bit, VReg::v30, PReg::p7, ZReg::z29), "andv d30, p7, z29.d"); @@ -800,140 +767,140 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise shift by immediate TEST_SINGLE(sqshlu(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 63), "sqshlu z30.d, p6/m, z30.d, #63"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise shift by vector (predicated)") { - TEST_SINGLE(asr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asr z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(asr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asr z30.b, p6/m, z30.b, z29.b"); TEST_SINGLE(asr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asr z30.h, p6/m, z30.h, z29.h"); TEST_SINGLE(asr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asr z30.s, p6/m, z30.s, z29.s"); TEST_SINGLE(asr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asr z30.d, p6/m, z30.d, z29.d"); - TEST_SINGLE(lsr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsr z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(lsr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsr z30.b, p6/m, z30.b, z29.b"); TEST_SINGLE(lsr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsr z30.h, p6/m, z30.h, 
z29.h"); TEST_SINGLE(lsr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsr z30.s, p6/m, z30.s, z29.s"); TEST_SINGLE(lsr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsr z30.d, p6/m, z30.d, z29.d"); - TEST_SINGLE(lsl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsl z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(lsl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsl z30.b, p6/m, z30.b, z29.b"); TEST_SINGLE(lsl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsl z30.h, p6/m, z30.h, z29.h"); TEST_SINGLE(lsl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsl z30.s, p6/m, z30.s, z29.s"); TEST_SINGLE(lsl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsl z30.d, p6/m, z30.d, z29.d"); - TEST_SINGLE(asrr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asrr z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(asrr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asrr z30.b, p6/m, z30.b, z29.b"); TEST_SINGLE(asrr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asrr z30.h, p6/m, z30.h, z29.h"); TEST_SINGLE(asrr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asrr z30.s, p6/m, z30.s, z29.s"); TEST_SINGLE(asrr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asrr z30.d, p6/m, z30.d, z29.d"); - TEST_SINGLE(lsrr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsrr z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(lsrr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsrr z30.b, p6/m, z30.b, z29.b"); TEST_SINGLE(lsrr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsrr z30.h, p6/m, z30.h, z29.h"); TEST_SINGLE(lsrr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsrr z30.s, 
p6/m, z30.s, z29.s"); TEST_SINGLE(lsrr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsrr z30.d, p6/m, z30.d, z29.d"); - TEST_SINGLE(lslr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lslr z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(lslr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lslr z30.b, p6/m, z30.b, z29.b"); TEST_SINGLE(lslr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lslr z30.h, p6/m, z30.h, z29.h"); TEST_SINGLE(lslr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lslr z30.s, p6/m, z30.s, z29.s"); TEST_SINGLE(lslr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lslr z30.d, p6/m, z30.d, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise shift by wide elements (predicated)") { - TEST_SINGLE(asr_wide(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "asr z30.b, p7/m, z30.b, z29.d"); + TEST_SINGLE(asr_wide(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "asr z30.b, p7/m, z30.b, z29.d"); TEST_SINGLE(asr_wide(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "asr z30.h, p7/m, z30.h, z29.d"); TEST_SINGLE(asr_wide(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "asr z30.s, p7/m, z30.s, z29.d"); - TEST_SINGLE(lsr_wide(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsr z30.b, p7/m, z30.b, z29.d"); + TEST_SINGLE(lsr_wide(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsr z30.b, p7/m, z30.b, z29.d"); TEST_SINGLE(lsr_wide(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsr z30.h, p7/m, z30.h, z29.d"); TEST_SINGLE(lsr_wide(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsr z30.s, p7/m, z30.s, z29.d"); - TEST_SINGLE(lsl_wide(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), 
ZReg::z30, ZReg::z29), "lsl z30.b, p7/m, z30.b, z29.d"); + TEST_SINGLE(lsl_wide(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsl z30.b, p7/m, z30.b, z29.d"); TEST_SINGLE(lsl_wide(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsl z30.h, p7/m, z30.h, z29.d"); TEST_SINGLE(lsl_wide(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsl z30.s, p7/m, z30.s, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer unary operations (predicated)") { - //TEST_SINGLE(sxtb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.b, p6/m, z29.b"); - TEST_SINGLE(sxtb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.h, p6/m, z29.h"); - TEST_SINGLE(sxtb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.s, p6/m, z29.s"); - TEST_SINGLE(sxtb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.d, p6/m, z29.d"); - //TEST_SINGLE(sxtb(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.q, p6/m, z29.q"); - - //TEST_SINGLE(uxtb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.b, p6/m, z29.b"); - TEST_SINGLE(uxtb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.h, p6/m, z29.h"); - TEST_SINGLE(uxtb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.s, p6/m, z29.s"); - TEST_SINGLE(uxtb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.d, p6/m, z29.d"); - //TEST_SINGLE(uxtb(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.q, p6/m, z29.q"); - - //TEST_SINGLE(sxth(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.b, p6/m, z29.b"); - //TEST_SINGLE(sxth(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.h, p6/m, z29.h"); - TEST_SINGLE(sxth(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.s, p6/m, z29.s"); - 
TEST_SINGLE(sxth(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.d, p6/m, z29.d"); - //TEST_SINGLE(sxth(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.q, p6/m, z29.q"); - - //TEST_SINGLE(uxth(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.b, p6/m, z29.b"); - //TEST_SINGLE(uxth(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.h, p6/m, z29.h"); - TEST_SINGLE(uxth(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.s, p6/m, z29.s"); - TEST_SINGLE(uxth(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.d, p6/m, z29.d"); - //TEST_SINGLE(uxth(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.q, p6/m, z29.q"); - - //TEST_SINGLE(sxtw(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtw z30.b, p6/m, z29.b"); - //TEST_SINGLE(sxtw(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtw z30.h, p6/m, z29.h"); - //TEST_SINGLE(sxtw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtw z30.s, p6/m, z29.s"); - TEST_SINGLE(sxtw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtw z30.d, p6/m, z29.d"); - //TEST_SINGLE(sxtw(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtw z30.q, p6/m, z29.q"); - - //TEST_SINGLE(uxtw(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtw z30.b, p6/m, z29.b"); - //TEST_SINGLE(uxtw(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtw z30.h, p6/m, z29.h"); - //TEST_SINGLE(uxtw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtw z30.s, p6/m, z29.s"); - TEST_SINGLE(uxtw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtw z30.d, p6/m, z29.d"); - //TEST_SINGLE(uxtw(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtw z30.q, p6/m, z29.q"); - - TEST_SINGLE(abs(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.b, p6/m, 
z29.b"); - TEST_SINGLE(abs(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.h, p6/m, z29.h"); - TEST_SINGLE(abs(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.s, p6/m, z29.s"); - TEST_SINGLE(abs(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.d, p6/m, z29.d"); - //TEST_SINGLE(abs(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.q, p6/m, z29.q"); - - TEST_SINGLE(neg(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.b, p6/m, z29.b"); - TEST_SINGLE(neg(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.h, p6/m, z29.h"); - TEST_SINGLE(neg(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.s, p6/m, z29.s"); - TEST_SINGLE(neg(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.d, p6/m, z29.d"); - //TEST_SINGLE(neg(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.q, p6/m, z29.q"); + // TEST_SINGLE(sxtb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.b, p6/m, z29.b"); + TEST_SINGLE(sxtb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.h, p6/m, z29.h"); + TEST_SINGLE(sxtb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.s, p6/m, z29.s"); + TEST_SINGLE(sxtb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.d, p6/m, z29.d"); + // TEST_SINGLE(sxtb(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.q, p6/m, z29.q"); + + // TEST_SINGLE(uxtb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.b, p6/m, z29.b"); + TEST_SINGLE(uxtb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.h, p6/m, z29.h"); + TEST_SINGLE(uxtb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.s, p6/m, z29.s"); + TEST_SINGLE(uxtb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.d, p6/m, z29.d"); + // 
TEST_SINGLE(uxtb(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.q, p6/m, z29.q"); + + // TEST_SINGLE(sxth(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.b, p6/m, z29.b"); + // TEST_SINGLE(sxth(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.h, p6/m, z29.h"); + TEST_SINGLE(sxth(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.s, p6/m, z29.s"); + TEST_SINGLE(sxth(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.d, p6/m, z29.d"); + // TEST_SINGLE(sxth(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.q, p6/m, z29.q"); + + // TEST_SINGLE(uxth(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.b, p6/m, z29.b"); + // TEST_SINGLE(uxth(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.h, p6/m, z29.h"); + TEST_SINGLE(uxth(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.s, p6/m, z29.s"); + TEST_SINGLE(uxth(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.d, p6/m, z29.d"); + // TEST_SINGLE(uxth(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.q, p6/m, z29.q"); + + // TEST_SINGLE(sxtw(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtw z30.b, p6/m, z29.b"); + // TEST_SINGLE(sxtw(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtw z30.h, p6/m, z29.h"); + // TEST_SINGLE(sxtw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtw z30.s, p6/m, z29.s"); + TEST_SINGLE(sxtw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtw z30.d, p6/m, z29.d"); + // TEST_SINGLE(sxtw(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtw z30.q, p6/m, z29.q"); + + // TEST_SINGLE(uxtw(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtw z30.b, p6/m, z29.b"); + // TEST_SINGLE(uxtw(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtw 
z30.h, p6/m, z29.h"); + // TEST_SINGLE(uxtw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtw z30.s, p6/m, z29.s"); + TEST_SINGLE(uxtw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtw z30.d, p6/m, z29.d"); + // TEST_SINGLE(uxtw(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtw z30.q, p6/m, z29.q"); + + TEST_SINGLE(abs(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.b, p6/m, z29.b"); + TEST_SINGLE(abs(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.h, p6/m, z29.h"); + TEST_SINGLE(abs(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.s, p6/m, z29.s"); + TEST_SINGLE(abs(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.d, p6/m, z29.d"); + // TEST_SINGLE(abs(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.q, p6/m, z29.q"); + + TEST_SINGLE(neg(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.b, p6/m, z29.b"); + TEST_SINGLE(neg(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.h, p6/m, z29.h"); + TEST_SINGLE(neg(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.s, p6/m, z29.s"); + TEST_SINGLE(neg(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.d, p6/m, z29.d"); + // TEST_SINGLE(neg(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.q, p6/m, z29.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise unary operations (predicated)") { - TEST_SINGLE(cls(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.b, p6/m, z29.b"); - TEST_SINGLE(cls(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.h, p6/m, z29.h"); - TEST_SINGLE(cls(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.s, p6/m, z29.s"); - TEST_SINGLE(cls(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.d, p6/m, z29.d"); - 
//TEST_SINGLE(cls(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.q, p6/m, z29.q"); - - TEST_SINGLE(clz(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.b, p6/m, z29.b"); - TEST_SINGLE(clz(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.h, p6/m, z29.h"); - TEST_SINGLE(clz(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.s, p6/m, z29.s"); - TEST_SINGLE(clz(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.d, p6/m, z29.d"); - //TEST_SINGLE(clz(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.q, p6/m, z29.q"); - - TEST_SINGLE(cnt(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.b, p6/m, z29.b"); - TEST_SINGLE(cnt(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.h, p6/m, z29.h"); - TEST_SINGLE(cnt(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.s, p6/m, z29.s"); - TEST_SINGLE(cnt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.d, p6/m, z29.d"); - //TEST_SINGLE(cnt(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.q, p6/m, z29.q"); - - TEST_SINGLE(cnot(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.b, p6/m, z29.b"); - TEST_SINGLE(cnot(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.h, p6/m, z29.h"); - TEST_SINGLE(cnot(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.s, p6/m, z29.s"); - TEST_SINGLE(cnot(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.d, p6/m, z29.d"); - //TEST_SINGLE(cnot(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.q, p6/m, z29.q"); - - //TEST_SINGLE(fabs(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.b, p6/m, z29.b"); - TEST_SINGLE(fabs(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.h, p6/m, z29.h"); - 
TEST_SINGLE(fabs(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.s, p6/m, z29.s"); - TEST_SINGLE(fabs(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.d, p6/m, z29.d"); - //TEST_SINGLE(fabs(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.q, p6/m, z29.q"); - - //TEST_SINGLE(fneg(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.b, p6/m, z29.b"); - TEST_SINGLE(fneg(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.h, p6/m, z29.h"); - TEST_SINGLE(fneg(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.s, p6/m, z29.s"); - TEST_SINGLE(fneg(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.d, p6/m, z29.d"); - //TEST_SINGLE(fneg(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.q, p6/m, z29.q"); - - TEST_SINGLE(not_(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.b, p6/m, z29.b"); - TEST_SINGLE(not_(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.h, p6/m, z29.h"); - TEST_SINGLE(not_(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.s, p6/m, z29.s"); - TEST_SINGLE(not_(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.d, p6/m, z29.d"); - //TEST_SINGLE(not_(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.q, p6/m, z29.q"); + TEST_SINGLE(cls(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.b, p6/m, z29.b"); + TEST_SINGLE(cls(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.h, p6/m, z29.h"); + TEST_SINGLE(cls(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.s, p6/m, z29.s"); + TEST_SINGLE(cls(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.d, p6/m, z29.d"); + // TEST_SINGLE(cls(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.q, p6/m, z29.q"); + + 
TEST_SINGLE(clz(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.b, p6/m, z29.b"); + TEST_SINGLE(clz(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.h, p6/m, z29.h"); + TEST_SINGLE(clz(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.s, p6/m, z29.s"); + TEST_SINGLE(clz(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.d, p6/m, z29.d"); + // TEST_SINGLE(clz(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.q, p6/m, z29.q"); + + TEST_SINGLE(cnt(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.b, p6/m, z29.b"); + TEST_SINGLE(cnt(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.h, p6/m, z29.h"); + TEST_SINGLE(cnt(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.s, p6/m, z29.s"); + TEST_SINGLE(cnt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.d, p6/m, z29.d"); + // TEST_SINGLE(cnt(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.q, p6/m, z29.q"); + + TEST_SINGLE(cnot(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.b, p6/m, z29.b"); + TEST_SINGLE(cnot(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.h, p6/m, z29.h"); + TEST_SINGLE(cnot(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.s, p6/m, z29.s"); + TEST_SINGLE(cnot(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.d, p6/m, z29.d"); + // TEST_SINGLE(cnot(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.q, p6/m, z29.q"); + + // TEST_SINGLE(fabs(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.b, p6/m, z29.b"); + TEST_SINGLE(fabs(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.h, p6/m, z29.h"); + TEST_SINGLE(fabs(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.s, p6/m, z29.s"); + 
TEST_SINGLE(fabs(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.d, p6/m, z29.d"); + // TEST_SINGLE(fabs(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.q, p6/m, z29.q"); + + // TEST_SINGLE(fneg(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.b, p6/m, z29.b"); + TEST_SINGLE(fneg(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.h, p6/m, z29.h"); + TEST_SINGLE(fneg(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.s, p6/m, z29.s"); + TEST_SINGLE(fneg(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.d, p6/m, z29.d"); + // TEST_SINGLE(fneg(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.q, p6/m, z29.q"); + + TEST_SINGLE(not_(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.b, p6/m, z29.b"); + TEST_SINGLE(not_(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.h, p6/m, z29.h"); + TEST_SINGLE(not_(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.s, p6/m, z29.s"); + TEST_SINGLE(not_(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.d, p6/m, z29.d"); + // TEST_SINGLE(not_(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.q, p6/m, z29.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise logical operations (unpredicated)") { TEST_SINGLE(and_(ZReg::z30, ZReg::z29, ZReg::z28), "and z30.d, z29.d, z28.d"); @@ -942,13 +909,13 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise logical operations TEST_SINGLE(eor(ZReg::z30, ZReg::z29, ZReg::z28), "eor z30.d, z29.d, z28.d"); TEST_SINGLE(bic(ZReg::z30, ZReg::z29, ZReg::z28), "bic z30.d, z29.d, z28.d"); - TEST_SINGLE(xar(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "xar z30.b, z30.b, z29.b, #1"); - TEST_SINGLE(xar(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "xar z30.b, z30.b, z29.b, #8"); - TEST_SINGLE(xar(SubRegSize::i16Bit, 
ZReg::z30, ZReg::z29, 1), "xar z30.h, z30.h, z29.h, #1"); + TEST_SINGLE(xar(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "xar z30.b, z30.b, z29.b, #1"); + TEST_SINGLE(xar(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "xar z30.b, z30.b, z29.b, #8"); + TEST_SINGLE(xar(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "xar z30.h, z30.h, z29.h, #1"); TEST_SINGLE(xar(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "xar z30.h, z30.h, z29.h, #16"); - TEST_SINGLE(xar(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "xar z30.s, z30.s, z29.s, #1"); + TEST_SINGLE(xar(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "xar z30.s, z30.s, z29.s, #1"); TEST_SINGLE(xar(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "xar z30.s, z30.s, z29.s, #32"); - TEST_SINGLE(xar(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "xar z30.d, z30.d, z29.d, #1"); + TEST_SINGLE(xar(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "xar z30.d, z30.d, z29.d, #1"); TEST_SINGLE(xar(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "xar z30.d, z30.d, z29.d, #64"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise ternary operations") { @@ -960,35 +927,35 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise ternary operation TEST_SINGLE(nbsl(ZReg::z30, ZReg::z30, ZReg::z28, ZReg::z29), "nbsl z30.d, z30.d, z28.d, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Index Generation") { - TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, -16, -16), "index z30.b, #-16, #-16"); - TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, -16, 15), "index z30.b, #-16, #15"); + TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, -16, -16), "index z30.b, #-16, #-16"); + TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, -16, 15), "index z30.b, #-16, #15"); TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, -16, -16), "index z30.h, #-16, #-16"); - TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, -16, 15), "index z30.h, #-16, #15"); + TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, -16, 15), "index z30.h, #-16, #15"); 
TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, -16, -16), "index z30.s, #-16, #-16"); - TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, -16, 15), "index z30.s, #-16, #15"); + TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, -16, 15), "index z30.s, #-16, #15"); TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, -16, -16), "index z30.d, #-16, #-16"); - TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, -16, 15), "index z30.d, #-16, #15"); + TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, -16, 15), "index z30.d, #-16, #15"); - TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, WReg::w29, -16), "index z30.b, w29, #-16"); - TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, WReg::w29, 15), "index z30.b, w29, #15"); + TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, WReg::w29, -16), "index z30.b, w29, #-16"); + TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, WReg::w29, 15), "index z30.b, w29, #15"); TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, WReg::w29, -16), "index z30.h, w29, #-16"); - TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, WReg::w29, 15), "index z30.h, w29, #15"); + TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, WReg::w29, 15), "index z30.h, w29, #15"); TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, WReg::w29, -16), "index z30.s, w29, #-16"); - TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, WReg::w29, 15), "index z30.s, w29, #15"); + TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, WReg::w29, 15), "index z30.s, w29, #15"); TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, XReg::x29, -16), "index z30.d, x29, #-16"); - TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, XReg::x29, 15), "index z30.d, x29, #15"); + TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, XReg::x29, 15), "index z30.d, x29, #15"); - TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, -16, WReg::w29), "index z30.b, #-16, w29"); - TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, 15, WReg::w29), "index z30.b, #15, w29"); + TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, -16, WReg::w29), "index 
z30.b, #-16, w29"); + TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, 15, WReg::w29), "index z30.b, #15, w29"); TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, -16, WReg::w29), "index z30.h, #-16, w29"); - TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, 15, WReg::w29), "index z30.h, #15, w29"); + TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, 15, WReg::w29), "index z30.h, #15, w29"); TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, -16, WReg::w29), "index z30.s, #-16, w29"); - TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, 15, WReg::w29), "index z30.s, #15, w29"); + TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, 15, WReg::w29), "index z30.s, #15, w29"); TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, -16, XReg::x29), "index z30.d, #-16, x29"); - TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, 15, XReg::x29), "index z30.d, #15, x29"); + TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, 15, XReg::x29), "index z30.d, #15, x29"); - TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, WReg::w29, WReg::w28), "index z30.b, w29, w28"); - TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, WReg::w29, WReg::w28), "index z30.b, w29, w28"); + TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, WReg::w29, WReg::w28), "index z30.b, w29, w28"); + TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, WReg::w29, WReg::w28), "index z30.b, w29, w28"); TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, WReg::w29, WReg::w28), "index z30.h, w29, w28"); TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, WReg::w29, WReg::w28), "index z30.h, w29, w28"); TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, WReg::w29, WReg::w28), "index z30.s, w29, w28"); @@ -998,20 +965,20 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Index Generation") { } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE stack frame adjustment") { TEST_SINGLE(addvl(XReg::rsp, XReg::rsp, -32), "addvl sp, sp, #-32"); - TEST_SINGLE(addvl(XReg::rsp, XReg::rsp, 31), "addvl sp, sp, #31"); - TEST_SINGLE(addvl(XReg::x30, XReg::x29, 
15), "addvl x30, x29, #15"); + TEST_SINGLE(addvl(XReg::rsp, XReg::rsp, 31), "addvl sp, sp, #31"); + TEST_SINGLE(addvl(XReg::x30, XReg::x29, 15), "addvl x30, x29, #15"); TEST_SINGLE(addpl(XReg::rsp, XReg::rsp, -32), "addpl sp, sp, #-32"); - TEST_SINGLE(addpl(XReg::rsp, XReg::rsp, 31), "addpl sp, sp, #31"); - TEST_SINGLE(addpl(XReg::x30, XReg::x29, 15), "addpl x30, x29, #15"); + TEST_SINGLE(addpl(XReg::rsp, XReg::rsp, 31), "addpl sp, sp, #31"); + TEST_SINGLE(addpl(XReg::x30, XReg::x29, 15), "addpl x30, x29, #15"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: Streaming SVE stack frame adjustment") { // TODO: Implement in emitter. } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE stack frame size") { TEST_SINGLE(rdvl(XReg::x30, -32), "rdvl x30, #-32"); - TEST_SINGLE(rdvl(XReg::x30, 31), "rdvl x30, #31"); - TEST_SINGLE(rdvl(XReg::x30, 15), "rdvl x30, #15"); + TEST_SINGLE(rdvl(XReg::x30, 31), "rdvl x30, #31"); + TEST_SINGLE(rdvl(XReg::x30, 15), "rdvl x30, #15"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: Streaming SVE stack frame size") { // TODO: Implement in emitter. 
@@ -1034,60 +1001,60 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer multiply vectors TEST_SINGLE(umulh(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umulh z30.d, z29.d, z28.d"); TEST_SINGLE(pmul(ZReg::z30, ZReg::z29, ZReg::z28), "pmul z30.b, z29.b, z28.b"); - } +} TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 signed saturating doubling multiply high (unpredicated)") { - TEST_SINGLE(sqdmulh(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmulh z30.b, z29.b, z28.b"); + TEST_SINGLE(sqdmulh(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmulh z30.b, z29.b, z28.b"); TEST_SINGLE(sqdmulh(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmulh z30.h, z29.h, z28.h"); TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmulh z30.s, z29.s, z28.s"); TEST_SINGLE(sqdmulh(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmulh z30.d, z29.d, z28.d"); - TEST_SINGLE(sqrdmulh(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmulh z30.b, z29.b, z28.b"); + TEST_SINGLE(sqrdmulh(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmulh z30.b, z29.b, z28.b"); TEST_SINGLE(sqrdmulh(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmulh z30.h, z29.h, z28.h"); TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmulh z30.s, z29.s, z28.s"); TEST_SINGLE(sqrdmulh(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmulh z30.d, z29.d, z28.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise shift by wide elements (unpredicated)") { - TEST_SINGLE(asr_wide(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "asr z30.b, z29.b, z28.d"); + TEST_SINGLE(asr_wide(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "asr z30.b, z29.b, z28.d"); TEST_SINGLE(asr_wide(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "asr z30.h, z29.h, z28.d"); TEST_SINGLE(asr_wide(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "asr z30.s, z29.s, 
z28.d"); - TEST_SINGLE(lsr_wide(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsr z30.b, z29.b, z28.d"); + TEST_SINGLE(lsr_wide(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsr z30.b, z29.b, z28.d"); TEST_SINGLE(lsr_wide(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsr z30.h, z29.h, z28.d"); TEST_SINGLE(lsr_wide(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsr z30.s, z29.s, z28.d"); - TEST_SINGLE(lsl_wide(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsl z30.b, z29.b, z28.d"); + TEST_SINGLE(lsl_wide(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsl z30.b, z29.b, z28.d"); TEST_SINGLE(lsl_wide(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsl z30.h, z29.h, z28.d"); TEST_SINGLE(lsl_wide(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsl z30.s, z29.s, z28.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise shift by immediate (unpredicated)") { - TEST_SINGLE(asr(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "asr z30.b, z29.b, #1"); - TEST_SINGLE(asr(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "asr z30.b, z29.b, #8"); - TEST_SINGLE(asr(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "asr z30.h, z29.h, #1"); + TEST_SINGLE(asr(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "asr z30.b, z29.b, #1"); + TEST_SINGLE(asr(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "asr z30.b, z29.b, #8"); + TEST_SINGLE(asr(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "asr z30.h, z29.h, #1"); TEST_SINGLE(asr(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "asr z30.h, z29.h, #16"); - TEST_SINGLE(asr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "asr z30.s, z29.s, #1"); + TEST_SINGLE(asr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "asr z30.s, z29.s, #1"); TEST_SINGLE(asr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "asr z30.s, z29.s, #32"); - TEST_SINGLE(asr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "asr z30.d, z29.d, #1"); + TEST_SINGLE(asr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "asr z30.d, z29.d, 
#1"); TEST_SINGLE(asr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "asr z30.d, z29.d, #64"); - TEST_SINGLE(lsr(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "lsr z30.b, z29.b, #1"); - TEST_SINGLE(lsr(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "lsr z30.b, z29.b, #8"); - TEST_SINGLE(lsr(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "lsr z30.h, z29.h, #1"); + TEST_SINGLE(lsr(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "lsr z30.b, z29.b, #1"); + TEST_SINGLE(lsr(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "lsr z30.b, z29.b, #8"); + TEST_SINGLE(lsr(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "lsr z30.h, z29.h, #1"); TEST_SINGLE(lsr(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "lsr z30.h, z29.h, #16"); - TEST_SINGLE(lsr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "lsr z30.s, z29.s, #1"); + TEST_SINGLE(lsr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "lsr z30.s, z29.s, #1"); TEST_SINGLE(lsr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "lsr z30.s, z29.s, #32"); - TEST_SINGLE(lsr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "lsr z30.d, z29.d, #1"); + TEST_SINGLE(lsr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "lsr z30.d, z29.d, #1"); TEST_SINGLE(lsr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "lsr z30.d, z29.d, #64"); - TEST_SINGLE(lsl(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 0), "lsl z30.b, z29.b, #0"); - TEST_SINGLE(lsl(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 7), "lsl z30.b, z29.b, #7"); - TEST_SINGLE(lsl(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "lsl z30.h, z29.h, #0"); + TEST_SINGLE(lsl(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 0), "lsl z30.b, z29.b, #0"); + TEST_SINGLE(lsl(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 7), "lsl z30.b, z29.b, #7"); + TEST_SINGLE(lsl(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "lsl z30.h, z29.h, #0"); TEST_SINGLE(lsl(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 15), "lsl z30.h, z29.h, #15"); - TEST_SINGLE(lsl(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "lsl z30.s, z29.s, #0"); + TEST_SINGLE(lsl(SubRegSize::i32Bit, 
ZReg::z30, ZReg::z29, 0), "lsl z30.s, z29.s, #0"); TEST_SINGLE(lsl(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 31), "lsl z30.s, z29.s, #31"); - TEST_SINGLE(lsl(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "lsl z30.d, z29.d, #0"); + TEST_SINGLE(lsl(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "lsl z30.d, z29.d, #0"); TEST_SINGLE(lsl(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 63), "lsl z30.d, z29.d, #63"); } @@ -1106,261 +1073,261 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE constructive prefix (unpre } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE saturating inc/dec vector by element count") { - TEST_SINGLE(sqinch(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "sqinch z30.h, pow2"); - TEST_SINGLE(sqinch(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqinch z30.h, vl256, mul #7"); - TEST_SINGLE(sqinch(ZReg::z30, PredicatePattern::SVE_ALL , 16), "sqinch z30.h, all, mul #16"); + TEST_SINGLE(sqinch(ZReg::z30, PredicatePattern::SVE_POW2, 1), "sqinch z30.h, pow2"); + TEST_SINGLE(sqinch(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqinch z30.h, vl256, mul #7"); + TEST_SINGLE(sqinch(ZReg::z30, PredicatePattern::SVE_ALL, 16), "sqinch z30.h, all, mul #16"); - TEST_SINGLE(uqinch(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "uqinch z30.h, pow2"); - TEST_SINGLE(uqinch(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqinch z30.h, vl256, mul #7"); - TEST_SINGLE(uqinch(ZReg::z30, PredicatePattern::SVE_ALL , 16), "uqinch z30.h, all, mul #16"); + TEST_SINGLE(uqinch(ZReg::z30, PredicatePattern::SVE_POW2, 1), "uqinch z30.h, pow2"); + TEST_SINGLE(uqinch(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqinch z30.h, vl256, mul #7"); + TEST_SINGLE(uqinch(ZReg::z30, PredicatePattern::SVE_ALL, 16), "uqinch z30.h, all, mul #16"); - TEST_SINGLE(sqdech(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "sqdech z30.h, pow2"); - TEST_SINGLE(sqdech(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqdech z30.h, vl256, mul #7"); - TEST_SINGLE(sqdech(ZReg::z30, PredicatePattern::SVE_ALL , 16), "sqdech 
z30.h, all, mul #16"); + TEST_SINGLE(sqdech(ZReg::z30, PredicatePattern::SVE_POW2, 1), "sqdech z30.h, pow2"); + TEST_SINGLE(sqdech(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqdech z30.h, vl256, mul #7"); + TEST_SINGLE(sqdech(ZReg::z30, PredicatePattern::SVE_ALL, 16), "sqdech z30.h, all, mul #16"); - TEST_SINGLE(uqdech(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "uqdech z30.h, pow2"); - TEST_SINGLE(uqdech(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqdech z30.h, vl256, mul #7"); - TEST_SINGLE(uqdech(ZReg::z30, PredicatePattern::SVE_ALL , 16), "uqdech z30.h, all, mul #16"); + TEST_SINGLE(uqdech(ZReg::z30, PredicatePattern::SVE_POW2, 1), "uqdech z30.h, pow2"); + TEST_SINGLE(uqdech(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqdech z30.h, vl256, mul #7"); + TEST_SINGLE(uqdech(ZReg::z30, PredicatePattern::SVE_ALL, 16), "uqdech z30.h, all, mul #16"); - TEST_SINGLE(sqincw(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "sqincw z30.s, pow2"); - TEST_SINGLE(sqincw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqincw z30.s, vl256, mul #7"); - TEST_SINGLE(sqincw(ZReg::z30, PredicatePattern::SVE_ALL , 16), "sqincw z30.s, all, mul #16"); + TEST_SINGLE(sqincw(ZReg::z30, PredicatePattern::SVE_POW2, 1), "sqincw z30.s, pow2"); + TEST_SINGLE(sqincw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqincw z30.s, vl256, mul #7"); + TEST_SINGLE(sqincw(ZReg::z30, PredicatePattern::SVE_ALL, 16), "sqincw z30.s, all, mul #16"); - TEST_SINGLE(uqincw(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "uqincw z30.s, pow2"); - TEST_SINGLE(uqincw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqincw z30.s, vl256, mul #7"); - TEST_SINGLE(uqincw(ZReg::z30, PredicatePattern::SVE_ALL , 16), "uqincw z30.s, all, mul #16"); + TEST_SINGLE(uqincw(ZReg::z30, PredicatePattern::SVE_POW2, 1), "uqincw z30.s, pow2"); + TEST_SINGLE(uqincw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqincw z30.s, vl256, mul #7"); + TEST_SINGLE(uqincw(ZReg::z30, PredicatePattern::SVE_ALL, 16), "uqincw z30.s, all, mul #16"); - 
TEST_SINGLE(sqdecw(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "sqdecw z30.s, pow2"); - TEST_SINGLE(sqdecw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqdecw z30.s, vl256, mul #7"); - TEST_SINGLE(sqdecw(ZReg::z30, PredicatePattern::SVE_ALL , 16), "sqdecw z30.s, all, mul #16"); + TEST_SINGLE(sqdecw(ZReg::z30, PredicatePattern::SVE_POW2, 1), "sqdecw z30.s, pow2"); + TEST_SINGLE(sqdecw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqdecw z30.s, vl256, mul #7"); + TEST_SINGLE(sqdecw(ZReg::z30, PredicatePattern::SVE_ALL, 16), "sqdecw z30.s, all, mul #16"); - TEST_SINGLE(uqdecw(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "uqdecw z30.s, pow2"); - TEST_SINGLE(uqdecw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqdecw z30.s, vl256, mul #7"); - TEST_SINGLE(uqdecw(ZReg::z30, PredicatePattern::SVE_ALL , 16), "uqdecw z30.s, all, mul #16"); + TEST_SINGLE(uqdecw(ZReg::z30, PredicatePattern::SVE_POW2, 1), "uqdecw z30.s, pow2"); + TEST_SINGLE(uqdecw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqdecw z30.s, vl256, mul #7"); + TEST_SINGLE(uqdecw(ZReg::z30, PredicatePattern::SVE_ALL, 16), "uqdecw z30.s, all, mul #16"); - TEST_SINGLE(sqincd(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "sqincd z30.d, pow2"); - TEST_SINGLE(sqincd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqincd z30.d, vl256, mul #7"); - TEST_SINGLE(sqincd(ZReg::z30, PredicatePattern::SVE_ALL , 16), "sqincd z30.d, all, mul #16"); + TEST_SINGLE(sqincd(ZReg::z30, PredicatePattern::SVE_POW2, 1), "sqincd z30.d, pow2"); + TEST_SINGLE(sqincd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqincd z30.d, vl256, mul #7"); + TEST_SINGLE(sqincd(ZReg::z30, PredicatePattern::SVE_ALL, 16), "sqincd z30.d, all, mul #16"); - TEST_SINGLE(uqincd(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "uqincd z30.d, pow2"); - TEST_SINGLE(uqincd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqincd z30.d, vl256, mul #7"); - TEST_SINGLE(uqincd(ZReg::z30, PredicatePattern::SVE_ALL , 16), "uqincd z30.d, all, mul #16"); + TEST_SINGLE(uqincd(ZReg::z30, 
PredicatePattern::SVE_POW2, 1), "uqincd z30.d, pow2"); + TEST_SINGLE(uqincd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqincd z30.d, vl256, mul #7"); + TEST_SINGLE(uqincd(ZReg::z30, PredicatePattern::SVE_ALL, 16), "uqincd z30.d, all, mul #16"); - TEST_SINGLE(sqdecd(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "sqdecd z30.d, pow2"); - TEST_SINGLE(sqdecd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqdecd z30.d, vl256, mul #7"); - TEST_SINGLE(sqdecd(ZReg::z30, PredicatePattern::SVE_ALL , 16), "sqdecd z30.d, all, mul #16"); + TEST_SINGLE(sqdecd(ZReg::z30, PredicatePattern::SVE_POW2, 1), "sqdecd z30.d, pow2"); + TEST_SINGLE(sqdecd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqdecd z30.d, vl256, mul #7"); + TEST_SINGLE(sqdecd(ZReg::z30, PredicatePattern::SVE_ALL, 16), "sqdecd z30.d, all, mul #16"); - TEST_SINGLE(uqdecd(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "uqdecd z30.d, pow2"); - TEST_SINGLE(uqdecd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqdecd z30.d, vl256, mul #7"); - TEST_SINGLE(uqdecd(ZReg::z30, PredicatePattern::SVE_ALL , 16), "uqdecd z30.d, all, mul #16"); + TEST_SINGLE(uqdecd(ZReg::z30, PredicatePattern::SVE_POW2, 1), "uqdecd z30.d, pow2"); + TEST_SINGLE(uqdecd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqdecd z30.d, vl256, mul #7"); + TEST_SINGLE(uqdecd(ZReg::z30, PredicatePattern::SVE_ALL, 16), "uqdecd z30.d, all, mul #16"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE element count") { - TEST_SINGLE(cntb(XReg::x30, PredicatePattern::SVE_POW2 , 1), "cntb x30, pow2"); - TEST_SINGLE(cntb(XReg::x30, PredicatePattern::SVE_VL256, 7), "cntb x30, vl256, mul #7"); - TEST_SINGLE(cntb(XReg::x30, PredicatePattern::SVE_ALL , 16), "cntb x30, all, mul #16"); + TEST_SINGLE(cntb(XReg::x30, PredicatePattern::SVE_POW2, 1), "cntb x30, pow2"); + TEST_SINGLE(cntb(XReg::x30, PredicatePattern::SVE_VL256, 7), "cntb x30, vl256, mul #7"); + TEST_SINGLE(cntb(XReg::x30, PredicatePattern::SVE_ALL, 16), "cntb x30, all, mul #16"); - TEST_SINGLE(cnth(XReg::x30, 
PredicatePattern::SVE_POW2 , 1), "cnth x30, pow2"); - TEST_SINGLE(cnth(XReg::x30, PredicatePattern::SVE_VL256, 7), "cnth x30, vl256, mul #7"); - TEST_SINGLE(cnth(XReg::x30, PredicatePattern::SVE_ALL , 16), "cnth x30, all, mul #16"); + TEST_SINGLE(cnth(XReg::x30, PredicatePattern::SVE_POW2, 1), "cnth x30, pow2"); + TEST_SINGLE(cnth(XReg::x30, PredicatePattern::SVE_VL256, 7), "cnth x30, vl256, mul #7"); + TEST_SINGLE(cnth(XReg::x30, PredicatePattern::SVE_ALL, 16), "cnth x30, all, mul #16"); - TEST_SINGLE(cntw(XReg::x30, PredicatePattern::SVE_POW2 , 1), "cntw x30, pow2"); - TEST_SINGLE(cntw(XReg::x30, PredicatePattern::SVE_VL256, 7), "cntw x30, vl256, mul #7"); - TEST_SINGLE(cntw(XReg::x30, PredicatePattern::SVE_ALL , 16), "cntw x30, all, mul #16"); + TEST_SINGLE(cntw(XReg::x30, PredicatePattern::SVE_POW2, 1), "cntw x30, pow2"); + TEST_SINGLE(cntw(XReg::x30, PredicatePattern::SVE_VL256, 7), "cntw x30, vl256, mul #7"); + TEST_SINGLE(cntw(XReg::x30, PredicatePattern::SVE_ALL, 16), "cntw x30, all, mul #16"); - TEST_SINGLE(cntd(XReg::x30, PredicatePattern::SVE_POW2 , 1), "cntd x30, pow2"); - TEST_SINGLE(cntd(XReg::x30, PredicatePattern::SVE_VL256, 7), "cntd x30, vl256, mul #7"); - TEST_SINGLE(cntd(XReg::x30, PredicatePattern::SVE_ALL , 16), "cntd x30, all, mul #16"); + TEST_SINGLE(cntd(XReg::x30, PredicatePattern::SVE_POW2, 1), "cntd x30, pow2"); + TEST_SINGLE(cntd(XReg::x30, PredicatePattern::SVE_VL256, 7), "cntd x30, vl256, mul #7"); + TEST_SINGLE(cntd(XReg::x30, PredicatePattern::SVE_ALL, 16), "cntd x30, all, mul #16"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE inc/dec vector by element count") { - TEST_SINGLE(inch(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "inch z30.h, pow2"); - TEST_SINGLE(inch(ZReg::z30, PredicatePattern::SVE_VL256, 7), "inch z30.h, vl256, mul #7"); - TEST_SINGLE(inch(ZReg::z30, PredicatePattern::SVE_ALL , 16), "inch z30.h, all, mul #16"); - - TEST_SINGLE(dech(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "dech z30.h, pow2"); - 
TEST_SINGLE(dech(ZReg::z30, PredicatePattern::SVE_VL256, 7), "dech z30.h, vl256, mul #7"); - TEST_SINGLE(dech(ZReg::z30, PredicatePattern::SVE_ALL , 16), "dech z30.h, all, mul #16"); - - TEST_SINGLE(incw(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "incw z30.s, pow2"); - TEST_SINGLE(incw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "incw z30.s, vl256, mul #7"); - TEST_SINGLE(incw(ZReg::z30, PredicatePattern::SVE_ALL , 16), "incw z30.s, all, mul #16"); - - TEST_SINGLE(decw(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "decw z30.s, pow2"); - TEST_SINGLE(decw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "decw z30.s, vl256, mul #7"); - TEST_SINGLE(decw(ZReg::z30, PredicatePattern::SVE_ALL , 16), "decw z30.s, all, mul #16"); - - TEST_SINGLE(incd(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "incd z30.d, pow2"); - TEST_SINGLE(incd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "incd z30.d, vl256, mul #7"); - TEST_SINGLE(incd(ZReg::z30, PredicatePattern::SVE_ALL , 16), "incd z30.d, all, mul #16"); - - TEST_SINGLE(decd(ZReg::z30, PredicatePattern::SVE_POW2 , 1), "decd z30.d, pow2"); - TEST_SINGLE(decd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "decd z30.d, vl256, mul #7"); - TEST_SINGLE(decd(ZReg::z30, PredicatePattern::SVE_ALL , 16), "decd z30.d, all, mul #16"); + TEST_SINGLE(inch(ZReg::z30, PredicatePattern::SVE_POW2, 1), "inch z30.h, pow2"); + TEST_SINGLE(inch(ZReg::z30, PredicatePattern::SVE_VL256, 7), "inch z30.h, vl256, mul #7"); + TEST_SINGLE(inch(ZReg::z30, PredicatePattern::SVE_ALL, 16), "inch z30.h, all, mul #16"); + + TEST_SINGLE(dech(ZReg::z30, PredicatePattern::SVE_POW2, 1), "dech z30.h, pow2"); + TEST_SINGLE(dech(ZReg::z30, PredicatePattern::SVE_VL256, 7), "dech z30.h, vl256, mul #7"); + TEST_SINGLE(dech(ZReg::z30, PredicatePattern::SVE_ALL, 16), "dech z30.h, all, mul #16"); + + TEST_SINGLE(incw(ZReg::z30, PredicatePattern::SVE_POW2, 1), "incw z30.s, pow2"); + TEST_SINGLE(incw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "incw z30.s, vl256, mul #7"); + 
TEST_SINGLE(incw(ZReg::z30, PredicatePattern::SVE_ALL, 16), "incw z30.s, all, mul #16"); + + TEST_SINGLE(decw(ZReg::z30, PredicatePattern::SVE_POW2, 1), "decw z30.s, pow2"); + TEST_SINGLE(decw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "decw z30.s, vl256, mul #7"); + TEST_SINGLE(decw(ZReg::z30, PredicatePattern::SVE_ALL, 16), "decw z30.s, all, mul #16"); + + TEST_SINGLE(incd(ZReg::z30, PredicatePattern::SVE_POW2, 1), "incd z30.d, pow2"); + TEST_SINGLE(incd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "incd z30.d, vl256, mul #7"); + TEST_SINGLE(incd(ZReg::z30, PredicatePattern::SVE_ALL, 16), "incd z30.d, all, mul #16"); + + TEST_SINGLE(decd(ZReg::z30, PredicatePattern::SVE_POW2, 1), "decd z30.d, pow2"); + TEST_SINGLE(decd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "decd z30.d, vl256, mul #7"); + TEST_SINGLE(decd(ZReg::z30, PredicatePattern::SVE_ALL, 16), "decd z30.d, all, mul #16"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE inc/dec register by element count") { - TEST_SINGLE(incb(XReg::x30, PredicatePattern::SVE_POW2 , 1), "incb x30, pow2"); - TEST_SINGLE(incb(XReg::x30, PredicatePattern::SVE_VL256, 7), "incb x30, vl256, mul #7"); - TEST_SINGLE(incb(XReg::x30, PredicatePattern::SVE_ALL , 16), "incb x30, all, mul #16"); - - TEST_SINGLE(decb(XReg::x30, PredicatePattern::SVE_POW2 , 1), "decb x30, pow2"); - TEST_SINGLE(decb(XReg::x30, PredicatePattern::SVE_VL256, 7), "decb x30, vl256, mul #7"); - TEST_SINGLE(decb(XReg::x30, PredicatePattern::SVE_ALL , 16), "decb x30, all, mul #16"); - - TEST_SINGLE(inch(XReg::x30, PredicatePattern::SVE_POW2 , 1), "inch x30, pow2"); - TEST_SINGLE(inch(XReg::x30, PredicatePattern::SVE_VL256, 7), "inch x30, vl256, mul #7"); - TEST_SINGLE(inch(XReg::x30, PredicatePattern::SVE_ALL , 16), "inch x30, all, mul #16"); - - TEST_SINGLE(dech(XReg::x30, PredicatePattern::SVE_POW2 , 1), "dech x30, pow2"); - TEST_SINGLE(dech(XReg::x30, PredicatePattern::SVE_VL256, 7), "dech x30, vl256, mul #7"); - TEST_SINGLE(dech(XReg::x30, 
PredicatePattern::SVE_ALL , 16), "dech x30, all, mul #16"); - - TEST_SINGLE(incw(XReg::x30, PredicatePattern::SVE_POW2 , 1), "incw x30, pow2"); - TEST_SINGLE(incw(XReg::x30, PredicatePattern::SVE_VL256, 7), "incw x30, vl256, mul #7"); - TEST_SINGLE(incw(XReg::x30, PredicatePattern::SVE_ALL , 16), "incw x30, all, mul #16"); - - TEST_SINGLE(decw(XReg::x30, PredicatePattern::SVE_POW2 , 1), "decw x30, pow2"); - TEST_SINGLE(decw(XReg::x30, PredicatePattern::SVE_VL256, 7), "decw x30, vl256, mul #7"); - TEST_SINGLE(decw(XReg::x30, PredicatePattern::SVE_ALL , 16), "decw x30, all, mul #16"); - - TEST_SINGLE(incd(XReg::x30, PredicatePattern::SVE_POW2 , 1), "incd x30, pow2"); - TEST_SINGLE(incd(XReg::x30, PredicatePattern::SVE_VL256, 7), "incd x30, vl256, mul #7"); - TEST_SINGLE(incd(XReg::x30, PredicatePattern::SVE_ALL , 16), "incd x30, all, mul #16"); - - TEST_SINGLE(decd(XReg::x30, PredicatePattern::SVE_POW2 , 1), "decd x30, pow2"); - TEST_SINGLE(decd(XReg::x30, PredicatePattern::SVE_VL256, 7), "decd x30, vl256, mul #7"); - TEST_SINGLE(decd(XReg::x30, PredicatePattern::SVE_ALL , 16), "decd x30, all, mul #16"); + TEST_SINGLE(incb(XReg::x30, PredicatePattern::SVE_POW2, 1), "incb x30, pow2"); + TEST_SINGLE(incb(XReg::x30, PredicatePattern::SVE_VL256, 7), "incb x30, vl256, mul #7"); + TEST_SINGLE(incb(XReg::x30, PredicatePattern::SVE_ALL, 16), "incb x30, all, mul #16"); + + TEST_SINGLE(decb(XReg::x30, PredicatePattern::SVE_POW2, 1), "decb x30, pow2"); + TEST_SINGLE(decb(XReg::x30, PredicatePattern::SVE_VL256, 7), "decb x30, vl256, mul #7"); + TEST_SINGLE(decb(XReg::x30, PredicatePattern::SVE_ALL, 16), "decb x30, all, mul #16"); + + TEST_SINGLE(inch(XReg::x30, PredicatePattern::SVE_POW2, 1), "inch x30, pow2"); + TEST_SINGLE(inch(XReg::x30, PredicatePattern::SVE_VL256, 7), "inch x30, vl256, mul #7"); + TEST_SINGLE(inch(XReg::x30, PredicatePattern::SVE_ALL, 16), "inch x30, all, mul #16"); + + TEST_SINGLE(dech(XReg::x30, PredicatePattern::SVE_POW2, 1), "dech x30, pow2"); + 
TEST_SINGLE(dech(XReg::x30, PredicatePattern::SVE_VL256, 7), "dech x30, vl256, mul #7"); + TEST_SINGLE(dech(XReg::x30, PredicatePattern::SVE_ALL, 16), "dech x30, all, mul #16"); + + TEST_SINGLE(incw(XReg::x30, PredicatePattern::SVE_POW2, 1), "incw x30, pow2"); + TEST_SINGLE(incw(XReg::x30, PredicatePattern::SVE_VL256, 7), "incw x30, vl256, mul #7"); + TEST_SINGLE(incw(XReg::x30, PredicatePattern::SVE_ALL, 16), "incw x30, all, mul #16"); + + TEST_SINGLE(decw(XReg::x30, PredicatePattern::SVE_POW2, 1), "decw x30, pow2"); + TEST_SINGLE(decw(XReg::x30, PredicatePattern::SVE_VL256, 7), "decw x30, vl256, mul #7"); + TEST_SINGLE(decw(XReg::x30, PredicatePattern::SVE_ALL, 16), "decw x30, all, mul #16"); + + TEST_SINGLE(incd(XReg::x30, PredicatePattern::SVE_POW2, 1), "incd x30, pow2"); + TEST_SINGLE(incd(XReg::x30, PredicatePattern::SVE_VL256, 7), "incd x30, vl256, mul #7"); + TEST_SINGLE(incd(XReg::x30, PredicatePattern::SVE_ALL, 16), "incd x30, all, mul #16"); + + TEST_SINGLE(decd(XReg::x30, PredicatePattern::SVE_POW2, 1), "decd x30, pow2"); + TEST_SINGLE(decd(XReg::x30, PredicatePattern::SVE_VL256, 7), "decd x30, vl256, mul #7"); + TEST_SINGLE(decd(XReg::x30, PredicatePattern::SVE_ALL, 16), "decd x30, all, mul #16"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE saturating inc/dec register by element count") { - TEST_SINGLE(sqincb(XReg::x30, PredicatePattern::SVE_POW2 , 1), "sqincb x30, pow2"); - TEST_SINGLE(sqincb(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqincb x30, vl256, mul #7"); - TEST_SINGLE(sqincb(XReg::x30, PredicatePattern::SVE_ALL , 16), "sqincb x30, all, mul #16"); + TEST_SINGLE(sqincb(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqincb x30, pow2"); + TEST_SINGLE(sqincb(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqincb x30, vl256, mul #7"); + TEST_SINGLE(sqincb(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqincb x30, all, mul #16"); - TEST_SINGLE(sqincb(WReg::w30, PredicatePattern::SVE_POW2 , 1), "sqincb x30, w30, pow2"); - 
TEST_SINGLE(sqincb(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqincb x30, w30, vl256, mul #7"); - TEST_SINGLE(sqincb(WReg::w30, PredicatePattern::SVE_ALL , 16), "sqincb x30, w30, all, mul #16"); + TEST_SINGLE(sqincb(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqincb x30, w30, pow2"); + TEST_SINGLE(sqincb(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqincb x30, w30, vl256, mul #7"); + TEST_SINGLE(sqincb(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqincb x30, w30, all, mul #16"); - TEST_SINGLE(uqincb(XReg::x30, PredicatePattern::SVE_POW2 , 1), "uqincb x30, pow2"); - TEST_SINGLE(uqincb(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqincb x30, vl256, mul #7"); - TEST_SINGLE(uqincb(XReg::x30, PredicatePattern::SVE_ALL , 16), "uqincb x30, all, mul #16"); + TEST_SINGLE(uqincb(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqincb x30, pow2"); + TEST_SINGLE(uqincb(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqincb x30, vl256, mul #7"); + TEST_SINGLE(uqincb(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqincb x30, all, mul #16"); - TEST_SINGLE(uqincb(WReg::w30, PredicatePattern::SVE_POW2 , 1), "uqincb w30, pow2"); - TEST_SINGLE(uqincb(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqincb w30, vl256, mul #7"); - TEST_SINGLE(uqincb(WReg::w30, PredicatePattern::SVE_ALL , 16), "uqincb w30, all, mul #16"); + TEST_SINGLE(uqincb(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqincb w30, pow2"); + TEST_SINGLE(uqincb(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqincb w30, vl256, mul #7"); + TEST_SINGLE(uqincb(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqincb w30, all, mul #16"); - TEST_SINGLE(sqdecb(XReg::x30, PredicatePattern::SVE_POW2 , 1), "sqdecb x30, pow2"); - TEST_SINGLE(sqdecb(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqdecb x30, vl256, mul #7"); - TEST_SINGLE(sqdecb(XReg::x30, PredicatePattern::SVE_ALL , 16), "sqdecb x30, all, mul #16"); + TEST_SINGLE(sqdecb(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqdecb x30, pow2"); + TEST_SINGLE(sqdecb(XReg::x30, 
PredicatePattern::SVE_VL256, 7), "sqdecb x30, vl256, mul #7"); + TEST_SINGLE(sqdecb(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqdecb x30, all, mul #16"); - TEST_SINGLE(sqdecb(WReg::w30, PredicatePattern::SVE_POW2 , 1), "sqdecb x30, w30, pow2"); - TEST_SINGLE(sqdecb(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqdecb x30, w30, vl256, mul #7"); - TEST_SINGLE(sqdecb(WReg::w30, PredicatePattern::SVE_ALL , 16), "sqdecb x30, w30, all, mul #16"); + TEST_SINGLE(sqdecb(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqdecb x30, w30, pow2"); + TEST_SINGLE(sqdecb(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqdecb x30, w30, vl256, mul #7"); + TEST_SINGLE(sqdecb(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqdecb x30, w30, all, mul #16"); - TEST_SINGLE(uqdecb(XReg::x30, PredicatePattern::SVE_POW2 , 1), "uqdecb x30, pow2"); - TEST_SINGLE(uqdecb(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqdecb x30, vl256, mul #7"); - TEST_SINGLE(uqdecb(XReg::x30, PredicatePattern::SVE_ALL , 16), "uqdecb x30, all, mul #16"); + TEST_SINGLE(uqdecb(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqdecb x30, pow2"); + TEST_SINGLE(uqdecb(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqdecb x30, vl256, mul #7"); + TEST_SINGLE(uqdecb(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqdecb x30, all, mul #16"); - TEST_SINGLE(uqdecb(WReg::w30, PredicatePattern::SVE_POW2 , 1), "uqdecb w30, pow2"); - TEST_SINGLE(uqdecb(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqdecb w30, vl256, mul #7"); - TEST_SINGLE(uqdecb(WReg::w30, PredicatePattern::SVE_ALL , 16), "uqdecb w30, all, mul #16"); + TEST_SINGLE(uqdecb(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqdecb w30, pow2"); + TEST_SINGLE(uqdecb(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqdecb w30, vl256, mul #7"); + TEST_SINGLE(uqdecb(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqdecb w30, all, mul #16"); - TEST_SINGLE(sqinch(XReg::x30, PredicatePattern::SVE_POW2 , 1), "sqinch x30, pow2"); - TEST_SINGLE(sqinch(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqinch x30, 
vl256, mul #7"); - TEST_SINGLE(sqinch(XReg::x30, PredicatePattern::SVE_ALL , 16), "sqinch x30, all, mul #16"); + TEST_SINGLE(sqinch(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqinch x30, pow2"); + TEST_SINGLE(sqinch(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqinch x30, vl256, mul #7"); + TEST_SINGLE(sqinch(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqinch x30, all, mul #16"); - TEST_SINGLE(sqinch(WReg::w30, PredicatePattern::SVE_POW2 , 1), "sqinch x30, w30, pow2"); - TEST_SINGLE(sqinch(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqinch x30, w30, vl256, mul #7"); - TEST_SINGLE(sqinch(WReg::w30, PredicatePattern::SVE_ALL , 16), "sqinch x30, w30, all, mul #16"); + TEST_SINGLE(sqinch(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqinch x30, w30, pow2"); + TEST_SINGLE(sqinch(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqinch x30, w30, vl256, mul #7"); + TEST_SINGLE(sqinch(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqinch x30, w30, all, mul #16"); - TEST_SINGLE(uqinch(XReg::x30, PredicatePattern::SVE_POW2 , 1), "uqinch x30, pow2"); - TEST_SINGLE(uqinch(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqinch x30, vl256, mul #7"); - TEST_SINGLE(uqinch(XReg::x30, PredicatePattern::SVE_ALL , 16), "uqinch x30, all, mul #16"); + TEST_SINGLE(uqinch(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqinch x30, pow2"); + TEST_SINGLE(uqinch(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqinch x30, vl256, mul #7"); + TEST_SINGLE(uqinch(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqinch x30, all, mul #16"); - TEST_SINGLE(uqinch(WReg::w30, PredicatePattern::SVE_POW2 , 1), "uqinch w30, pow2"); - TEST_SINGLE(uqinch(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqinch w30, vl256, mul #7"); - TEST_SINGLE(uqinch(WReg::w30, PredicatePattern::SVE_ALL , 16), "uqinch w30, all, mul #16"); + TEST_SINGLE(uqinch(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqinch w30, pow2"); + TEST_SINGLE(uqinch(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqinch w30, vl256, mul #7"); + TEST_SINGLE(uqinch(WReg::w30, 
PredicatePattern::SVE_ALL, 16), "uqinch w30, all, mul #16"); - TEST_SINGLE(sqdech(XReg::x30, PredicatePattern::SVE_POW2 , 1), "sqdech x30, pow2"); - TEST_SINGLE(sqdech(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqdech x30, vl256, mul #7"); - TEST_SINGLE(sqdech(XReg::x30, PredicatePattern::SVE_ALL , 16), "sqdech x30, all, mul #16"); + TEST_SINGLE(sqdech(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqdech x30, pow2"); + TEST_SINGLE(sqdech(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqdech x30, vl256, mul #7"); + TEST_SINGLE(sqdech(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqdech x30, all, mul #16"); - TEST_SINGLE(sqdech(WReg::w30, PredicatePattern::SVE_POW2 , 1), "sqdech x30, w30, pow2"); - TEST_SINGLE(sqdech(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqdech x30, w30, vl256, mul #7"); - TEST_SINGLE(sqdech(WReg::w30, PredicatePattern::SVE_ALL , 16), "sqdech x30, w30, all, mul #16"); + TEST_SINGLE(sqdech(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqdech x30, w30, pow2"); + TEST_SINGLE(sqdech(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqdech x30, w30, vl256, mul #7"); + TEST_SINGLE(sqdech(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqdech x30, w30, all, mul #16"); - TEST_SINGLE(uqdech(XReg::x30, PredicatePattern::SVE_POW2 , 1), "uqdech x30, pow2"); - TEST_SINGLE(uqdech(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqdech x30, vl256, mul #7"); - TEST_SINGLE(uqdech(XReg::x30, PredicatePattern::SVE_ALL , 16), "uqdech x30, all, mul #16"); + TEST_SINGLE(uqdech(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqdech x30, pow2"); + TEST_SINGLE(uqdech(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqdech x30, vl256, mul #7"); + TEST_SINGLE(uqdech(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqdech x30, all, mul #16"); - TEST_SINGLE(uqdech(WReg::w30, PredicatePattern::SVE_POW2 , 1), "uqdech w30, pow2"); - TEST_SINGLE(uqdech(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqdech w30, vl256, mul #7"); - TEST_SINGLE(uqdech(WReg::w30, PredicatePattern::SVE_ALL , 16), "uqdech w30, all, 
mul #16"); + TEST_SINGLE(uqdech(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqdech w30, pow2"); + TEST_SINGLE(uqdech(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqdech w30, vl256, mul #7"); + TEST_SINGLE(uqdech(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqdech w30, all, mul #16"); - TEST_SINGLE(sqincw(XReg::x30, PredicatePattern::SVE_POW2 , 1), "sqincw x30, pow2"); - TEST_SINGLE(sqincw(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqincw x30, vl256, mul #7"); - TEST_SINGLE(sqincw(XReg::x30, PredicatePattern::SVE_ALL , 16), "sqincw x30, all, mul #16"); + TEST_SINGLE(sqincw(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqincw x30, pow2"); + TEST_SINGLE(sqincw(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqincw x30, vl256, mul #7"); + TEST_SINGLE(sqincw(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqincw x30, all, mul #16"); - TEST_SINGLE(sqincw(WReg::w30, PredicatePattern::SVE_POW2 , 1), "sqincw x30, w30, pow2"); - TEST_SINGLE(sqincw(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqincw x30, w30, vl256, mul #7"); - TEST_SINGLE(sqincw(WReg::w30, PredicatePattern::SVE_ALL , 16), "sqincw x30, w30, all, mul #16"); + TEST_SINGLE(sqincw(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqincw x30, w30, pow2"); + TEST_SINGLE(sqincw(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqincw x30, w30, vl256, mul #7"); + TEST_SINGLE(sqincw(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqincw x30, w30, all, mul #16"); - TEST_SINGLE(uqincw(XReg::x30, PredicatePattern::SVE_POW2 , 1), "uqincw x30, pow2"); - TEST_SINGLE(uqincw(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqincw x30, vl256, mul #7"); - TEST_SINGLE(uqincw(XReg::x30, PredicatePattern::SVE_ALL , 16), "uqincw x30, all, mul #16"); + TEST_SINGLE(uqincw(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqincw x30, pow2"); + TEST_SINGLE(uqincw(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqincw x30, vl256, mul #7"); + TEST_SINGLE(uqincw(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqincw x30, all, mul #16"); - TEST_SINGLE(uqincw(WReg::w30, 
PredicatePattern::SVE_POW2 , 1), "uqincw w30, pow2"); - TEST_SINGLE(uqincw(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqincw w30, vl256, mul #7"); - TEST_SINGLE(uqincw(WReg::w30, PredicatePattern::SVE_ALL , 16), "uqincw w30, all, mul #16"); + TEST_SINGLE(uqincw(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqincw w30, pow2"); + TEST_SINGLE(uqincw(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqincw w30, vl256, mul #7"); + TEST_SINGLE(uqincw(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqincw w30, all, mul #16"); - TEST_SINGLE(sqdecw(XReg::x30, PredicatePattern::SVE_POW2 , 1), "sqdecw x30, pow2"); - TEST_SINGLE(sqdecw(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqdecw x30, vl256, mul #7"); - TEST_SINGLE(sqdecw(XReg::x30, PredicatePattern::SVE_ALL , 16), "sqdecw x30, all, mul #16"); + TEST_SINGLE(sqdecw(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqdecw x30, pow2"); + TEST_SINGLE(sqdecw(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqdecw x30, vl256, mul #7"); + TEST_SINGLE(sqdecw(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqdecw x30, all, mul #16"); - TEST_SINGLE(sqdecw(WReg::w30, PredicatePattern::SVE_POW2 , 1), "sqdecw x30, w30, pow2"); - TEST_SINGLE(sqdecw(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqdecw x30, w30, vl256, mul #7"); - TEST_SINGLE(sqdecw(WReg::w30, PredicatePattern::SVE_ALL , 16), "sqdecw x30, w30, all, mul #16"); + TEST_SINGLE(sqdecw(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqdecw x30, w30, pow2"); + TEST_SINGLE(sqdecw(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqdecw x30, w30, vl256, mul #7"); + TEST_SINGLE(sqdecw(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqdecw x30, w30, all, mul #16"); - TEST_SINGLE(uqdecw(XReg::x30, PredicatePattern::SVE_POW2 , 1), "uqdecw x30, pow2"); - TEST_SINGLE(uqdecw(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqdecw x30, vl256, mul #7"); - TEST_SINGLE(uqdecw(XReg::x30, PredicatePattern::SVE_ALL , 16), "uqdecw x30, all, mul #16"); + TEST_SINGLE(uqdecw(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqdecw x30, 
pow2"); + TEST_SINGLE(uqdecw(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqdecw x30, vl256, mul #7"); + TEST_SINGLE(uqdecw(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqdecw x30, all, mul #16"); - TEST_SINGLE(uqdecw(WReg::w30, PredicatePattern::SVE_POW2 , 1), "uqdecw w30, pow2"); - TEST_SINGLE(uqdecw(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqdecw w30, vl256, mul #7"); - TEST_SINGLE(uqdecw(WReg::w30, PredicatePattern::SVE_ALL , 16), "uqdecw w30, all, mul #16"); + TEST_SINGLE(uqdecw(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqdecw w30, pow2"); + TEST_SINGLE(uqdecw(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqdecw w30, vl256, mul #7"); + TEST_SINGLE(uqdecw(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqdecw w30, all, mul #16"); - TEST_SINGLE(sqincd(XReg::x30, PredicatePattern::SVE_POW2 , 1), "sqincd x30, pow2"); - TEST_SINGLE(sqincd(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqincd x30, vl256, mul #7"); - TEST_SINGLE(sqincd(XReg::x30, PredicatePattern::SVE_ALL , 16), "sqincd x30, all, mul #16"); + TEST_SINGLE(sqincd(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqincd x30, pow2"); + TEST_SINGLE(sqincd(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqincd x30, vl256, mul #7"); + TEST_SINGLE(sqincd(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqincd x30, all, mul #16"); - TEST_SINGLE(sqincd(WReg::w30, PredicatePattern::SVE_POW2 , 1), "sqincd x30, w30, pow2"); - TEST_SINGLE(sqincd(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqincd x30, w30, vl256, mul #7"); - TEST_SINGLE(sqincd(WReg::w30, PredicatePattern::SVE_ALL , 16), "sqincd x30, w30, all, mul #16"); + TEST_SINGLE(sqincd(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqincd x30, w30, pow2"); + TEST_SINGLE(sqincd(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqincd x30, w30, vl256, mul #7"); + TEST_SINGLE(sqincd(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqincd x30, w30, all, mul #16"); - TEST_SINGLE(uqincd(XReg::x30, PredicatePattern::SVE_POW2 , 1), "uqincd x30, pow2"); - TEST_SINGLE(uqincd(XReg::x30, 
PredicatePattern::SVE_VL256, 7), "uqincd x30, vl256, mul #7"); - TEST_SINGLE(uqincd(XReg::x30, PredicatePattern::SVE_ALL , 16), "uqincd x30, all, mul #16"); + TEST_SINGLE(uqincd(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqincd x30, pow2"); + TEST_SINGLE(uqincd(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqincd x30, vl256, mul #7"); + TEST_SINGLE(uqincd(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqincd x30, all, mul #16"); - TEST_SINGLE(uqincd(WReg::w30, PredicatePattern::SVE_POW2 , 1), "uqincd w30, pow2"); - TEST_SINGLE(uqincd(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqincd w30, vl256, mul #7"); - TEST_SINGLE(uqincd(WReg::w30, PredicatePattern::SVE_ALL , 16), "uqincd w30, all, mul #16"); + TEST_SINGLE(uqincd(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqincd w30, pow2"); + TEST_SINGLE(uqincd(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqincd w30, vl256, mul #7"); + TEST_SINGLE(uqincd(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqincd w30, all, mul #16"); - TEST_SINGLE(sqdecd(XReg::x30, PredicatePattern::SVE_POW2 , 1), "sqdecd x30, pow2"); - TEST_SINGLE(sqdecd(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqdecd x30, vl256, mul #7"); - TEST_SINGLE(sqdecd(XReg::x30, PredicatePattern::SVE_ALL , 16), "sqdecd x30, all, mul #16"); + TEST_SINGLE(sqdecd(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqdecd x30, pow2"); + TEST_SINGLE(sqdecd(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqdecd x30, vl256, mul #7"); + TEST_SINGLE(sqdecd(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqdecd x30, all, mul #16"); - TEST_SINGLE(sqdecd(WReg::w30, PredicatePattern::SVE_POW2 , 1), "sqdecd x30, w30, pow2"); - TEST_SINGLE(sqdecd(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqdecd x30, w30, vl256, mul #7"); - TEST_SINGLE(sqdecd(WReg::w30, PredicatePattern::SVE_ALL , 16), "sqdecd x30, w30, all, mul #16"); + TEST_SINGLE(sqdecd(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqdecd x30, w30, pow2"); + TEST_SINGLE(sqdecd(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqdecd x30, w30, vl256, mul 
#7"); + TEST_SINGLE(sqdecd(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqdecd x30, w30, all, mul #16"); - TEST_SINGLE(uqdecd(XReg::x30, PredicatePattern::SVE_POW2 , 1), "uqdecd x30, pow2"); - TEST_SINGLE(uqdecd(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqdecd x30, vl256, mul #7"); - TEST_SINGLE(uqdecd(XReg::x30, PredicatePattern::SVE_ALL , 16), "uqdecd x30, all, mul #16"); + TEST_SINGLE(uqdecd(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqdecd x30, pow2"); + TEST_SINGLE(uqdecd(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqdecd x30, vl256, mul #7"); + TEST_SINGLE(uqdecd(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqdecd x30, all, mul #16"); - TEST_SINGLE(uqdecd(WReg::w30, PredicatePattern::SVE_POW2 , 1), "uqdecd w30, pow2"); - TEST_SINGLE(uqdecd(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqdecd w30, vl256, mul #7"); - TEST_SINGLE(uqdecd(WReg::w30, PredicatePattern::SVE_ALL , 16), "uqdecd w30, all, mul #16"); + TEST_SINGLE(uqdecd(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqdecd w30, pow2"); + TEST_SINGLE(uqdecd(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqdecd w30, vl256, mul #7"); + TEST_SINGLE(uqdecd(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqdecd w30, all, mul #16"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Bitwise Immediate") { @@ -1376,12 +1343,12 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Integer Wide Immediate - P } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE copy integer immediate (predicated)") { - TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.b, p6/m, #-128") + TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.b, p6/m, #-128") TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.h, p6/m, #-128"); TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.s, p6/m, #-128"); TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.d, p6/m, #-128"); - 
TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.b, p6/m, #127"); + TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.b, p6/m, #127"); TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.h, p6/m, #127"); TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.s, p6/m, #127"); TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.d, p6/m, #127"); @@ -1394,12 +1361,12 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE copy integer immediate (pr TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 32512), "mov z30.s, p6/m, #127, lsl #8"); TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 32512), "mov z30.d, p6/m, #127, lsl #8"); - TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.b, p6/m, #-128") + TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.b, p6/m, #-128") TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.h, p6/m, #-128"); TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.s, p6/m, #-128"); TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.d, p6/m, #-128"); - TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.b, p6/m, #127"); + TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.b, p6/m, #127"); TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.h, p6/m, #127"); TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.s, p6/m, #127"); TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.d, p6/m, #127"); @@ -1412,12 +1379,12 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE copy integer immediate (pr 
TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 32512), "mov z30.s, p6/m, #127, lsl #8"); TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 32512), "mov z30.d, p6/m, #127, lsl #8"); - TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.b, p6/z, #-128") + TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.b, p6/z, #-128") TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.h, p6/z, #-128"); TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.s, p6/z, #-128"); TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.d, p6/z, #-128"); - TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.b, p6/z, #127"); + TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.b, p6/z, #127"); TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.h, p6/z, #127"); TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.s, p6/z, #127"); TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.d, p6/z, #127"); @@ -1430,12 +1397,12 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE copy integer immediate (pr TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), 32512), "mov z30.s, p6/z, #127, lsl #8"); TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), 32512), "mov z30.d, p6/z, #127, lsl #8"); - TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.b, p6/z, #-128") + TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.b, p6/z, #-128") TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.h, p6/z, #-128"); TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.s, p6/z, #-128"); 
TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.d, p6/z, #-128"); - TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.b, p6/z, #127"); + TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.b, p6/z, #127"); TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.h, p6/z, #127"); TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.s, p6/z, #127"); TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.d, p6/z, #127"); @@ -1450,93 +1417,93 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE copy integer immediate (pr } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Permute Vector - Unpredicated") { - TEST_SINGLE(dup(SubRegSize::i8Bit, ZReg::z30, Reg::r29), "mov z30.b, w29"); - TEST_SINGLE(dup(SubRegSize::i16Bit, ZReg::z30, Reg::r29), "mov z30.h, w29"); - TEST_SINGLE(dup(SubRegSize::i32Bit, ZReg::z30, Reg::r29), "mov z30.s, w29"); - TEST_SINGLE(dup(SubRegSize::i64Bit, ZReg::z30, Reg::r29), "mov z30.d, x29"); + TEST_SINGLE(dup(SubRegSize::i8Bit, ZReg::z30, Reg::r29), "mov z30.b, w29"); + TEST_SINGLE(dup(SubRegSize::i16Bit, ZReg::z30, Reg::r29), "mov z30.h, w29"); + TEST_SINGLE(dup(SubRegSize::i32Bit, ZReg::z30, Reg::r29), "mov z30.s, w29"); + TEST_SINGLE(dup(SubRegSize::i64Bit, ZReg::z30, Reg::r29), "mov z30.d, x29"); - TEST_SINGLE(mov(SubRegSize::i8Bit, ZReg::z30, Reg::r29), "mov z30.b, w29"); - TEST_SINGLE(mov(SubRegSize::i16Bit, ZReg::z30, Reg::r29), "mov z30.h, w29"); - TEST_SINGLE(mov(SubRegSize::i32Bit, ZReg::z30, Reg::r29), "mov z30.s, w29"); - TEST_SINGLE(mov(SubRegSize::i64Bit, ZReg::z30, Reg::r29), "mov z30.d, x29"); + TEST_SINGLE(mov(SubRegSize::i8Bit, ZReg::z30, Reg::r29), "mov z30.b, w29"); + TEST_SINGLE(mov(SubRegSize::i16Bit, ZReg::z30, Reg::r29), "mov z30.h, w29"); + TEST_SINGLE(mov(SubRegSize::i32Bit, ZReg::z30, Reg::r29), "mov z30.s, w29"); + 
TEST_SINGLE(mov(SubRegSize::i64Bit, ZReg::z30, Reg::r29), "mov z30.d, x29"); - TEST_SINGLE(insr(SubRegSize::i8Bit, ZReg::z30, Reg::r29), "insr z30.b, w29"); + TEST_SINGLE(insr(SubRegSize::i8Bit, ZReg::z30, Reg::r29), "insr z30.b, w29"); TEST_SINGLE(insr(SubRegSize::i16Bit, ZReg::z30, Reg::r29), "insr z30.h, w29"); TEST_SINGLE(insr(SubRegSize::i32Bit, ZReg::z30, Reg::r29), "insr z30.s, w29"); TEST_SINGLE(insr(SubRegSize::i64Bit, ZReg::z30, Reg::r29), "insr z30.d, x29"); - TEST_SINGLE(insr(SubRegSize::i8Bit, ZReg::z30, VReg::v29), "insr z30.b, b29"); + TEST_SINGLE(insr(SubRegSize::i8Bit, ZReg::z30, VReg::v29), "insr z30.b, b29"); TEST_SINGLE(insr(SubRegSize::i16Bit, ZReg::z30, VReg::v29), "insr z30.h, h29"); TEST_SINGLE(insr(SubRegSize::i32Bit, ZReg::z30, VReg::v29), "insr z30.s, s29"); TEST_SINGLE(insr(SubRegSize::i64Bit, ZReg::z30, VReg::v29), "insr z30.d, d29"); - TEST_SINGLE(rev(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "rev z30.b, z29.b"); + TEST_SINGLE(rev(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "rev z30.b, z29.b"); TEST_SINGLE(rev(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "rev z30.h, z29.h"); TEST_SINGLE(rev(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "rev z30.s, z29.s"); TEST_SINGLE(rev(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "rev z30.d, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE unpack vector elements") { - //TEST_SINGLE(sunpklo(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "sunpklo z30.b, z29.b"); - TEST_SINGLE(sunpklo(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sunpklo z30.h, z29.b"); - TEST_SINGLE(sunpklo(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sunpklo z30.s, z29.h"); - TEST_SINGLE(sunpklo(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sunpklo z30.d, z29.s"); - //TEST_SINGLE(sunpklo(SubRegSize::i128Bit, ZReg::z30, ZReg::z29), "sunpklo z30.q, z29.q"); - - //TEST_SINGLE(sunpkhi(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.b, z29.b"); - TEST_SINGLE(sunpkhi(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.h, z29.b"); - 
TEST_SINGLE(sunpkhi(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.s, z29.h"); - TEST_SINGLE(sunpkhi(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.d, z29.s"); - //TEST_SINGLE(sunpkhi(SubRegSize::i128Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.q, z29.q"); - - //TEST_SINGLE(uunpklo(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "uunpklo z30.b, z29.b"); - TEST_SINGLE(uunpklo(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "uunpklo z30.h, z29.b"); - TEST_SINGLE(uunpklo(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "uunpklo z30.s, z29.h"); - TEST_SINGLE(uunpklo(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "uunpklo z30.d, z29.s"); - //TEST_SINGLE(uunpklo(SubRegSize::i128Bit, ZReg::z30, ZReg::z29), "uunpklo z30.q, z29.q"); - - //TEST_SINGLE(uunpkhi(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.b, z29.b"); - TEST_SINGLE(uunpkhi(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.h, z29.b"); - TEST_SINGLE(uunpkhi(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.s, z29.h"); - TEST_SINGLE(uunpkhi(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.d, z29.s"); - //TEST_SINGLE(uunpkhi(SubRegSize::i128Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.q, z29.q"); + // TEST_SINGLE(sunpklo(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "sunpklo z30.b, z29.b"); + TEST_SINGLE(sunpklo(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sunpklo z30.h, z29.b"); + TEST_SINGLE(sunpklo(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sunpklo z30.s, z29.h"); + TEST_SINGLE(sunpklo(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sunpklo z30.d, z29.s"); + // TEST_SINGLE(sunpklo(SubRegSize::i128Bit, ZReg::z30, ZReg::z29), "sunpklo z30.q, z29.q"); + + // TEST_SINGLE(sunpkhi(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.b, z29.b"); + TEST_SINGLE(sunpkhi(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.h, z29.b"); + TEST_SINGLE(sunpkhi(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.s, z29.h"); + TEST_SINGLE(sunpkhi(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.d, 
z29.s"); + // TEST_SINGLE(sunpkhi(SubRegSize::i128Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.q, z29.q"); + + // TEST_SINGLE(uunpklo(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "uunpklo z30.b, z29.b"); + TEST_SINGLE(uunpklo(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "uunpklo z30.h, z29.b"); + TEST_SINGLE(uunpklo(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "uunpklo z30.s, z29.h"); + TEST_SINGLE(uunpklo(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "uunpklo z30.d, z29.s"); + // TEST_SINGLE(uunpklo(SubRegSize::i128Bit, ZReg::z30, ZReg::z29), "uunpklo z30.q, z29.q"); + + // TEST_SINGLE(uunpkhi(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.b, z29.b"); + TEST_SINGLE(uunpkhi(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.h, z29.b"); + TEST_SINGLE(uunpkhi(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.s, z29.h"); + TEST_SINGLE(uunpkhi(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.d, z29.s"); + // TEST_SINGLE(uunpkhi(SubRegSize::i128Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.q, z29.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Permute Predicate") { - TEST_SINGLE(rev(SubRegSize::i8Bit, PReg::p15, PReg::p14), "rev p15.b, p14.b"); + TEST_SINGLE(rev(SubRegSize::i8Bit, PReg::p15, PReg::p14), "rev p15.b, p14.b"); TEST_SINGLE(rev(SubRegSize::i16Bit, PReg::p15, PReg::p14), "rev p15.h, p14.h"); TEST_SINGLE(rev(SubRegSize::i32Bit, PReg::p15, PReg::p14), "rev p15.s, p14.s"); TEST_SINGLE(rev(SubRegSize::i64Bit, PReg::p15, PReg::p14), "rev p15.d, p14.d"); - + TEST_SINGLE(punpklo(PReg::p15, PReg::p14), "punpklo p15.h, p14.b"); TEST_SINGLE(punpkhi(PReg::p15, PReg::p14), "punpkhi p15.h, p14.b"); - TEST_SINGLE(zip1(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "zip1 p15.b, p14.b, p13.b"); + TEST_SINGLE(zip1(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "zip1 p15.b, p14.b, p13.b"); TEST_SINGLE(zip1(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p13), "zip1 p15.h, p14.h, p13.h"); TEST_SINGLE(zip1(SubRegSize::i32Bit, PReg::p15, PReg::p14, 
PReg::p13), "zip1 p15.s, p14.s, p13.s"); TEST_SINGLE(zip1(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p13), "zip1 p15.d, p14.d, p13.d"); - TEST_SINGLE(zip2(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "zip2 p15.b, p14.b, p13.b"); + TEST_SINGLE(zip2(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "zip2 p15.b, p14.b, p13.b"); TEST_SINGLE(zip2(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p13), "zip2 p15.h, p14.h, p13.h"); TEST_SINGLE(zip2(SubRegSize::i32Bit, PReg::p15, PReg::p14, PReg::p13), "zip2 p15.s, p14.s, p13.s"); TEST_SINGLE(zip2(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p13), "zip2 p15.d, p14.d, p13.d"); - TEST_SINGLE(uzp1(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "uzp1 p15.b, p14.b, p13.b"); + TEST_SINGLE(uzp1(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "uzp1 p15.b, p14.b, p13.b"); TEST_SINGLE(uzp1(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p13), "uzp1 p15.h, p14.h, p13.h"); TEST_SINGLE(uzp1(SubRegSize::i32Bit, PReg::p15, PReg::p14, PReg::p13), "uzp1 p15.s, p14.s, p13.s"); TEST_SINGLE(uzp1(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p13), "uzp1 p15.d, p14.d, p13.d"); - TEST_SINGLE(uzp2(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "uzp2 p15.b, p14.b, p13.b"); + TEST_SINGLE(uzp2(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "uzp2 p15.b, p14.b, p13.b"); TEST_SINGLE(uzp2(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p13), "uzp2 p15.h, p14.h, p13.h"); TEST_SINGLE(uzp2(SubRegSize::i32Bit, PReg::p15, PReg::p14, PReg::p13), "uzp2 p15.s, p14.s, p13.s"); TEST_SINGLE(uzp2(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p13), "uzp2 p15.d, p14.d, p13.d"); - TEST_SINGLE(trn1(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "trn1 p15.b, p14.b, p13.b"); + TEST_SINGLE(trn1(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "trn1 p15.b, p14.b, p13.b"); TEST_SINGLE(trn1(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p13), "trn1 p15.h, p14.h, p13.h"); TEST_SINGLE(trn1(SubRegSize::i32Bit, PReg::p15, 
PReg::p14, PReg::p13), "trn1 p15.s, p14.s, p13.s"); TEST_SINGLE(trn1(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p13), "trn1 p15.d, p14.d, p13.d"); - TEST_SINGLE(trn2(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "trn2 p15.b, p14.b, p13.b"); + TEST_SINGLE(trn2(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "trn2 p15.b, p14.b, p13.b"); TEST_SINGLE(trn2(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p13), "trn2 p15.h, p14.h, p13.h"); TEST_SINGLE(trn2(SubRegSize::i32Bit, PReg::p15, PReg::p14, PReg::p13), "trn2 p15.s, p14.s, p13.s"); TEST_SINGLE(trn2(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p13), "trn2 p15.d, p14.d, p13.d"); @@ -1544,75 +1511,80 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Permute Predicate") { TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Permute Vector - Predicated - Base") { // CPY (SIMD&FP scalar) - TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), VReg::v30), "mov z30.b, p7/m, b30"); + TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), VReg::v30), "mov z30.b, p7/m, b30"); TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), VReg::v30), "mov z30.h, p7/m, h30"); TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), VReg::v30), "mov z30.s, p7/m, s30"); TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), VReg::v30), "mov z30.d, p7/m, d30"); - //TEST_SINGLE(compact(SubRegSize::i8Bit, ZReg::z30, PReg::p6, ZReg::z29), "compact z30.b, p6, z29.b"); - //TEST_SINGLE(compact(SubRegSize::i16Bit, ZReg::z30, PReg::p6, ZReg::z29), "compact z30.h, p6, z29.h"); - TEST_SINGLE(compact(SubRegSize::i32Bit, ZReg::z30, PReg::p6, ZReg::z29), "compact z30.s, p6, z29.s"); - TEST_SINGLE(compact(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z29), "compact z30.d, p6, z29.d"); - //TEST_SINGLE(compact(SubRegSize::i128Bit, ZReg::z30, PReg::p6, ZReg::z29), "compact z30.q, p6, z29.q"); + // TEST_SINGLE(compact(SubRegSize::i8Bit, ZReg::z30, PReg::p6, ZReg::z29), 
"compact z30.b, p6, z29.b"); + // TEST_SINGLE(compact(SubRegSize::i16Bit, ZReg::z30, PReg::p6, ZReg::z29), "compact z30.h, p6, z29.h"); + TEST_SINGLE(compact(SubRegSize::i32Bit, ZReg::z30, PReg::p6, ZReg::z29), "compact z30.s, p6, z29.s"); + TEST_SINGLE(compact(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z29), "compact z30.d, p6, z29.d"); + // TEST_SINGLE(compact(SubRegSize::i128Bit, ZReg::z30, PReg::p6, ZReg::z29), "compact z30.q, p6, z29.q"); // CPY (scalar) - TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), WReg::rsp), "mov z30.b, p7/m, wsp"); + TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), WReg::rsp), "mov z30.b, p7/m, wsp"); TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), WReg::rsp), "mov z30.h, p7/m, wsp"); TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), WReg::rsp), "mov z30.s, p7/m, wsp"); TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), XReg::rsp), "mov z30.d, p7/m, sp"); - TEST_SINGLE(splice(SubRegSize::i8Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.b, p6, {z28.b, z29.b}"); - TEST_SINGLE(splice(SubRegSize::i16Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.h, p6, {z28.h, z29.h}"); - TEST_SINGLE(splice(SubRegSize::i32Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.s, p6, {z28.s, z29.s}"); - TEST_SINGLE(splice(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.d, p6, {z28.d, z29.d}"); - TEST_SINGLE(splice(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z31, ZReg::z0), "splice z30.d, p6, {z31.d, z0.d}"); - //TEST_SINGLE(splice(SubRegSize::i128Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.q, p6, {z28.q, z29.q}"); - - TEST_SINGLE(splice(SubRegSize::i8Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.b, p6, z30.b, z28.b"); - TEST_SINGLE(splice(SubRegSize::i16Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.h, p6, z30.h, z28.h"); - 
TEST_SINGLE(splice(SubRegSize::i32Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.s, p6, z30.s, z28.s"); - TEST_SINGLE(splice(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.d, p6, z30.d, z28.d"); - //TEST_SINGLE(splice(SubRegSize::i128Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.q, p6, z30.q, z28.q"); + TEST_SINGLE(splice(SubRegSize::i8Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.b, p6, {z28.b, " + "z29.b}"); + TEST_SINGLE(splice(SubRegSize::i16Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.h, p6, {z28.h, " + "z29.h}"); + TEST_SINGLE(splice(SubRegSize::i32Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.s, p6, {z28.s, " + "z29.s}"); + TEST_SINGLE(splice(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.d, p6, {z28.d, " + "z29.d}"); + TEST_SINGLE(splice(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z31, ZReg::z0), "splice z30.d, p6, {z31.d, " + "z0.d}"); + // TEST_SINGLE(splice(SubRegSize::i128Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.q, p6, {z28.q, z29.q}"); + + TEST_SINGLE(splice(SubRegSize::i8Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.b, p6, z30.b, z28.b"); + TEST_SINGLE(splice(SubRegSize::i16Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.h, p6, z30.h, z28.h"); + TEST_SINGLE(splice(SubRegSize::i32Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.s, p6, z30.s, z28.s"); + TEST_SINGLE(splice(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.d, p6, z30.d, z28.d"); + // TEST_SINGLE(splice(SubRegSize::i128Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.q, p6, z30.q, z28.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE extract element to general register") { - TEST_SINGLE(lasta(SubRegSize::i8Bit, WReg::w30, PReg::p7, ZReg::z30), "lasta w30, p7, z30.b"); + TEST_SINGLE(lasta(SubRegSize::i8Bit, WReg::w30, PReg::p7, ZReg::z30), "lasta 
w30, p7, z30.b"); TEST_SINGLE(lasta(SubRegSize::i16Bit, WReg::w30, PReg::p7, ZReg::z30), "lasta w30, p7, z30.h"); TEST_SINGLE(lasta(SubRegSize::i32Bit, WReg::w30, PReg::p7, ZReg::z30), "lasta w30, p7, z30.s"); TEST_SINGLE(lasta(SubRegSize::i64Bit, XReg::x30, PReg::p7, ZReg::z30), "lasta x30, p7, z30.d"); - TEST_SINGLE(lastb(SubRegSize::i8Bit, WReg::w30, PReg::p7, ZReg::z30), "lastb w30, p7, z30.b"); + TEST_SINGLE(lastb(SubRegSize::i8Bit, WReg::w30, PReg::p7, ZReg::z30), "lastb w30, p7, z30.b"); TEST_SINGLE(lastb(SubRegSize::i16Bit, WReg::w30, PReg::p7, ZReg::z30), "lastb w30, p7, z30.h"); TEST_SINGLE(lastb(SubRegSize::i32Bit, WReg::w30, PReg::p7, ZReg::z30), "lastb w30, p7, z30.s"); TEST_SINGLE(lastb(SubRegSize::i64Bit, XReg::x30, PReg::p7, ZReg::z30), "lastb x30, p7, z30.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE extract element to SIMD&FP scalar register") { - TEST_SINGLE(lasta(SubRegSize::i8Bit, BReg::b30, PReg::p7, ZReg::z29), "lasta b30, p7, z29.b"); + TEST_SINGLE(lasta(SubRegSize::i8Bit, BReg::b30, PReg::p7, ZReg::z29), "lasta b30, p7, z29.b"); TEST_SINGLE(lasta(SubRegSize::i16Bit, HReg::h30, PReg::p7, ZReg::z29), "lasta h30, p7, z29.h"); TEST_SINGLE(lasta(SubRegSize::i32Bit, SReg::s30, PReg::p7, ZReg::z29), "lasta s30, p7, z29.s"); TEST_SINGLE(lasta(SubRegSize::i64Bit, DReg::d30, PReg::p7, ZReg::z29), "lasta d30, p7, z29.d"); - TEST_SINGLE(lastb(SubRegSize::i8Bit, BReg::b30, PReg::p7, ZReg::z29), "lastb b30, p7, z29.b"); + TEST_SINGLE(lastb(SubRegSize::i8Bit, BReg::b30, PReg::p7, ZReg::z29), "lastb b30, p7, z29.b"); TEST_SINGLE(lastb(SubRegSize::i16Bit, HReg::h30, PReg::p7, ZReg::z29), "lastb h30, p7, z29.h"); TEST_SINGLE(lastb(SubRegSize::i32Bit, SReg::s30, PReg::p7, ZReg::z29), "lastb s30, p7, z29.s"); TEST_SINGLE(lastb(SubRegSize::i64Bit, DReg::d30, PReg::p7, ZReg::z29), "lastb d30, p7, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE reverse within elements") { - //TEST_SINGLE(revb(SubRegSize::i8Bit, ZReg::z30, 
PReg::p6.Merging(), ZReg::z29), "revb z30.b, p6/m, z29.b"); + // TEST_SINGLE(revb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revb z30.b, p6/m, z29.b"); TEST_SINGLE(revb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revb z30.h, p6/m, z29.h"); TEST_SINGLE(revb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revb z30.s, p6/m, z29.s"); TEST_SINGLE(revb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revb z30.d, p6/m, z29.d"); - //TEST_SINGLE(revh(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revh z30.b, p6/m, z29.b"); - //TEST_SINGLE(revh(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revh z30.h, p6/m, z29.h"); + // TEST_SINGLE(revh(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revh z30.b, p6/m, z29.b"); + // TEST_SINGLE(revh(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revh z30.h, p6/m, z29.h"); TEST_SINGLE(revh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revh z30.s, p6/m, z29.s"); TEST_SINGLE(revh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revh z30.d, p6/m, z29.d"); - //TEST_SINGLE(revw(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revw z30.b, p6/m, z29.b"); - //TEST_SINGLE(revw(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revw z30.h, p6/m, z29.h"); - //TEST_SINGLE(revw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revw z30.s, p6/m, z29.s"); + // TEST_SINGLE(revw(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revw z30.b, p6/m, z29.b"); + // TEST_SINGLE(revw(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revw z30.h, p6/m, z29.h"); + // TEST_SINGLE(revw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revw z30.s, p6/m, z29.s"); TEST_SINGLE(revw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revw z30.d, p6/m, z29.d"); TEST_SINGLE(rbit(SubRegSize::i8Bit, ZReg::z30, 
PReg::p6.Merging(), ZReg::z29), "rbit z30.b, p6/m, z29.b"); @@ -1622,24 +1594,24 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE reverse within elements") } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE conditionally broadcast element to vector") { - TEST_SINGLE(clasta(SubRegSize::i8Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clasta z30.b, p7, z30.b, z29.b"); + TEST_SINGLE(clasta(SubRegSize::i8Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clasta z30.b, p7, z30.b, z29.b"); TEST_SINGLE(clasta(SubRegSize::i16Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clasta z30.h, p7, z30.h, z29.h"); TEST_SINGLE(clasta(SubRegSize::i32Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clasta z30.s, p7, z30.s, z29.s"); TEST_SINGLE(clasta(SubRegSize::i64Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clasta z30.d, p7, z30.d, z29.d"); - TEST_SINGLE(clastb(SubRegSize::i8Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clastb z30.b, p7, z30.b, z29.b"); + TEST_SINGLE(clastb(SubRegSize::i8Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clastb z30.b, p7, z30.b, z29.b"); TEST_SINGLE(clastb(SubRegSize::i16Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clastb z30.h, p7, z30.h, z29.h"); TEST_SINGLE(clastb(SubRegSize::i32Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clastb z30.s, p7, z30.s, z29.s"); TEST_SINGLE(clastb(SubRegSize::i64Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clastb z30.d, p7, z30.d, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE conditionally extract element to SIMD&FP scalar") { - TEST_SINGLE(clasta(SubRegSize::i8Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clasta b30, p7, b30, z29.b"); + TEST_SINGLE(clasta(SubRegSize::i8Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clasta b30, p7, b30, z29.b"); TEST_SINGLE(clasta(SubRegSize::i16Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clasta h30, p7, h30, z29.h"); TEST_SINGLE(clasta(SubRegSize::i32Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clasta s30, 
p7, s30, z29.s"); TEST_SINGLE(clasta(SubRegSize::i64Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clasta d30, p7, d30, z29.d"); - TEST_SINGLE(clastb(SubRegSize::i8Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clastb b30, p7, b30, z29.b"); + TEST_SINGLE(clastb(SubRegSize::i8Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clastb b30, p7, b30, z29.b"); TEST_SINGLE(clastb(SubRegSize::i16Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clastb h30, p7, h30, z29.h"); TEST_SINGLE(clastb(SubRegSize::i32Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clastb s30, p7, s30, z29.s"); TEST_SINGLE(clastb(SubRegSize::i64Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clastb d30, p7, d30, z29.d"); @@ -1650,24 +1622,24 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE reverse doublewords") { } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE conditionally extract element to general register") { - TEST_SINGLE(clasta(SubRegSize::i8Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clasta w30, p7, w30, z29.b"); + TEST_SINGLE(clasta(SubRegSize::i8Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clasta w30, p7, w30, z29.b"); TEST_SINGLE(clasta(SubRegSize::i16Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clasta w30, p7, w30, z29.h"); TEST_SINGLE(clasta(SubRegSize::i32Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clasta w30, p7, w30, z29.s"); TEST_SINGLE(clasta(SubRegSize::i64Bit, XReg::x30, PReg::p7, XReg::x30, ZReg::z29), "clasta x30, p7, x30, z29.d"); - TEST_SINGLE(clastb(SubRegSize::i8Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clastb w30, p7, w30, z29.b"); + TEST_SINGLE(clastb(SubRegSize::i8Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clastb w30, p7, w30, z29.b"); TEST_SINGLE(clastb(SubRegSize::i16Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clastb w30, p7, w30, z29.h"); TEST_SINGLE(clastb(SubRegSize::i32Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clastb w30, p7, w30, z29.s"); TEST_SINGLE(clastb(SubRegSize::i64Bit, XReg::x30, 
PReg::p7, XReg::x30, ZReg::z29), "clastb x30, p7, x30, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Permute Vector - Extract") { - TEST_SINGLE(ext(ZReg::z30, ZReg::z30, ZReg::z29, 0), "ext z30.b, z30.b, z29.b, #0"); + TEST_SINGLE(ext(ZReg::z30, ZReg::z30, ZReg::z29, 0), "ext z30.b, z30.b, z29.b, #0"); TEST_SINGLE(ext(ZReg::z30, ZReg::z30, ZReg::z29, 255), "ext z30.b, z30.b, z29.b, #255"); - TEST_SINGLE(ext(ZReg::z30, ZReg::z28, ZReg::z29, 0), "ext z30.b, {z28.b, z29.b}, #0"); + TEST_SINGLE(ext(ZReg::z30, ZReg::z28, ZReg::z29, 0), "ext z30.b, {z28.b, z29.b}, #0"); TEST_SINGLE(ext(ZReg::z30, ZReg::z28, ZReg::z29, 255), "ext z30.b, {z28.b, z29.b}, #255"); - TEST_SINGLE(ext(ZReg::z30, ZReg::z31, ZReg::z0, 255), "ext z30.b, {z31.b, z0.b}, #255"); + TEST_SINGLE(ext(ZReg::z30, ZReg::z31, ZReg::z0, 255), "ext z30.b, {z31.b, z0.b}, #255"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE permute vector segments") { @@ -1675,132 +1647,132 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE permute vector segments") } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer compare vectors") { - TEST_SINGLE(cmpeq(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.b, p5/z, z30.b, z29.b"); + TEST_SINGLE(cmpeq(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.b, p5/z, z30.b, z29.b"); TEST_SINGLE(cmpeq(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.h, p5/z, z30.h, z29.h"); TEST_SINGLE(cmpeq(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.s, p5/z, z30.s, z29.s"); TEST_SINGLE(cmpeq(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.d, p5/z, z30.d, z29.d"); - TEST_SINGLE(cmpge(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.b, p5/z, z30.b, z29.b"); + TEST_SINGLE(cmpge(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge 
p6.b, p5/z, z30.b, z29.b"); TEST_SINGLE(cmpge(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.h, p5/z, z30.h, z29.h"); TEST_SINGLE(cmpge(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.s, p5/z, z30.s, z29.s"); TEST_SINGLE(cmpge(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.d, p5/z, z30.d, z29.d"); - TEST_SINGLE(cmpgt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.b, p5/z, z30.b, z29.b"); + TEST_SINGLE(cmpgt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.b, p5/z, z30.b, z29.b"); TEST_SINGLE(cmpgt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.h, p5/z, z30.h, z29.h"); TEST_SINGLE(cmpgt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.s, p5/z, z30.s, z29.s"); TEST_SINGLE(cmpgt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.d, p5/z, z30.d, z29.d"); - TEST_SINGLE(cmphi(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.b, p5/z, z30.b, z29.b"); + TEST_SINGLE(cmphi(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.b, p5/z, z30.b, z29.b"); TEST_SINGLE(cmphi(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.h, p5/z, z30.h, z29.h"); TEST_SINGLE(cmphi(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.s, p5/z, z30.s, z29.s"); TEST_SINGLE(cmphi(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.d, p5/z, z30.d, z29.d"); - TEST_SINGLE(cmphs(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.b, p5/z, z30.b, z29.b"); + TEST_SINGLE(cmphs(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.b, p5/z, z30.b, z29.b"); TEST_SINGLE(cmphs(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), 
ZReg::z30, ZReg::z29), "cmphs p6.h, p5/z, z30.h, z29.h"); TEST_SINGLE(cmphs(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.s, p5/z, z30.s, z29.s"); TEST_SINGLE(cmphs(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.d, p5/z, z30.d, z29.d"); - TEST_SINGLE(cmpne(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.b, p5/z, z30.b, z29.b"); + TEST_SINGLE(cmpne(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.b, p5/z, z30.b, z29.b"); TEST_SINGLE(cmpne(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.h, p5/z, z30.h, z29.h"); TEST_SINGLE(cmpne(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.s, p5/z, z30.s, z29.s"); TEST_SINGLE(cmpne(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.d, p5/z, z30.d, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer compare with wide elements") { - TEST_SINGLE(cmpeq_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.b, p5/z, z30.b, z29.d"); + TEST_SINGLE(cmpeq_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.b, p5/z, z30.b, z29.d"); TEST_SINGLE(cmpeq_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.h, p5/z, z30.h, z29.d"); TEST_SINGLE(cmpeq_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.s, p5/z, z30.s, z29.d"); - TEST_SINGLE(cmpgt_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.b, p5/z, z30.b, z29.d"); + TEST_SINGLE(cmpgt_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.b, p5/z, z30.b, z29.d"); TEST_SINGLE(cmpgt_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.h, p5/z, z30.h, z29.d"); 
TEST_SINGLE(cmpgt_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.s, p5/z, z30.s, z29.d"); - TEST_SINGLE(cmpge_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.b, p5/z, z30.b, z29.d"); + TEST_SINGLE(cmpge_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.b, p5/z, z30.b, z29.d"); TEST_SINGLE(cmpge_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.h, p5/z, z30.h, z29.d"); TEST_SINGLE(cmpge_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.s, p5/z, z30.s, z29.d"); - TEST_SINGLE(cmphi_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.b, p5/z, z30.b, z29.d"); + TEST_SINGLE(cmphi_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.b, p5/z, z30.b, z29.d"); TEST_SINGLE(cmphi_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.h, p5/z, z30.h, z29.d"); TEST_SINGLE(cmphi_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.s, p5/z, z30.s, z29.d"); - TEST_SINGLE(cmphs_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.b, p5/z, z30.b, z29.d"); + TEST_SINGLE(cmphs_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.b, p5/z, z30.b, z29.d"); TEST_SINGLE(cmphs_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.h, p5/z, z30.h, z29.d"); TEST_SINGLE(cmphs_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.s, p5/z, z30.s, z29.d"); - TEST_SINGLE(cmplt_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplt p6.b, p5/z, z30.b, z29.d"); + TEST_SINGLE(cmplt_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplt p6.b, p5/z, z30.b, z29.d"); 
TEST_SINGLE(cmplt_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplt p6.h, p5/z, z30.h, z29.d"); TEST_SINGLE(cmplt_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplt p6.s, p5/z, z30.s, z29.d"); - TEST_SINGLE(cmple_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmple p6.b, p5/z, z30.b, z29.d"); + TEST_SINGLE(cmple_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmple p6.b, p5/z, z30.b, z29.d"); TEST_SINGLE(cmple_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmple p6.h, p5/z, z30.h, z29.d"); TEST_SINGLE(cmple_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmple p6.s, p5/z, z30.s, z29.d"); - TEST_SINGLE(cmplo_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplo p6.b, p5/z, z30.b, z29.d"); + TEST_SINGLE(cmplo_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplo p6.b, p5/z, z30.b, z29.d"); TEST_SINGLE(cmplo_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplo p6.h, p5/z, z30.h, z29.d"); TEST_SINGLE(cmplo_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplo p6.s, p5/z, z30.s, z29.d"); - TEST_SINGLE(cmpls_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpls p6.b, p5/z, z30.b, z29.d"); + TEST_SINGLE(cmpls_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpls p6.b, p5/z, z30.b, z29.d"); TEST_SINGLE(cmpls_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpls p6.h, p5/z, z30.h, z29.d"); TEST_SINGLE(cmpls_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpls p6.s, p5/z, z30.s, z29.d"); - TEST_SINGLE(cmpne_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.b, p5/z, z30.b, z29.d"); + 
TEST_SINGLE(cmpne_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.b, p5/z, z30.b, z29.d"); TEST_SINGLE(cmpne_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.h, p5/z, z30.h, z29.d"); TEST_SINGLE(cmpne_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.s, p5/z, z30.s, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE propagate break from previous partition") { - TEST_SINGLE(brkpa(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p12), "brkpa p15.b, p14/z, p13.b, p12.b"); - TEST_SINGLE(brkpas(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p12), "brkpas p15.b, p14/z, p13.b, p12.b"); - TEST_SINGLE(brkpb(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p12), "brkpb p15.b, p14/z, p13.b, p12.b"); - TEST_SINGLE(brkpbs(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p12), "brkpbs p15.b, p14/z, p13.b, p12.b"); + TEST_SINGLE(brkpa(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p12), "brkpa p15.b, p14/z, p13.b, p12.b"); + TEST_SINGLE(brkpas(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p12), "brkpas p15.b, p14/z, p13.b, p12.b"); + TEST_SINGLE(brkpb(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p12), "brkpb p15.b, p14/z, p13.b, p12.b"); + TEST_SINGLE(brkpbs(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p12), "brkpbs p15.b, p14/z, p13.b, p12.b"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE propagate break to next partition") { - TEST_SINGLE(brkn(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p15), "brkn p15.b, p14/z, p13.b, p15.b"); - TEST_SINGLE(brkns(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p15), "brkns p15.b, p14/z, p13.b, p15.b"); + TEST_SINGLE(brkn(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p15), "brkn p15.b, p14/z, p13.b, p15.b"); + TEST_SINGLE(brkns(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p15), "brkns p15.b, p14/z, p13.b, p15.b"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE partition 
break condition") { - TEST_SINGLE(brka(PReg::p15, PReg::p14.Zeroing(), PReg::p13), "brka p15.b, p14/z, p13.b"); - TEST_SINGLE(brka(PReg::p15, PReg::p14.Merging(), PReg::p13), "brka p15.b, p14/m, p13.b"); + TEST_SINGLE(brka(PReg::p15, PReg::p14.Zeroing(), PReg::p13), "brka p15.b, p14/z, p13.b"); + TEST_SINGLE(brka(PReg::p15, PReg::p14.Merging(), PReg::p13), "brka p15.b, p14/m, p13.b"); TEST_SINGLE(brkas(PReg::p15, PReg::p14.Zeroing(), PReg::p13), "brkas p15.b, p14/z, p13.b"); - TEST_SINGLE(brkb(PReg::p15, PReg::p14.Zeroing(), PReg::p13), "brkb p15.b, p14/z, p13.b"); - TEST_SINGLE(brkb(PReg::p15, PReg::p14.Merging(), PReg::p13), "brkb p15.b, p14/m, p13.b"); + TEST_SINGLE(brkb(PReg::p15, PReg::p14.Zeroing(), PReg::p13), "brkb p15.b, p14/z, p13.b"); + TEST_SINGLE(brkb(PReg::p15, PReg::p14.Merging(), PReg::p13), "brkb p15.b, p14/m, p13.b"); TEST_SINGLE(brkbs(PReg::p15, PReg::p14.Zeroing(), PReg::p13), "brkbs p15.b, p14/z, p13.b"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Predicate Misc") { - TEST_SINGLE(pnext(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p15), "pnext p15.b, p14, p15.b"); + TEST_SINGLE(pnext(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p15), "pnext p15.b, p14, p15.b"); TEST_SINGLE(pnext(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p15), "pnext p15.h, p14, p15.h"); TEST_SINGLE(pnext(SubRegSize::i32Bit, PReg::p15, PReg::p14, PReg::p15), "pnext p15.s, p14, p15.s"); TEST_SINGLE(pnext(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p15), "pnext p15.d, p14, p15.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate test") { - TEST_SINGLE(ptest(PReg::p6, PReg::p5), "ptest p6, p5.b"); + TEST_SINGLE(ptest(PReg::p6, PReg::p5), "ptest p6, p5.b"); TEST_SINGLE(ptest(PReg::p15, PReg::p14), "ptest p15, p14.b"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate first active") { - TEST_SINGLE(pfirst(PReg::p6, PReg::p5, PReg::p6), "pfirst p6.b, p5, p6.b"); + TEST_SINGLE(pfirst(PReg::p6, PReg::p5, PReg::p6), "pfirst 
p6.b, p5, p6.b"); TEST_SINGLE(pfirst(PReg::p15, PReg::p14, PReg::p15), "pfirst p15.b, p14, p15.b"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate zero") { - TEST_SINGLE(pfalse(PReg::p6), "pfalse p6.b"); + TEST_SINGLE(pfalse(PReg::p6), "pfalse p6.b"); TEST_SINGLE(pfalse(PReg::p15), "pfalse p15.b"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate read from FFR (predicated)") { - TEST_SINGLE(rdffr(PReg::p6, PReg::p5.Zeroing()), "rdffr p6.b, p5/z"); - TEST_SINGLE(rdffr(PReg::p15, PReg::p14.Zeroing()), "rdffr p15.b, p14/z"); - TEST_SINGLE(rdffrs(PReg::p6, PReg::p5.Zeroing()), "rdffrs p6.b, p5/z"); + TEST_SINGLE(rdffr(PReg::p6, PReg::p5.Zeroing()), "rdffr p6.b, p5/z"); + TEST_SINGLE(rdffr(PReg::p15, PReg::p14.Zeroing()), "rdffr p15.b, p14/z"); + TEST_SINGLE(rdffrs(PReg::p6, PReg::p5.Zeroing()), "rdffrs p6.b, p5/z"); TEST_SINGLE(rdffrs(PReg::p15, PReg::p14.Zeroing()), "rdffrs p15.b, p14/z"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate read from FFR (unpredicated)") { - TEST_SINGLE(rdffr(PReg::p6), "rdffr p6.b"); + TEST_SINGLE(rdffr(PReg::p6), "rdffr p6.b"); TEST_SINGLE(rdffr(PReg::p15), "rdffr p15.b"); } @@ -1977,74 +1949,74 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate initialize") { } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer compare scalar count and limit") { - TEST_SINGLE(whilege(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilege p15.b, x30, x29"); + TEST_SINGLE(whilege(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilege p15.b, x30, x29"); TEST_SINGLE(whilege(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilege p15.h, x30, x29"); TEST_SINGLE(whilege(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilege p15.s, x30, x29"); TEST_SINGLE(whilege(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilege p15.d, x30, x29"); - TEST_SINGLE(whilege(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilege p15.b, w30, w29"); + 
TEST_SINGLE(whilege(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilege p15.b, w30, w29"); TEST_SINGLE(whilege(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilege p15.h, w30, w29"); TEST_SINGLE(whilege(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilege p15.s, w30, w29"); TEST_SINGLE(whilege(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilege p15.d, w30, w29"); - TEST_SINGLE(whilegt(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilegt p15.b, x30, x29"); + TEST_SINGLE(whilegt(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilegt p15.b, x30, x29"); TEST_SINGLE(whilegt(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilegt p15.h, x30, x29"); TEST_SINGLE(whilegt(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilegt p15.s, x30, x29"); TEST_SINGLE(whilegt(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilegt p15.d, x30, x29"); - TEST_SINGLE(whilegt(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilegt p15.b, w30, w29"); + TEST_SINGLE(whilegt(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilegt p15.b, w30, w29"); TEST_SINGLE(whilegt(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilegt p15.h, w30, w29"); TEST_SINGLE(whilegt(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilegt p15.s, w30, w29"); TEST_SINGLE(whilegt(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilegt p15.d, w30, w29"); - TEST_SINGLE(whilelt(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilelt p15.b, x30, x29"); + TEST_SINGLE(whilelt(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilelt p15.b, x30, x29"); TEST_SINGLE(whilelt(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilelt p15.h, x30, x29"); TEST_SINGLE(whilelt(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilelt p15.s, x30, x29"); TEST_SINGLE(whilelt(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilelt p15.d, x30, x29"); - TEST_SINGLE(whilelt(SubRegSize::i8Bit, PReg::p15, 
WReg::w30, WReg::w29), "whilelt p15.b, w30, w29"); + TEST_SINGLE(whilelt(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilelt p15.b, w30, w29"); TEST_SINGLE(whilelt(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilelt p15.h, w30, w29"); TEST_SINGLE(whilelt(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilelt p15.s, w30, w29"); TEST_SINGLE(whilelt(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilelt p15.d, w30, w29"); - TEST_SINGLE(whilele(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilele p15.b, x30, x29"); + TEST_SINGLE(whilele(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilele p15.b, x30, x29"); TEST_SINGLE(whilele(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilele p15.h, x30, x29"); TEST_SINGLE(whilele(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilele p15.s, x30, x29"); TEST_SINGLE(whilele(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilele p15.d, x30, x29"); - TEST_SINGLE(whilele(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilele p15.b, w30, w29"); + TEST_SINGLE(whilele(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilele p15.b, w30, w29"); TEST_SINGLE(whilele(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilele p15.h, w30, w29"); TEST_SINGLE(whilele(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilele p15.s, w30, w29"); TEST_SINGLE(whilele(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilele p15.d, w30, w29"); - TEST_SINGLE(whilehs(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilehs p15.b, x30, x29"); + TEST_SINGLE(whilehs(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilehs p15.b, x30, x29"); TEST_SINGLE(whilehs(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilehs p15.h, x30, x29"); TEST_SINGLE(whilehs(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilehs p15.s, x30, x29"); TEST_SINGLE(whilehs(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilehs p15.d, x30, x29"); - 
TEST_SINGLE(whilehs(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilehs p15.b, w30, w29"); + TEST_SINGLE(whilehs(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilehs p15.b, w30, w29"); TEST_SINGLE(whilehs(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilehs p15.h, w30, w29"); TEST_SINGLE(whilehs(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilehs p15.s, w30, w29"); TEST_SINGLE(whilehs(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilehs p15.d, w30, w29"); - TEST_SINGLE(whilehi(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilehi p15.b, x30, x29"); + TEST_SINGLE(whilehi(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilehi p15.b, x30, x29"); TEST_SINGLE(whilehi(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilehi p15.h, x30, x29"); TEST_SINGLE(whilehi(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilehi p15.s, x30, x29"); TEST_SINGLE(whilehi(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilehi p15.d, x30, x29"); - TEST_SINGLE(whilehi(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilehi p15.b, w30, w29"); + TEST_SINGLE(whilehi(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilehi p15.b, w30, w29"); TEST_SINGLE(whilehi(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilehi p15.h, w30, w29"); TEST_SINGLE(whilehi(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilehi p15.s, w30, w29"); TEST_SINGLE(whilehi(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilehi p15.d, w30, w29"); - TEST_SINGLE(whilelo(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilelo p15.b, x30, x29"); + TEST_SINGLE(whilelo(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilelo p15.b, x30, x29"); TEST_SINGLE(whilelo(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilelo p15.h, x30, x29"); TEST_SINGLE(whilelo(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilelo p15.s, x30, x29"); TEST_SINGLE(whilelo(SubRegSize::i64Bit, PReg::p15, 
XReg::x30, XReg::x29), "whilelo p15.d, x30, x29"); - TEST_SINGLE(whilelo(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilelo p15.b, w30, w29"); + TEST_SINGLE(whilelo(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilelo p15.b, w30, w29"); TEST_SINGLE(whilelo(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilelo p15.h, w30, w29"); TEST_SINGLE(whilelo(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilelo p15.s, w30, w29"); TEST_SINGLE(whilelo(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilelo p15.d, w30, w29"); - TEST_SINGLE(whilels(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilels p15.b, x30, x29"); + TEST_SINGLE(whilels(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilels p15.b, x30, x29"); TEST_SINGLE(whilels(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilels p15.h, x30, x29"); TEST_SINGLE(whilels(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilels p15.s, x30, x29"); TEST_SINGLE(whilels(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilels p15.d, x30, x29"); - TEST_SINGLE(whilels(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilels p15.b, w30, w29"); + TEST_SINGLE(whilels(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilels p15.b, w30, w29"); TEST_SINGLE(whilels(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilels p15.h, w30, w29"); TEST_SINGLE(whilels(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilels p15.s, w30, w29"); TEST_SINGLE(whilels(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilels p15.d, w30, w29"); @@ -2059,276 +2031,276 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE conditionally terminate sc } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE pointer conflict compare") { - TEST_SINGLE(whilewr(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilewr p15.b, x30, x29"); + TEST_SINGLE(whilewr(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilewr p15.b, x30, x29"); 
TEST_SINGLE(whilewr(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilewr p15.h, x30, x29"); TEST_SINGLE(whilewr(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilewr p15.s, x30, x29"); TEST_SINGLE(whilewr(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilewr p15.d, x30, x29"); - TEST_SINGLE(whilerw(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilerw p15.b, x30, x29"); + TEST_SINGLE(whilerw(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilerw p15.b, x30, x29"); TEST_SINGLE(whilerw(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilerw p15.h, x30, x29"); TEST_SINGLE(whilerw(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilerw p15.s, x30, x29"); TEST_SINGLE(whilerw(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilerw p15.d, x30, x29"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer add/subtract immediate (unpredicated)") { - TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "add z30.b, z30.b, #0"); - TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "add z30.b, z30.b, #127"); - TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "add z30.b, z30.b, #255"); - - TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "add z30.h, z30.h, #0"); - TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "add z30.h, z30.h, #127"); - TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "add z30.h, z30.h, #255"); - TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "add z30.h, z30.h, #1, lsl #8"); + TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "add z30.b, z30.b, #0"); + TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "add z30.b, z30.b, #127"); + TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "add z30.b, z30.b, #255"); + + TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "add z30.h, z30.h, #0"); + TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 
127), "add z30.h, z30.h, #127"); + TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "add z30.h, z30.h, #255"); + TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "add z30.h, z30.h, #1, lsl #8"); TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "add z30.h, z30.h, #127, lsl #8"); TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "add z30.h, z30.h, #255, lsl #8"); - TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "add z30.s, z30.s, #0"); - TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "add z30.s, z30.s, #127"); - TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "add z30.s, z30.s, #255"); - TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "add z30.s, z30.s, #1, lsl #8"); + TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "add z30.s, z30.s, #0"); + TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "add z30.s, z30.s, #127"); + TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "add z30.s, z30.s, #255"); + TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "add z30.s, z30.s, #1, lsl #8"); TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "add z30.s, z30.s, #127, lsl #8"); TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "add z30.s, z30.s, #255, lsl #8"); - TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "add z30.d, z30.d, #0"); - TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "add z30.d, z30.d, #127"); - TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "add z30.d, z30.d, #255"); - TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "add z30.d, z30.d, #1, lsl #8"); + TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "add z30.d, z30.d, #0"); + TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "add z30.d, z30.d, #127"); + TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), 
"add z30.d, z30.d, #255"); + TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "add z30.d, z30.d, #1, lsl #8"); TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "add z30.d, z30.d, #127, lsl #8"); TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "add z30.d, z30.d, #255, lsl #8"); - TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "sub z30.b, z30.b, #0"); - TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "sub z30.b, z30.b, #127"); - TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "sub z30.b, z30.b, #255"); + TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "sub z30.b, z30.b, #0"); + TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "sub z30.b, z30.b, #127"); + TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "sub z30.b, z30.b, #255"); - TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "sub z30.h, z30.h, #0"); - TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "sub z30.h, z30.h, #127"); - TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "sub z30.h, z30.h, #255"); - TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "sub z30.h, z30.h, #1, lsl #8"); + TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "sub z30.h, z30.h, #0"); + TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "sub z30.h, z30.h, #127"); + TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "sub z30.h, z30.h, #255"); + TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "sub z30.h, z30.h, #1, lsl #8"); TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "sub z30.h, z30.h, #127, lsl #8"); TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "sub z30.h, z30.h, #255, lsl #8"); - TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "sub z30.s, z30.s, #0"); - TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "sub z30.s, z30.s, 
#127"); - TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "sub z30.s, z30.s, #255"); - TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "sub z30.s, z30.s, #1, lsl #8"); + TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "sub z30.s, z30.s, #0"); + TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "sub z30.s, z30.s, #127"); + TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "sub z30.s, z30.s, #255"); + TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "sub z30.s, z30.s, #1, lsl #8"); TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "sub z30.s, z30.s, #127, lsl #8"); TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "sub z30.s, z30.s, #255, lsl #8"); - TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "sub z30.d, z30.d, #0"); - TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "sub z30.d, z30.d, #127"); - TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "sub z30.d, z30.d, #255"); - TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "sub z30.d, z30.d, #1, lsl #8"); + TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "sub z30.d, z30.d, #0"); + TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "sub z30.d, z30.d, #127"); + TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "sub z30.d, z30.d, #255"); + TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "sub z30.d, z30.d, #1, lsl #8"); TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "sub z30.d, z30.d, #127, lsl #8"); TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "sub z30.d, z30.d, #255, lsl #8"); - TEST_SINGLE(subr(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "subr z30.b, z30.b, #0"); - TEST_SINGLE(subr(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "subr z30.b, z30.b, #127"); - TEST_SINGLE(subr(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "subr z30.b, z30.b, 
#255"); + TEST_SINGLE(subr(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "subr z30.b, z30.b, #0"); + TEST_SINGLE(subr(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "subr z30.b, z30.b, #127"); + TEST_SINGLE(subr(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "subr z30.b, z30.b, #255"); - TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "subr z30.h, z30.h, #0"); - TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "subr z30.h, z30.h, #127"); - TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "subr z30.h, z30.h, #255"); - TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "subr z30.h, z30.h, #1, lsl #8"); + TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "subr z30.h, z30.h, #0"); + TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "subr z30.h, z30.h, #127"); + TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "subr z30.h, z30.h, #255"); + TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "subr z30.h, z30.h, #1, lsl #8"); TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "subr z30.h, z30.h, #127, lsl #8"); TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "subr z30.h, z30.h, #255, lsl #8"); - TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "subr z30.s, z30.s, #0"); - TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "subr z30.s, z30.s, #127"); - TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "subr z30.s, z30.s, #255"); - TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "subr z30.s, z30.s, #1, lsl #8"); + TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "subr z30.s, z30.s, #0"); + TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "subr z30.s, z30.s, #127"); + TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "subr z30.s, z30.s, #255"); + TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "subr z30.s, 
z30.s, #1, lsl #8"); TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "subr z30.s, z30.s, #127, lsl #8"); TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "subr z30.s, z30.s, #255, lsl #8"); - TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "subr z30.d, z30.d, #0"); - TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "subr z30.d, z30.d, #127"); - TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "subr z30.d, z30.d, #255"); - TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "subr z30.d, z30.d, #1, lsl #8"); + TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "subr z30.d, z30.d, #0"); + TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "subr z30.d, z30.d, #127"); + TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "subr z30.d, z30.d, #255"); + TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "subr z30.d, z30.d, #1, lsl #8"); TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "subr z30.d, z30.d, #127, lsl #8"); TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "subr z30.d, z30.d, #255, lsl #8"); - TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "sqadd z30.b, z30.b, #0"); - TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "sqadd z30.b, z30.b, #127"); - TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "sqadd z30.b, z30.b, #255"); + TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "sqadd z30.b, z30.b, #0"); + TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "sqadd z30.b, z30.b, #127"); + TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "sqadd z30.b, z30.b, #255"); - TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "sqadd z30.h, z30.h, #0"); - TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "sqadd z30.h, z30.h, #127"); - TEST_SINGLE(sqadd(SubRegSize::i16Bit, 
ZReg::z30, ZReg::z30, 255), "sqadd z30.h, z30.h, #255"); - TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "sqadd z30.h, z30.h, #1, lsl #8"); + TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "sqadd z30.h, z30.h, #0"); + TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "sqadd z30.h, z30.h, #127"); + TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "sqadd z30.h, z30.h, #255"); + TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "sqadd z30.h, z30.h, #1, lsl #8"); TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "sqadd z30.h, z30.h, #127, lsl #8"); TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "sqadd z30.h, z30.h, #255, lsl #8"); - TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "sqadd z30.s, z30.s, #0"); - TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "sqadd z30.s, z30.s, #127"); - TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "sqadd z30.s, z30.s, #255"); - TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "sqadd z30.s, z30.s, #1, lsl #8"); + TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "sqadd z30.s, z30.s, #0"); + TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "sqadd z30.s, z30.s, #127"); + TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "sqadd z30.s, z30.s, #255"); + TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "sqadd z30.s, z30.s, #1, lsl #8"); TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "sqadd z30.s, z30.s, #127, lsl #8"); TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "sqadd z30.s, z30.s, #255, lsl #8"); - TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "sqadd z30.d, z30.d, #0"); - TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "sqadd z30.d, z30.d, #127"); - TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, 
ZReg::z30, 255), "sqadd z30.d, z30.d, #255"); - TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "sqadd z30.d, z30.d, #1, lsl #8"); + TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "sqadd z30.d, z30.d, #0"); + TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "sqadd z30.d, z30.d, #127"); + TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "sqadd z30.d, z30.d, #255"); + TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "sqadd z30.d, z30.d, #1, lsl #8"); TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "sqadd z30.d, z30.d, #127, lsl #8"); TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "sqadd z30.d, z30.d, #255, lsl #8"); - TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "uqadd z30.b, z30.b, #0"); - TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "uqadd z30.b, z30.b, #127"); - TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "uqadd z30.b, z30.b, #255"); + TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "uqadd z30.b, z30.b, #0"); + TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "uqadd z30.b, z30.b, #127"); + TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "uqadd z30.b, z30.b, #255"); - TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "uqadd z30.h, z30.h, #0"); - TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "uqadd z30.h, z30.h, #127"); - TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "uqadd z30.h, z30.h, #255"); - TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "uqadd z30.h, z30.h, #1, lsl #8"); + TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "uqadd z30.h, z30.h, #0"); + TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "uqadd z30.h, z30.h, #127"); + TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "uqadd z30.h, z30.h, #255"); + 
TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "uqadd z30.h, z30.h, #1, lsl #8"); TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "uqadd z30.h, z30.h, #127, lsl #8"); TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "uqadd z30.h, z30.h, #255, lsl #8"); - TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "uqadd z30.s, z30.s, #0"); - TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "uqadd z30.s, z30.s, #127"); - TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "uqadd z30.s, z30.s, #255"); - TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "uqadd z30.s, z30.s, #1, lsl #8"); + TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "uqadd z30.s, z30.s, #0"); + TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "uqadd z30.s, z30.s, #127"); + TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "uqadd z30.s, z30.s, #255"); + TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "uqadd z30.s, z30.s, #1, lsl #8"); TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "uqadd z30.s, z30.s, #127, lsl #8"); TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "uqadd z30.s, z30.s, #255, lsl #8"); - TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "uqadd z30.d, z30.d, #0"); - TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "uqadd z30.d, z30.d, #127"); - TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "uqadd z30.d, z30.d, #255"); - TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "uqadd z30.d, z30.d, #1, lsl #8"); + TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "uqadd z30.d, z30.d, #0"); + TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "uqadd z30.d, z30.d, #127"); + TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "uqadd z30.d, z30.d, #255"); + 
TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "uqadd z30.d, z30.d, #1, lsl #8"); TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "uqadd z30.d, z30.d, #127, lsl #8"); TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "uqadd z30.d, z30.d, #255, lsl #8"); - TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "sqsub z30.b, z30.b, #0"); - TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "sqsub z30.b, z30.b, #127"); - TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "sqsub z30.b, z30.b, #255"); + TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "sqsub z30.b, z30.b, #0"); + TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "sqsub z30.b, z30.b, #127"); + TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "sqsub z30.b, z30.b, #255"); - TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "sqsub z30.h, z30.h, #0"); - TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "sqsub z30.h, z30.h, #127"); - TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "sqsub z30.h, z30.h, #255"); - TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "sqsub z30.h, z30.h, #1, lsl #8"); + TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "sqsub z30.h, z30.h, #0"); + TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "sqsub z30.h, z30.h, #127"); + TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "sqsub z30.h, z30.h, #255"); + TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "sqsub z30.h, z30.h, #1, lsl #8"); TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "sqsub z30.h, z30.h, #127, lsl #8"); TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "sqsub z30.h, z30.h, #255, lsl #8"); - TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "sqsub z30.s, z30.s, #0"); - 
TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "sqsub z30.s, z30.s, #127"); - TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "sqsub z30.s, z30.s, #255"); - TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "sqsub z30.s, z30.s, #1, lsl #8"); + TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "sqsub z30.s, z30.s, #0"); + TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "sqsub z30.s, z30.s, #127"); + TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "sqsub z30.s, z30.s, #255"); + TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "sqsub z30.s, z30.s, #1, lsl #8"); TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "sqsub z30.s, z30.s, #127, lsl #8"); TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "sqsub z30.s, z30.s, #255, lsl #8"); - TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "sqsub z30.d, z30.d, #0"); - TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "sqsub z30.d, z30.d, #127"); - TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "sqsub z30.d, z30.d, #255"); - TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "sqsub z30.d, z30.d, #1, lsl #8"); + TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "sqsub z30.d, z30.d, #0"); + TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "sqsub z30.d, z30.d, #127"); + TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "sqsub z30.d, z30.d, #255"); + TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "sqsub z30.d, z30.d, #1, lsl #8"); TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "sqsub z30.d, z30.d, #127, lsl #8"); TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "sqsub z30.d, z30.d, #255, lsl #8"); - TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "uqsub z30.b, z30.b, #0"); - 
TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "uqsub z30.b, z30.b, #127"); - TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "uqsub z30.b, z30.b, #255"); + TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "uqsub z30.b, z30.b, #0"); + TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "uqsub z30.b, z30.b, #127"); + TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "uqsub z30.b, z30.b, #255"); - TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "uqsub z30.h, z30.h, #0"); - TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "uqsub z30.h, z30.h, #127"); - TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "uqsub z30.h, z30.h, #255"); - TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "uqsub z30.h, z30.h, #1, lsl #8"); + TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "uqsub z30.h, z30.h, #0"); + TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "uqsub z30.h, z30.h, #127"); + TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "uqsub z30.h, z30.h, #255"); + TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "uqsub z30.h, z30.h, #1, lsl #8"); TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "uqsub z30.h, z30.h, #127, lsl #8"); TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "uqsub z30.h, z30.h, #255, lsl #8"); - TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "uqsub z30.s, z30.s, #0"); - TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "uqsub z30.s, z30.s, #127"); - TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "uqsub z30.s, z30.s, #255"); - TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "uqsub z30.s, z30.s, #1, lsl #8"); + TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "uqsub z30.s, z30.s, #0"); + TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, 
ZReg::z30, 127), "uqsub z30.s, z30.s, #127"); + TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "uqsub z30.s, z30.s, #255"); + TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "uqsub z30.s, z30.s, #1, lsl #8"); TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "uqsub z30.s, z30.s, #127, lsl #8"); TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "uqsub z30.s, z30.s, #255, lsl #8"); - TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "uqsub z30.d, z30.d, #0"); - TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "uqsub z30.d, z30.d, #127"); - TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "uqsub z30.d, z30.d, #255"); - TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "uqsub z30.d, z30.d, #1, lsl #8"); + TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "uqsub z30.d, z30.d, #0"); + TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "uqsub z30.d, z30.d, #127"); + TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "uqsub z30.d, z30.d, #255"); + TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "uqsub z30.d, z30.d, #1, lsl #8"); TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "uqsub z30.d, z30.d, #127, lsl #8"); TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "uqsub z30.d, z30.d, #255, lsl #8"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer min/max immediate (unpredicated)") { - TEST_SINGLE(smax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "smax z30.b, z30.b, #0"); - TEST_SINGLE(smax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, -128), "smax z30.b, z30.b, #-128"); - TEST_SINGLE(smax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "smax z30.b, z30.b, #127"); + TEST_SINGLE(smax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "smax z30.b, z30.b, #0"); + TEST_SINGLE(smax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, -128), "smax z30.b, 
z30.b, #-128"); + TEST_SINGLE(smax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "smax z30.b, z30.b, #127"); - TEST_SINGLE(smax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "smax z30.h, z30.h, #0"); - TEST_SINGLE(smax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, -128), "smax z30.h, z30.h, #-128"); - TEST_SINGLE(smax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "smax z30.h, z30.h, #127"); + TEST_SINGLE(smax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "smax z30.h, z30.h, #0"); + TEST_SINGLE(smax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, -128), "smax z30.h, z30.h, #-128"); + TEST_SINGLE(smax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "smax z30.h, z30.h, #127"); - TEST_SINGLE(smax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "smax z30.s, z30.s, #0"); - TEST_SINGLE(smax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, -128), "smax z30.s, z30.s, #-128"); - TEST_SINGLE(smax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "smax z30.s, z30.s, #127"); + TEST_SINGLE(smax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "smax z30.s, z30.s, #0"); + TEST_SINGLE(smax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, -128), "smax z30.s, z30.s, #-128"); + TEST_SINGLE(smax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "smax z30.s, z30.s, #127"); - TEST_SINGLE(smax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "smax z30.d, z30.d, #0"); - TEST_SINGLE(smax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, -128), "smax z30.d, z30.d, #-128"); - TEST_SINGLE(smax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "smax z30.d, z30.d, #127"); + TEST_SINGLE(smax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "smax z30.d, z30.d, #0"); + TEST_SINGLE(smax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, -128), "smax z30.d, z30.d, #-128"); + TEST_SINGLE(smax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "smax z30.d, z30.d, #127"); - TEST_SINGLE(smin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "smin z30.b, z30.b, #0"); - TEST_SINGLE(smin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, -128), "smin z30.b, z30.b, #-128"); - 
TEST_SINGLE(smin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "smin z30.b, z30.b, #127"); + TEST_SINGLE(smin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "smin z30.b, z30.b, #0"); + TEST_SINGLE(smin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, -128), "smin z30.b, z30.b, #-128"); + TEST_SINGLE(smin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "smin z30.b, z30.b, #127"); - TEST_SINGLE(smin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "smin z30.h, z30.h, #0"); - TEST_SINGLE(smin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, -128), "smin z30.h, z30.h, #-128"); - TEST_SINGLE(smin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "smin z30.h, z30.h, #127"); + TEST_SINGLE(smin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "smin z30.h, z30.h, #0"); + TEST_SINGLE(smin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, -128), "smin z30.h, z30.h, #-128"); + TEST_SINGLE(smin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "smin z30.h, z30.h, #127"); - TEST_SINGLE(smin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "smin z30.s, z30.s, #0"); - TEST_SINGLE(smin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, -128), "smin z30.s, z30.s, #-128"); - TEST_SINGLE(smin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "smin z30.s, z30.s, #127"); + TEST_SINGLE(smin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "smin z30.s, z30.s, #0"); + TEST_SINGLE(smin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, -128), "smin z30.s, z30.s, #-128"); + TEST_SINGLE(smin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "smin z30.s, z30.s, #127"); - TEST_SINGLE(smin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "smin z30.d, z30.d, #0"); - TEST_SINGLE(smin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, -128), "smin z30.d, z30.d, #-128"); - TEST_SINGLE(smin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "smin z30.d, z30.d, #127"); + TEST_SINGLE(smin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "smin z30.d, z30.d, #0"); + TEST_SINGLE(smin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, -128), "smin z30.d, z30.d, #-128"); + 
TEST_SINGLE(smin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "smin z30.d, z30.d, #127"); - TEST_SINGLE(umax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "umax z30.b, z30.b, #0"); - TEST_SINGLE(umax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "umax z30.b, z30.b, #127"); - TEST_SINGLE(umax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "umax z30.b, z30.b, #255"); + TEST_SINGLE(umax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "umax z30.b, z30.b, #0"); + TEST_SINGLE(umax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "umax z30.b, z30.b, #127"); + TEST_SINGLE(umax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "umax z30.b, z30.b, #255"); - TEST_SINGLE(umax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "umax z30.h, z30.h, #0"); - TEST_SINGLE(umax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "umax z30.h, z30.h, #127"); - TEST_SINGLE(umax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "umax z30.h, z30.h, #255"); + TEST_SINGLE(umax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "umax z30.h, z30.h, #0"); + TEST_SINGLE(umax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "umax z30.h, z30.h, #127"); + TEST_SINGLE(umax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "umax z30.h, z30.h, #255"); - TEST_SINGLE(umax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "umax z30.s, z30.s, #0"); - TEST_SINGLE(umax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "umax z30.s, z30.s, #127"); - TEST_SINGLE(umax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "umax z30.s, z30.s, #255"); + TEST_SINGLE(umax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "umax z30.s, z30.s, #0"); + TEST_SINGLE(umax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "umax z30.s, z30.s, #127"); + TEST_SINGLE(umax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "umax z30.s, z30.s, #255"); - TEST_SINGLE(umax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "umax z30.d, z30.d, #0"); - TEST_SINGLE(umax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "umax z30.d, z30.d, #127"); - TEST_SINGLE(umax(SubRegSize::i64Bit, 
ZReg::z30, ZReg::z30, 255), "umax z30.d, z30.d, #255"); + TEST_SINGLE(umax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "umax z30.d, z30.d, #0"); + TEST_SINGLE(umax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "umax z30.d, z30.d, #127"); + TEST_SINGLE(umax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "umax z30.d, z30.d, #255"); - TEST_SINGLE(umin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "umin z30.b, z30.b, #0"); - TEST_SINGLE(umin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "umin z30.b, z30.b, #127"); - TEST_SINGLE(umin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "umin z30.b, z30.b, #255"); + TEST_SINGLE(umin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "umin z30.b, z30.b, #0"); + TEST_SINGLE(umin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "umin z30.b, z30.b, #127"); + TEST_SINGLE(umin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "umin z30.b, z30.b, #255"); - TEST_SINGLE(umin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "umin z30.h, z30.h, #0"); - TEST_SINGLE(umin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "umin z30.h, z30.h, #127"); - TEST_SINGLE(umin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "umin z30.h, z30.h, #255"); + TEST_SINGLE(umin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "umin z30.h, z30.h, #0"); + TEST_SINGLE(umin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "umin z30.h, z30.h, #127"); + TEST_SINGLE(umin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "umin z30.h, z30.h, #255"); - TEST_SINGLE(umin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "umin z30.s, z30.s, #0"); - TEST_SINGLE(umin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "umin z30.s, z30.s, #127"); - TEST_SINGLE(umin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "umin z30.s, z30.s, #255"); + TEST_SINGLE(umin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "umin z30.s, z30.s, #0"); + TEST_SINGLE(umin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "umin z30.s, z30.s, #127"); + TEST_SINGLE(umin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "umin 
z30.s, z30.s, #255"); - TEST_SINGLE(umin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "umin z30.d, z30.d, #0"); - TEST_SINGLE(umin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "umin z30.d, z30.d, #127"); - TEST_SINGLE(umin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "umin z30.d, z30.d, #255"); + TEST_SINGLE(umin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "umin z30.d, z30.d, #0"); + TEST_SINGLE(umin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "umin z30.d, z30.d, #127"); + TEST_SINGLE(umin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "umin z30.d, z30.d, #255"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer multiply immediate (unpredicated)") { - TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "mul z30.b, z30.b, #0"); - TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, -128), "mul z30.b, z30.b, #-128"); - TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "mul z30.b, z30.b, #127"); + TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "mul z30.b, z30.b, #0"); + TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, -128), "mul z30.b, z30.b, #-128"); + TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "mul z30.b, z30.b, #127"); - TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "mul z30.h, z30.h, #0"); - TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, -128), "mul z30.h, z30.h, #-128"); - TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "mul z30.h, z30.h, #127"); + TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "mul z30.h, z30.h, #0"); + TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, -128), "mul z30.h, z30.h, #-128"); + TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "mul z30.h, z30.h, #127"); - TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "mul z30.s, z30.s, #0"); - TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, -128), "mul z30.s, z30.s, #-128"); - 
TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "mul z30.s, z30.s, #127"); + TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "mul z30.s, z30.s, #0"); + TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, -128), "mul z30.s, z30.s, #-128"); + TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "mul z30.s, z30.s, #127"); - TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "mul z30.d, z30.d, #0"); - TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, -128), "mul z30.d, z30.d, #-128"); - TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "mul z30.d, z30.d, #127"); + TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "mul z30.d, z30.d, #0"); + TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, -128), "mul z30.d, z30.d, #-128"); + TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "mul z30.d, z30.d, #127"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE broadcast integer immediate (unpredicated)") { @@ -2342,12 +2314,12 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE broadcast integer immediat TEST_SINGLE(dup_imm(SubRegSize::i32Bit, ZReg::z30, 127), "mov z30.s, #127"); TEST_SINGLE(dup_imm(SubRegSize::i64Bit, ZReg::z30, 127), "mov z30.d, #127"); - //TEST_SINGLE(dup_imm(SubRegSize::i8Bit, ZReg::z30, -32768), "mov z30.b, #-128"); + // TEST_SINGLE(dup_imm(SubRegSize::i8Bit, ZReg::z30, -32768), "mov z30.b, #-128"); TEST_SINGLE(dup_imm(SubRegSize::i16Bit, ZReg::z30, -32768), "mov z30.h, #-128, lsl #8"); TEST_SINGLE(dup_imm(SubRegSize::i32Bit, ZReg::z30, -32768), "mov z30.s, #-128, lsl #8"); TEST_SINGLE(dup_imm(SubRegSize::i64Bit, ZReg::z30, -32768), "mov z30.d, #-128, lsl #8"); - //TEST_SINGLE(dup_imm(SubRegSize::i8Bit, ZReg::z30, 32512), "mov z30.b, #127"); + // TEST_SINGLE(dup_imm(SubRegSize::i8Bit, ZReg::z30, 32512), "mov z30.b, #127"); TEST_SINGLE(dup_imm(SubRegSize::i16Bit, ZReg::z30, 32512), "mov z30.h, #127, lsl #8"); 
TEST_SINGLE(dup_imm(SubRegSize::i32Bit, ZReg::z30, 32512), "mov z30.s, #127, lsl #8"); TEST_SINGLE(dup_imm(SubRegSize::i64Bit, ZReg::z30, 32512), "mov z30.d, #127, lsl #8"); @@ -2432,7 +2404,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE broadcast floating-point i } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate count") { - TEST_SINGLE(cntp(SubRegSize::i8Bit, XReg::x30, PReg::p15, PReg::p7), "cntp x30, p15, p7.b"); + TEST_SINGLE(cntp(SubRegSize::i8Bit, XReg::x30, PReg::p15, PReg::p7), "cntp x30, p15, p7.b"); TEST_SINGLE(cntp(SubRegSize::i16Bit, XReg::x30, PReg::p15, PReg::p7), "cntp x30, p15, p7.h"); TEST_SINGLE(cntp(SubRegSize::i32Bit, XReg::x30, PReg::p15, PReg::p7), "cntp x30, p15, p7.s"); TEST_SINGLE(cntp(SubRegSize::i64Bit, XReg::x30, PReg::p15, PReg::p7), "cntp x30, p15, p7.d"); @@ -2457,42 +2429,42 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE saturating inc/dec vector } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE saturating inc/dec register by predicate count") { - TEST_SINGLE(sqincp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "sqincp x30, p15.b"); + TEST_SINGLE(sqincp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "sqincp x30, p15.b"); TEST_SINGLE(sqincp(SubRegSize::i16Bit, XReg::x30, PReg::p15), "sqincp x30, p15.h"); TEST_SINGLE(sqincp(SubRegSize::i32Bit, XReg::x30, PReg::p15), "sqincp x30, p15.s"); TEST_SINGLE(sqincp(SubRegSize::i64Bit, XReg::x30, PReg::p15), "sqincp x30, p15.d"); - TEST_SINGLE(sqincp(SubRegSize::i8Bit, XReg::x30, PReg::p15, WReg::w30), "sqincp x30, p15.b, w30"); + TEST_SINGLE(sqincp(SubRegSize::i8Bit, XReg::x30, PReg::p15, WReg::w30), "sqincp x30, p15.b, w30"); TEST_SINGLE(sqincp(SubRegSize::i16Bit, XReg::x30, PReg::p15, WReg::w30), "sqincp x30, p15.h, w30"); TEST_SINGLE(sqincp(SubRegSize::i32Bit, XReg::x30, PReg::p15, WReg::w30), "sqincp x30, p15.s, w30"); TEST_SINGLE(sqincp(SubRegSize::i64Bit, XReg::x30, PReg::p15, WReg::w30), "sqincp x30, p15.d, w30"); - TEST_SINGLE(uqincp(SubRegSize::i8Bit, 
XReg::x30, PReg::p15), "uqincp x30, p15.b"); + TEST_SINGLE(uqincp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "uqincp x30, p15.b"); TEST_SINGLE(uqincp(SubRegSize::i16Bit, XReg::x30, PReg::p15), "uqincp x30, p15.h"); TEST_SINGLE(uqincp(SubRegSize::i32Bit, XReg::x30, PReg::p15), "uqincp x30, p15.s"); TEST_SINGLE(uqincp(SubRegSize::i64Bit, XReg::x30, PReg::p15), "uqincp x30, p15.d"); - TEST_SINGLE(uqincp(SubRegSize::i8Bit, WReg::w30, PReg::p15), "uqincp w30, p15.b"); + TEST_SINGLE(uqincp(SubRegSize::i8Bit, WReg::w30, PReg::p15), "uqincp w30, p15.b"); TEST_SINGLE(uqincp(SubRegSize::i16Bit, WReg::w30, PReg::p15), "uqincp w30, p15.h"); TEST_SINGLE(uqincp(SubRegSize::i32Bit, WReg::w30, PReg::p15), "uqincp w30, p15.s"); TEST_SINGLE(uqincp(SubRegSize::i64Bit, WReg::w30, PReg::p15), "uqincp w30, p15.d"); - TEST_SINGLE(sqdecp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "sqdecp x30, p15.b"); + TEST_SINGLE(sqdecp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "sqdecp x30, p15.b"); TEST_SINGLE(sqdecp(SubRegSize::i16Bit, XReg::x30, PReg::p15), "sqdecp x30, p15.h"); TEST_SINGLE(sqdecp(SubRegSize::i32Bit, XReg::x30, PReg::p15), "sqdecp x30, p15.s"); TEST_SINGLE(sqdecp(SubRegSize::i64Bit, XReg::x30, PReg::p15), "sqdecp x30, p15.d"); - TEST_SINGLE(sqdecp(SubRegSize::i8Bit, XReg::x30, PReg::p15, WReg::w30), "sqdecp x30, p15.b, w30"); + TEST_SINGLE(sqdecp(SubRegSize::i8Bit, XReg::x30, PReg::p15, WReg::w30), "sqdecp x30, p15.b, w30"); TEST_SINGLE(sqdecp(SubRegSize::i16Bit, XReg::x30, PReg::p15, WReg::w30), "sqdecp x30, p15.h, w30"); TEST_SINGLE(sqdecp(SubRegSize::i32Bit, XReg::x30, PReg::p15, WReg::w30), "sqdecp x30, p15.s, w30"); TEST_SINGLE(sqdecp(SubRegSize::i64Bit, XReg::x30, PReg::p15, WReg::w30), "sqdecp x30, p15.d, w30"); - TEST_SINGLE(uqdecp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "uqdecp x30, p15.b"); + TEST_SINGLE(uqdecp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "uqdecp x30, p15.b"); TEST_SINGLE(uqdecp(SubRegSize::i16Bit, XReg::x30, PReg::p15), "uqdecp x30, p15.h"); 
TEST_SINGLE(uqdecp(SubRegSize::i32Bit, XReg::x30, PReg::p15), "uqdecp x30, p15.s"); TEST_SINGLE(uqdecp(SubRegSize::i64Bit, XReg::x30, PReg::p15), "uqdecp x30, p15.d"); - TEST_SINGLE(uqdecp(SubRegSize::i8Bit, WReg::w30, PReg::p15), "uqdecp w30, p15.b"); + TEST_SINGLE(uqdecp(SubRegSize::i8Bit, WReg::w30, PReg::p15), "uqdecp w30, p15.b"); TEST_SINGLE(uqdecp(SubRegSize::i16Bit, WReg::w30, PReg::p15), "uqdecp w30, p15.h"); TEST_SINGLE(uqdecp(SubRegSize::i32Bit, WReg::w30, PReg::p15), "uqdecp w30, p15.s"); TEST_SINGLE(uqdecp(SubRegSize::i64Bit, WReg::w30, PReg::p15), "uqdecp w30, p15.d"); @@ -2509,19 +2481,19 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE inc/dec vector by predicat } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE inc/dec register by predicate count") { - TEST_SINGLE(incp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "incp x30, p15.b"); + TEST_SINGLE(incp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "incp x30, p15.b"); TEST_SINGLE(incp(SubRegSize::i16Bit, XReg::x30, PReg::p15), "incp x30, p15.h"); TEST_SINGLE(incp(SubRegSize::i32Bit, XReg::x30, PReg::p15), "incp x30, p15.s"); TEST_SINGLE(incp(SubRegSize::i64Bit, XReg::x30, PReg::p15), "incp x30, p15.d"); - TEST_SINGLE(decp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "decp x30, p15.b"); + TEST_SINGLE(decp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "decp x30, p15.b"); TEST_SINGLE(decp(SubRegSize::i16Bit, XReg::x30, PReg::p15), "decp x30, p15.h"); TEST_SINGLE(decp(SubRegSize::i32Bit, XReg::x30, PReg::p15), "decp x30, p15.s"); TEST_SINGLE(decp(SubRegSize::i64Bit, XReg::x30, PReg::p15), "decp x30, p15.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE FFR write from predicate") { - TEST_SINGLE(wrffr(PReg::p7), "wrffr p7.b"); + TEST_SINGLE(wrffr(PReg::p7), "wrffr p7.b"); TEST_SINGLE(wrffr(PReg::p15), "wrffr p15.b"); } @@ -2530,13 +2502,13 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE FFR initialise") { } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Integer Multiply-Add - 
Unpredicated") { - TEST_SINGLE(cdot(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cdot z30.s, z29.b, z28.b, #0"); - TEST_SINGLE(cdot(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cdot z30.s, z29.b, z28.b, #90"); + TEST_SINGLE(cdot(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cdot z30.s, z29.b, z28.b, #0"); + TEST_SINGLE(cdot(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cdot z30.s, z29.b, z28.b, #90"); TEST_SINGLE(cdot(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cdot z30.s, z29.b, z28.b, #180"); TEST_SINGLE(cdot(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cdot z30.s, z29.b, z28.b, #270"); - TEST_SINGLE(cdot(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cdot z30.d, z29.h, z28.h, #0"); - TEST_SINGLE(cdot(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cdot z30.d, z29.h, z28.h, #90"); + TEST_SINGLE(cdot(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cdot z30.d, z29.h, z28.h, #0"); + TEST_SINGLE(cdot(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cdot z30.d, z29.h, z28.h, #90"); TEST_SINGLE(cdot(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cdot z30.d, z29.h, z28.h, #180"); TEST_SINGLE(cdot(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cdot z30.d, z29.h, z28.h, #270"); } @@ -2560,43 +2532,43 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 saturating multiply-add i } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 complex integer multiply-add") { - TEST_SINGLE(cmla(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cmla z30.b, z29.b, z28.b, #0"); - TEST_SINGLE(cmla(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cmla z30.b, z29.b, z28.b, #90"); - 
TEST_SINGLE(cmla(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cmla z30.b, z29.b, z28.b, #180"); - TEST_SINGLE(cmla(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cmla z30.b, z29.b, z28.b, #270"); + TEST_SINGLE(cmla(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cmla z30.b, z29.b, z28.b, #0"); + TEST_SINGLE(cmla(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cmla z30.b, z29.b, z28.b, #90"); + TEST_SINGLE(cmla(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cmla z30.b, z29.b, z28.b, #180"); + TEST_SINGLE(cmla(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cmla z30.b, z29.b, z28.b, #270"); - TEST_SINGLE(cmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cmla z30.h, z29.h, z28.h, #0"); - TEST_SINGLE(cmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cmla z30.h, z29.h, z28.h, #90"); + TEST_SINGLE(cmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cmla z30.h, z29.h, z28.h, #0"); + TEST_SINGLE(cmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cmla z30.h, z29.h, z28.h, #90"); TEST_SINGLE(cmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cmla z30.h, z29.h, z28.h, #180"); TEST_SINGLE(cmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cmla z30.h, z29.h, z28.h, #270"); - TEST_SINGLE(cmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cmla z30.s, z29.s, z28.s, #0"); - TEST_SINGLE(cmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cmla z30.s, z29.s, z28.s, #90"); + TEST_SINGLE(cmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cmla z30.s, z29.s, z28.s, #0"); + TEST_SINGLE(cmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, 
Rotation::ROTATE_90), "cmla z30.s, z29.s, z28.s, #90"); TEST_SINGLE(cmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cmla z30.s, z29.s, z28.s, #180"); TEST_SINGLE(cmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cmla z30.s, z29.s, z28.s, #270"); - TEST_SINGLE(cmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cmla z30.d, z29.d, z28.d, #0"); - TEST_SINGLE(cmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cmla z30.d, z29.d, z28.d, #90"); + TEST_SINGLE(cmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cmla z30.d, z29.d, z28.d, #0"); + TEST_SINGLE(cmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cmla z30.d, z29.d, z28.d, #90"); TEST_SINGLE(cmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cmla z30.d, z29.d, z28.d, #180"); TEST_SINGLE(cmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cmla z30.d, z29.d, z28.d, #270"); - TEST_SINGLE(sqrdcmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "sqrdcmlah z30.b, z29.b, z28.b, #0"); - TEST_SINGLE(sqrdcmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "sqrdcmlah z30.b, z29.b, z28.b, #90"); - TEST_SINGLE(sqrdcmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "sqrdcmlah z30.b, z29.b, z28.b, #180"); - TEST_SINGLE(sqrdcmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "sqrdcmlah z30.b, z29.b, z28.b, #270"); + TEST_SINGLE(sqrdcmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "sqrdcmlah z30.b, z29.b, z28.b, #0"); + TEST_SINGLE(sqrdcmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "sqrdcmlah z30.b, z29.b, z28.b, #90"); + TEST_SINGLE(sqrdcmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), 
"sqrdcmlah z30.b, z29.b, z28.b, #180"); + TEST_SINGLE(sqrdcmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "sqrdcmlah z30.b, z29.b, z28.b, #270"); - TEST_SINGLE(sqrdcmlah(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "sqrdcmlah z30.h, z29.h, z28.h, #0"); - TEST_SINGLE(sqrdcmlah(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "sqrdcmlah z30.h, z29.h, z28.h, #90"); + TEST_SINGLE(sqrdcmlah(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "sqrdcmlah z30.h, z29.h, z28.h, #0"); + TEST_SINGLE(sqrdcmlah(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "sqrdcmlah z30.h, z29.h, z28.h, #90"); TEST_SINGLE(sqrdcmlah(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "sqrdcmlah z30.h, z29.h, z28.h, #180"); TEST_SINGLE(sqrdcmlah(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "sqrdcmlah z30.h, z29.h, z28.h, #270"); - TEST_SINGLE(sqrdcmlah(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "sqrdcmlah z30.s, z29.s, z28.s, #0"); - TEST_SINGLE(sqrdcmlah(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "sqrdcmlah z30.s, z29.s, z28.s, #90"); + TEST_SINGLE(sqrdcmlah(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "sqrdcmlah z30.s, z29.s, z28.s, #0"); + TEST_SINGLE(sqrdcmlah(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "sqrdcmlah z30.s, z29.s, z28.s, #90"); TEST_SINGLE(sqrdcmlah(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "sqrdcmlah z30.s, z29.s, z28.s, #180"); TEST_SINGLE(sqrdcmlah(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "sqrdcmlah z30.s, z29.s, z28.s, #270"); - TEST_SINGLE(sqrdcmlah(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "sqrdcmlah z30.d, z29.d, z28.d, #0"); - 
TEST_SINGLE(sqrdcmlah(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "sqrdcmlah z30.d, z29.d, z28.d, #90"); + TEST_SINGLE(sqrdcmlah(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "sqrdcmlah z30.d, z29.d, z28.d, #0"); + TEST_SINGLE(sqrdcmlah(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "sqrdcmlah z30.d, z29.d, z28.d, #90"); TEST_SINGLE(sqrdcmlah(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "sqrdcmlah z30.d, z29.d, z28.d, #180"); TEST_SINGLE(sqrdcmlah(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "sqrdcmlah z30.d, z29.d, z28.d, #270"); } @@ -2654,12 +2626,12 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 saturating multiply-add l } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 saturating multiply-add high") { - TEST_SINGLE(sqrdmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlah z30.b, z29.b, z28.b"); + TEST_SINGLE(sqrdmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlah z30.b, z29.b, z28.b"); TEST_SINGLE(sqrdmlah(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlah z30.h, z29.h, z28.h"); TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlah z30.s, z29.s, z28.s"); TEST_SINGLE(sqrdmlah(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlah z30.d, z29.d, z28.d"); - TEST_SINGLE(sqrdmlsh(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlsh z30.b, z29.b, z28.b"); + TEST_SINGLE(sqrdmlsh(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlsh z30.b, z29.b, z28.b"); TEST_SINGLE(sqrdmlsh(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlsh z30.h, z29.h, z28.h"); TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlsh z30.s, z29.s, z28.s"); TEST_SINGLE(sqrdmlsh(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlsh z30.d, z29.d, z28.d"); @@ -2679,260 +2651,260 @@ 
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer pairwise add and TEST_SINGLE(uadalp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uadalp z30.d, p6/m, z29.s"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer unary operations (predicated)") { - TEST_SINGLE(urecpe(ZReg::z30, PReg::p6.Merging(), ZReg::z29), "urecpe z30.s, p6/m, z29.s"); + TEST_SINGLE(urecpe(ZReg::z30, PReg::p6.Merging(), ZReg::z29), "urecpe z30.s, p6/m, z29.s"); TEST_SINGLE(ursqrte(ZReg::z30, PReg::p6.Merging(), ZReg::z29), "ursqrte z30.s, p6/m, z29.s"); - TEST_SINGLE(sqabs(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqabs z30.b, p6/m, z29.b"); + TEST_SINGLE(sqabs(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqabs z30.b, p6/m, z29.b"); TEST_SINGLE(sqabs(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqabs z30.h, p6/m, z29.h"); TEST_SINGLE(sqabs(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqabs z30.s, p6/m, z29.s"); TEST_SINGLE(sqabs(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqabs z30.d, p6/m, z29.d"); - TEST_SINGLE(sqneg(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqneg z30.b, p6/m, z29.b"); + TEST_SINGLE(sqneg(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqneg z30.b, p6/m, z29.b"); TEST_SINGLE(sqneg(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqneg z30.h, p6/m, z29.h"); TEST_SINGLE(sqneg(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqneg z30.s, p6/m, z29.s"); TEST_SINGLE(sqneg(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqneg z30.d, p6/m, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 saturating/rounding bitwise shift left (predicated)") { - TEST_SINGLE(srshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshl z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(srshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 
ZReg::z30, ZReg::z29), "srshl z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(srshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshl z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(srshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshl z30.d, p6/m, z30.d, z29.d"); - - TEST_SINGLE(urshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshl z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(urshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshl z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(urshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshl z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(urshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshl z30.d, p6/m, z30.d, z29.d"); - - TEST_SINGLE(srshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshlr z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(srshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshlr z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(srshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshlr z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(srshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshlr z30.d, p6/m, z30.d, z29.d"); - - TEST_SINGLE(urshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshlr z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(urshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshlr z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(urshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshlr z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(urshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshlr z30.d, p6/m, z30.d, z29.d"); - - TEST_SINGLE(sqshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshl 
z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(sqshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshl z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(sqshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshl z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(sqshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshl z30.d, p6/m, z30.d, z29.d"); - - TEST_SINGLE(uqshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshl z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(uqshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshl z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(uqshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshl z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(uqshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshl z30.d, p6/m, z30.d, z29.d"); - - TEST_SINGLE(sqrshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshl z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(sqrshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshl z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(sqrshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshl z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(sqrshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshl z30.d, p6/m, z30.d, z29.d"); - - TEST_SINGLE(uqrshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshl z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(uqrshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshl z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(uqrshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshl z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(uqrshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshl z30.d, p6/m, z30.d, z29.d"); - 
- TEST_SINGLE(sqshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshlr z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(sqshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshlr z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(sqshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshlr z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(sqshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshlr z30.d, p6/m, z30.d, z29.d"); - - TEST_SINGLE(uqshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshlr z30.b, p6/m, z30.b, z29.b"); - TEST_SINGLE(uqshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshlr z30.h, p6/m, z30.h, z29.h"); - TEST_SINGLE(uqshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshlr z30.s, p6/m, z30.s, z29.s"); - TEST_SINGLE(uqshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshlr z30.d, p6/m, z30.d, z29.d"); - - TEST_SINGLE(sqrshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshlr z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(srshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshl z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(srshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshl z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(srshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshl z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(srshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshl z30.d, p6/m, z30.d, z29.d"); + + TEST_SINGLE(urshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshl z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(urshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshl z30.h, p6/m, z30.h, z29.h"); + 
TEST_SINGLE(urshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshl z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(urshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshl z30.d, p6/m, z30.d, z29.d"); + + TEST_SINGLE(srshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshlr z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(srshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshlr z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(srshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshlr z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(srshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshlr z30.d, p6/m, z30.d, z29.d"); + + TEST_SINGLE(urshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshlr z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(urshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshlr z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(urshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshlr z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(urshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshlr z30.d, p6/m, z30.d, z29.d"); + + TEST_SINGLE(sqshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshl z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(sqshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshl z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(sqshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshl z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(sqshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshl z30.d, p6/m, z30.d, z29.d"); + + TEST_SINGLE(uqshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshl z30.b, p6/m, z30.b, z29.b"); + 
TEST_SINGLE(uqshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshl z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(uqshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshl z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(uqshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshl z30.d, p6/m, z30.d, z29.d"); + + TEST_SINGLE(sqrshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshl z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(sqrshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshl z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(sqrshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshl z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(sqrshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshl z30.d, p6/m, z30.d, z29.d"); + + TEST_SINGLE(uqrshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshl z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(uqrshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshl z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(uqrshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshl z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(uqrshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshl z30.d, p6/m, z30.d, z29.d"); + + TEST_SINGLE(sqshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshlr z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(sqshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshlr z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(sqshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshlr z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(sqshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshlr z30.d, p6/m, z30.d, z29.d"); + + 
TEST_SINGLE(uqshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshlr z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(uqshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshlr z30.h, p6/m, z30.h, z29.h"); + TEST_SINGLE(uqshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshlr z30.s, p6/m, z30.s, z29.s"); + TEST_SINGLE(uqshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshlr z30.d, p6/m, z30.d, z29.d"); + + TEST_SINGLE(sqrshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshlr z30.b, p6/m, z30.b, z29.b"); TEST_SINGLE(sqrshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshlr z30.h, p6/m, z30.h, z29.h"); TEST_SINGLE(sqrshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshlr z30.s, p6/m, z30.s, z29.s"); TEST_SINGLE(sqrshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshlr z30.d, p6/m, z30.d, z29.d"); - TEST_SINGLE(uqrshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshlr z30.b, p6/m, z30.b, z29.b"); + TEST_SINGLE(uqrshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshlr z30.b, p6/m, z30.b, z29.b"); TEST_SINGLE(uqrshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshlr z30.h, p6/m, z30.h, z29.h"); TEST_SINGLE(uqrshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshlr z30.s, p6/m, z30.s, z29.s"); TEST_SINGLE(uqrshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshlr z30.d, p6/m, z30.d, z29.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer halving add/subtract (predicated)") { - TEST_SINGLE(shadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(shadd(SubRegSize::i16Bit, 
ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(shadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(shadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(shadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.q, p6/m, z30.q, z28.q"); - - TEST_SINGLE(uhadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(uhadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(uhadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(uhadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(uhadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.q, p6/m, z30.q, z28.q"); - TEST_SINGLE(shsub(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(shsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(shsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(shsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(shsub(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.q, p6/m, z30.q, z28.q"); - - TEST_SINGLE(uhsub(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(uhsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 
ZReg::z30, ZReg::z28), "uhsub z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(uhsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(uhsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(uhsub(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.q, p6/m, z30.q, z28.q"); - - TEST_SINGLE(srhadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(srhadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(srhadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(srhadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(srhadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.q, p6/m, z30.q, z28.q"); - - TEST_SINGLE(urhadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(urhadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(urhadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(urhadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(urhadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.q, p6/m, z30.q, z28.q"); - - TEST_SINGLE(shsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(shsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 
ZReg::z28), "shsubr z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(shsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(shsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(shsubr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.q, p6/m, z30.q, z28.q"); - - TEST_SINGLE(uhsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(uhsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(uhsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(uhsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(uhsubr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.q, p6/m, z30.q, z28.q"); + TEST_SINGLE(shadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(shadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(shadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(shadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(shadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.q, p6/m, z30.q, z28.q"); + + TEST_SINGLE(uhadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(uhadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd 
z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(uhadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(uhadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(uhadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.q, p6/m, z30.q, z28.q"); + TEST_SINGLE(shsub(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(shsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(shsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(shsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(shsub(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.q, p6/m, z30.q, z28.q"); + + TEST_SINGLE(uhsub(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(uhsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(uhsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(uhsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(uhsub(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.q, p6/m, z30.q, z28.q"); + + TEST_SINGLE(srhadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(srhadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.h, p6/m, z30.h, z28.h"); + 
TEST_SINGLE(srhadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(srhadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(srhadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.q, p6/m, z30.q, z28.q"); + + TEST_SINGLE(urhadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(urhadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(urhadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(urhadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(urhadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.q, p6/m, z30.q, z28.q"); + + TEST_SINGLE(shsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(shsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(shsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(shsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(shsubr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.q, p6/m, z30.q, z28.q"); + + TEST_SINGLE(uhsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(uhsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.h, p6/m, z30.h, z28.h"); + 
TEST_SINGLE(uhsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(uhsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(uhsubr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.q, p6/m, z30.q, z28.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer pairwise arithmetic") { - TEST_SINGLE(addp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(addp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(addp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(addp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(addp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.q, p6/m, z30.q, z28.q"); - - TEST_SINGLE(smaxp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(smaxp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(smaxp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(smaxp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(smaxp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.q, p6/m, z30.q, z28.q"); - - TEST_SINGLE(umaxp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(umaxp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 
ZReg::z28), "umaxp z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(umaxp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(umaxp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(umaxp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.q, p6/m, z30.q, z28.q"); - - - TEST_SINGLE(sminp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(sminp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(sminp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(sminp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(sminp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.q, p6/m, z30.q, z28.q"); - - - TEST_SINGLE(uminp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(uminp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(uminp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(uminp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(uminp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.q, p6/m, z30.q, z28.q"); + TEST_SINGLE(addp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(addp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.h, p6/m, z30.h, 
z28.h"); + TEST_SINGLE(addp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(addp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(addp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.q, p6/m, z30.q, z28.q"); + + TEST_SINGLE(smaxp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(smaxp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(smaxp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(smaxp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(smaxp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.q, p6/m, z30.q, z28.q"); + + TEST_SINGLE(umaxp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(umaxp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(umaxp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(umaxp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(umaxp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.q, p6/m, z30.q, z28.q"); + + + TEST_SINGLE(sminp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(sminp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.h, p6/m, z30.h, z28.h"); + 
TEST_SINGLE(sminp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(sminp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(sminp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.q, p6/m, z30.q, z28.q"); + + + TEST_SINGLE(uminp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(uminp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(uminp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(uminp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(uminp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.q, p6/m, z30.q, z28.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 saturating add/subtract") { - TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqadd z30.b, p7/m, z30.b, z28.b"); + TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqadd z30.b, p7/m, z30.b, z28.b"); TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqadd z30.h, p7/m, z30.h, z28.h"); TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqadd z30.s, p7/m, z30.s, z28.s"); TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqadd z30.d, p7/m, z30.d, z28.d"); - TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqadd z30.b, p7/m, z30.b, z28.b"); + TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), 
"uqadd z30.b, p7/m, z30.b, z28.b"); TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqadd z30.h, p7/m, z30.h, z28.h"); TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqadd z30.s, p7/m, z30.s, z28.s"); TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqadd z30.d, p7/m, z30.d, z28.d"); - TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsub z30.b, p7/m, z30.b, z28.b"); + TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsub z30.b, p7/m, z30.b, z28.b"); TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsub z30.h, p7/m, z30.h, z28.h"); TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsub z30.s, p7/m, z30.s, z28.s"); TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsub z30.d, p7/m, z30.d, z28.d"); - TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsub z30.b, p7/m, z30.b, z28.b"); + TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsub z30.b, p7/m, z30.b, z28.b"); TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsub z30.h, p7/m, z30.h, z28.h"); TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsub z30.s, p7/m, z30.s, z28.s"); TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsub z30.d, p7/m, z30.d, z28.d"); - TEST_SINGLE(suqadd(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "suqadd z30.b, p7/m, z30.b, z28.b"); + TEST_SINGLE(suqadd(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "suqadd z30.b, p7/m, z30.b, z28.b"); 
TEST_SINGLE(suqadd(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "suqadd z30.h, p7/m, z30.h, z28.h"); TEST_SINGLE(suqadd(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "suqadd z30.s, p7/m, z30.s, z28.s"); TEST_SINGLE(suqadd(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "suqadd z30.d, p7/m, z30.d, z28.d"); - TEST_SINGLE(usqadd(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "usqadd z30.b, p7/m, z30.b, z28.b"); + TEST_SINGLE(usqadd(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "usqadd z30.b, p7/m, z30.b, z28.b"); TEST_SINGLE(usqadd(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "usqadd z30.h, p7/m, z30.h, z28.h"); TEST_SINGLE(usqadd(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "usqadd z30.s, p7/m, z30.s, z28.s"); TEST_SINGLE(usqadd(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "usqadd z30.d, p7/m, z30.d, z28.d"); - TEST_SINGLE(sqsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsubr z30.b, p7/m, z30.b, z28.b"); + TEST_SINGLE(sqsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsubr z30.b, p7/m, z30.b, z28.b"); TEST_SINGLE(sqsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsubr z30.h, p7/m, z30.h, z28.h"); TEST_SINGLE(sqsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsubr z30.s, p7/m, z30.s, z28.s"); TEST_SINGLE(sqsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsubr z30.d, p7/m, z30.d, z28.d"); - TEST_SINGLE(uqsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsubr z30.b, p7/m, z30.b, z28.b"); + TEST_SINGLE(uqsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsubr z30.b, p7/m, z30.b, z28.b"); TEST_SINGLE(uqsubr(SubRegSize::i16Bit, 
ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsubr z30.h, p7/m, z30.h, z28.h"); TEST_SINGLE(uqsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsubr z30.s, p7/m, z30.s, z28.s"); TEST_SINGLE(uqsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsubr z30.d, p7/m, z30.d, z28.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer add/subtract long") { - //TEST_SINGLE(saddlb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlb z30.b, z29.b, z28.b"); + // TEST_SINGLE(saddlb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlb z30.b, z29.b, z28.b"); TEST_SINGLE(saddlb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlb z30.h, z29.b, z28.b"); TEST_SINGLE(saddlb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlb z30.s, z29.h, z28.h"); TEST_SINGLE(saddlb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlb z30.d, z29.s, z28.s"); - //TEST_SINGLE(saddlt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlt z30.b, z29.b, z28.b"); + // TEST_SINGLE(saddlt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlt z30.b, z29.b, z28.b"); TEST_SINGLE(saddlt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlt z30.h, z29.b, z28.b"); TEST_SINGLE(saddlt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlt z30.s, z29.h, z28.h"); TEST_SINGLE(saddlt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlt z30.d, z29.s, z28.s"); - //TEST_SINGLE(uaddlb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlb z30.b, z29.b, z28.b"); + // TEST_SINGLE(uaddlb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlb z30.b, z29.b, z28.b"); TEST_SINGLE(uaddlb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlb z30.h, z29.b, z28.b"); TEST_SINGLE(uaddlb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlb z30.s, z29.h, z28.h"); TEST_SINGLE(uaddlb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlb 
z30.d, z29.s, z28.s"); - //TEST_SINGLE(uaddlt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlt z30.b, z29.b, z28.b"); + // TEST_SINGLE(uaddlt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlt z30.b, z29.b, z28.b"); TEST_SINGLE(uaddlt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlt z30.h, z29.b, z28.b"); TEST_SINGLE(uaddlt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlt z30.s, z29.h, z28.h"); TEST_SINGLE(uaddlt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlt z30.d, z29.s, z28.s"); - //TEST_SINGLE(ssublb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublb z30.b, z29.b, z28.b"); + // TEST_SINGLE(ssublb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublb z30.b, z29.b, z28.b"); TEST_SINGLE(ssublb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublb z30.h, z29.b, z28.b"); TEST_SINGLE(ssublb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublb z30.s, z29.h, z28.h"); TEST_SINGLE(ssublb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublb z30.d, z29.s, z28.s"); - //TEST_SINGLE(ssublt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublt z30.b, z29.b, z28.b"); + // TEST_SINGLE(ssublt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublt z30.b, z29.b, z28.b"); TEST_SINGLE(ssublt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublt z30.h, z29.b, z28.b"); TEST_SINGLE(ssublt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublt z30.s, z29.h, z28.h"); TEST_SINGLE(ssublt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublt z30.d, z29.s, z28.s"); - //TEST_SINGLE(usublb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublb z30.b, z29.b, z28.b"); + // TEST_SINGLE(usublb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublb z30.b, z29.b, z28.b"); TEST_SINGLE(usublb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublb z30.h, z29.b, z28.b"); TEST_SINGLE(usublb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), 
"usublb z30.s, z29.h, z28.h"); TEST_SINGLE(usublb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublb z30.d, z29.s, z28.s"); - //TEST_SINGLE(usublt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublt z30.b, z29.b, z28.b"); + // TEST_SINGLE(usublt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublt z30.b, z29.b, z28.b"); TEST_SINGLE(usublt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublt z30.h, z29.b, z28.b"); TEST_SINGLE(usublt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublt z30.s, z29.h, z28.h"); TEST_SINGLE(usublt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublt z30.d, z29.s, z28.s"); - //TEST_SINGLE(sabdlb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlb z30.b, z29.b, z28.b"); + // TEST_SINGLE(sabdlb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlb z30.b, z29.b, z28.b"); TEST_SINGLE(sabdlb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlb z30.h, z29.b, z28.b"); TEST_SINGLE(sabdlb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlb z30.s, z29.h, z28.h"); TEST_SINGLE(sabdlb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlb z30.d, z29.s, z28.s"); - //TEST_SINGLE(sabdlt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlt z30.b, z29.b, z28.b"); + // TEST_SINGLE(sabdlt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlt z30.b, z29.b, z28.b"); TEST_SINGLE(sabdlt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlt z30.h, z29.b, z28.b"); TEST_SINGLE(sabdlt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlt z30.s, z29.h, z28.h"); TEST_SINGLE(sabdlt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlt z30.d, z29.s, z28.s"); - //TEST_SINGLE(uabdlb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlb z30.b, z29.b, z28.b"); + // TEST_SINGLE(uabdlb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlb z30.b, z29.b, z28.b"); TEST_SINGLE(uabdlb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 
ZReg::z28), "uabdlb z30.h, z29.b, z28.b"); TEST_SINGLE(uabdlb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlb z30.s, z29.h, z28.h"); TEST_SINGLE(uabdlb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlb z30.d, z29.s, z28.s"); - //TEST_SINGLE(uabdlt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlt z30.b, z29.b, z28.b"); + // TEST_SINGLE(uabdlt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlt z30.b, z29.b, z28.b"); TEST_SINGLE(uabdlt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlt z30.h, z29.b, z28.b"); TEST_SINGLE(uabdlt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlt z30.s, z29.h, z28.h"); TEST_SINGLE(uabdlt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlt z30.d, z29.s, z28.s"); @@ -2971,73 +2943,73 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer add/subtract wide TEST_SINGLE(usubwt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usubwt z30.d, z29.d, z28.s"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer multiply long") { - //TEST_SINGLE(sqdmullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullb z30.b, z29.b, z28.b"); + // TEST_SINGLE(sqdmullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullb z30.b, z29.b, z28.b"); TEST_SINGLE(sqdmullb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullb z30.h, z29.b, z28.b"); TEST_SINGLE(sqdmullb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullb z30.s, z29.h, z28.h"); TEST_SINGLE(sqdmullb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullb z30.d, z29.s, z28.s"); - //TEST_SINGLE(sqdmullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullt z30.b, z29.b, z28.b"); + // TEST_SINGLE(sqdmullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullt z30.b, z29.b, z28.b"); TEST_SINGLE(sqdmullt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullt z30.h, z29.b, z28.b"); TEST_SINGLE(sqdmullt(SubRegSize::i32Bit, ZReg::z30, 
ZReg::z29, ZReg::z28), "sqdmullt z30.s, z29.h, z28.h"); TEST_SINGLE(sqdmullt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullt z30.d, z29.s, z28.s"); - //TEST_SINGLE(pmullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.b, z29.b, z28.b"); + // TEST_SINGLE(pmullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.b, z29.b, z28.b"); TEST_SINGLE(pmullb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.h, z29.b, z28.b"); TEST_SINGLE(pmullb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.s, z29.h, z28.h"); TEST_SINGLE(pmullb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.d, z29.s, z28.s"); - //TEST_SINGLE(pmullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.b, z29.b, z28.b"); + // TEST_SINGLE(pmullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.b, z29.b, z28.b"); TEST_SINGLE(pmullt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.h, z29.b, z28.b"); TEST_SINGLE(pmullt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.s, z29.h, z28.h"); TEST_SINGLE(pmullt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.d, z29.s, z28.s"); - //TEST_SINGLE(smullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullb z30.b, z29.b, z28.b"); + // TEST_SINGLE(smullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullb z30.b, z29.b, z28.b"); TEST_SINGLE(smullb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullb z30.h, z29.b, z28.b"); TEST_SINGLE(smullb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullb z30.s, z29.h, z28.h"); TEST_SINGLE(smullb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullb z30.d, z29.s, z28.s"); - //TEST_SINGLE(smullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullt z30.b, z29.b, z28.b"); + // TEST_SINGLE(smullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullt z30.b, z29.b, z28.b"); TEST_SINGLE(smullt(SubRegSize::i16Bit, 
ZReg::z30, ZReg::z29, ZReg::z28), "smullt z30.h, z29.b, z28.b"); TEST_SINGLE(smullt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullt z30.s, z29.h, z28.h"); TEST_SINGLE(smullt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullt z30.d, z29.s, z28.s"); - //TEST_SINGLE(umullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullb z30.b, z29.b, z28.b"); + // TEST_SINGLE(umullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullb z30.b, z29.b, z28.b"); TEST_SINGLE(umullb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullb z30.h, z29.b, z28.b"); TEST_SINGLE(umullb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullb z30.s, z29.h, z28.h"); TEST_SINGLE(umullb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullb z30.d, z29.s, z28.s"); - //TEST_SINGLE(umullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullt z30.b, z29.b, z28.b"); + // TEST_SINGLE(umullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullt z30.b, z29.b, z28.b"); TEST_SINGLE(umullt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullt z30.h, z29.b, z28.b"); TEST_SINGLE(umullt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullt z30.s, z29.h, z28.h"); TEST_SINGLE(umullt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullt z30.d, z29.s, z28.s"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise shift left long") { - TEST_SINGLE(sshllb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "sshllb z30.h, z29.b, #0"); - TEST_SINGLE(sshllb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "sshllb z30.h, z29.b, #7"); - TEST_SINGLE(sshllb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "sshllb z30.s, z29.h, #0"); + TEST_SINGLE(sshllb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "sshllb z30.h, z29.b, #0"); + TEST_SINGLE(sshllb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "sshllb z30.h, z29.b, #7"); + TEST_SINGLE(sshllb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "sshllb z30.s, z29.h, #0"); 
TEST_SINGLE(sshllb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "sshllb z30.s, z29.h, #15"); - TEST_SINGLE(sshllb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "sshllb z30.d, z29.s, #0"); + TEST_SINGLE(sshllb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "sshllb z30.d, z29.s, #0"); TEST_SINGLE(sshllb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 31), "sshllb z30.d, z29.s, #31"); - TEST_SINGLE(sshllt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "sshllt z30.h, z29.b, #0"); - TEST_SINGLE(sshllt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "sshllt z30.h, z29.b, #7"); - TEST_SINGLE(sshllt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "sshllt z30.s, z29.h, #0"); + TEST_SINGLE(sshllt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "sshllt z30.h, z29.b, #0"); + TEST_SINGLE(sshllt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "sshllt z30.h, z29.b, #7"); + TEST_SINGLE(sshllt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "sshllt z30.s, z29.h, #0"); TEST_SINGLE(sshllt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "sshllt z30.s, z29.h, #15"); - TEST_SINGLE(sshllt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "sshllt z30.d, z29.s, #0"); + TEST_SINGLE(sshllt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "sshllt z30.d, z29.s, #0"); TEST_SINGLE(sshllt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 31), "sshllt z30.d, z29.s, #31"); - TEST_SINGLE(ushllb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "ushllb z30.h, z29.b, #0"); - TEST_SINGLE(ushllb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "ushllb z30.h, z29.b, #7"); - TEST_SINGLE(ushllb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "ushllb z30.s, z29.h, #0"); + TEST_SINGLE(ushllb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "ushllb z30.h, z29.b, #0"); + TEST_SINGLE(ushllb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "ushllb z30.h, z29.b, #7"); + TEST_SINGLE(ushllb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "ushllb z30.s, z29.h, #0"); TEST_SINGLE(ushllb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "ushllb z30.s, z29.h, #15"); - 
TEST_SINGLE(ushllb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "ushllb z30.d, z29.s, #0"); + TEST_SINGLE(ushllb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "ushllb z30.d, z29.s, #0"); TEST_SINGLE(ushllb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 31), "ushllb z30.d, z29.s, #31"); - TEST_SINGLE(ushllt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "ushllt z30.h, z29.b, #0"); - TEST_SINGLE(ushllt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "ushllt z30.h, z29.b, #7"); - TEST_SINGLE(ushllt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "ushllt z30.s, z29.h, #0"); + TEST_SINGLE(ushllt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "ushllt z30.h, z29.b, #0"); + TEST_SINGLE(ushllt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "ushllt z30.h, z29.b, #7"); + TEST_SINGLE(ushllt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "ushllt z30.s, z29.h, #0"); TEST_SINGLE(ushllt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "ushllt z30.s, z29.h, #15"); - TEST_SINGLE(ushllt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "ushllt z30.d, z29.s, #0"); + TEST_SINGLE(ushllt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "ushllt z30.d, z29.s, #0"); TEST_SINGLE(ushllt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 31), "ushllt z30.d, z29.s, #31"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer add/subtract interleaved long") { @@ -3054,74 +3026,74 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer add/subtract inte TEST_SINGLE(ssubltb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssubltb z30.d, z29.s, z28.s"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise exclusive-or interleaved") { - TEST_SINGLE(eorbt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eorbt z30.b, z29.b, z28.b"); + TEST_SINGLE(eorbt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eorbt z30.b, z29.b, z28.b"); TEST_SINGLE(eorbt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eorbt z30.h, z29.h, z28.h"); TEST_SINGLE(eorbt(SubRegSize::i32Bit, ZReg::z30, 
ZReg::z29, ZReg::z28), "eorbt z30.s, z29.s, z28.s"); TEST_SINGLE(eorbt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eorbt z30.d, z29.d, z28.d"); - TEST_SINGLE(eortb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eortb z30.b, z29.b, z28.b"); + TEST_SINGLE(eortb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eortb z30.b, z29.b, z28.b"); TEST_SINGLE(eortb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eortb z30.h, z29.h, z28.h"); TEST_SINGLE(eortb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eortb z30.s, z29.s, z28.s"); TEST_SINGLE(eortb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eortb z30.d, z29.d, z28.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer matrix multiply accumulate") { - TEST_SINGLE(smmla(ZReg::z30, ZReg::z29, ZReg::z28), "smmla z30.s, z29.b, z28.b"); + TEST_SINGLE(smmla(ZReg::z30, ZReg::z29, ZReg::z28), "smmla z30.s, z29.b, z28.b"); TEST_SINGLE(usmmla(ZReg::z30, ZReg::z29, ZReg::z28), "usmmla z30.s, z29.b, z28.b"); - TEST_SINGLE(ummla(ZReg::z30, ZReg::z29, ZReg::z28), "ummla z30.s, z29.b, z28.b"); + TEST_SINGLE(ummla(ZReg::z30, ZReg::z29, ZReg::z28), "ummla z30.s, z29.b, z28.b"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise permute") { - TEST_SINGLE(bext(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bext z30.b, z29.b, z28.b"); + TEST_SINGLE(bext(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bext z30.b, z29.b, z28.b"); TEST_SINGLE(bext(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bext z30.h, z29.h, z28.h"); TEST_SINGLE(bext(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bext z30.s, z29.s, z28.s"); TEST_SINGLE(bext(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bext z30.d, z29.d, z28.d"); - TEST_SINGLE(bdep(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bdep z30.b, z29.b, z28.b"); + TEST_SINGLE(bdep(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bdep z30.b, z29.b, z28.b"); TEST_SINGLE(bdep(SubRegSize::i16Bit, 
ZReg::z30, ZReg::z29, ZReg::z28), "bdep z30.h, z29.h, z28.h"); TEST_SINGLE(bdep(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bdep z30.s, z29.s, z28.s"); TEST_SINGLE(bdep(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bdep z30.d, z29.d, z28.d"); - TEST_SINGLE(bgrp(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bgrp z30.b, z29.b, z28.b"); + TEST_SINGLE(bgrp(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bgrp z30.b, z29.b, z28.b"); TEST_SINGLE(bgrp(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bgrp z30.h, z29.h, z28.h"); TEST_SINGLE(bgrp(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bgrp z30.s, z29.s, z28.s"); TEST_SINGLE(bgrp(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bgrp z30.d, z29.d, z28.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 complex integer add") { - TEST_SINGLE(cadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "cadd z30.b, z30.b, z29.b, #90"); - TEST_SINGLE(cadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "cadd z30.h, z30.h, z29.h, #90"); - TEST_SINGLE(cadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "cadd z30.s, z30.s, z29.s, #90"); - TEST_SINGLE(cadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "cadd z30.d, z30.d, z29.d, #90"); - - TEST_SINGLE(cadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "cadd z30.b, z30.b, z29.b, #270"); - TEST_SINGLE(cadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "cadd z30.h, z30.h, z29.h, #270"); - TEST_SINGLE(cadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "cadd z30.s, z30.s, z29.s, #270"); - TEST_SINGLE(cadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "cadd z30.d, z30.d, z29.d, #270"); - - TEST_SINGLE(sqcadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "sqcadd z30.b, z30.b, 
z29.b, #90"); - TEST_SINGLE(sqcadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "sqcadd z30.h, z30.h, z29.h, #90"); - TEST_SINGLE(sqcadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "sqcadd z30.s, z30.s, z29.s, #90"); - TEST_SINGLE(sqcadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "sqcadd z30.d, z30.d, z29.d, #90"); - - TEST_SINGLE(sqcadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "sqcadd z30.b, z30.b, z29.b, #270"); + TEST_SINGLE(cadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "cadd z30.b, z30.b, z29.b, #90"); + TEST_SINGLE(cadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "cadd z30.h, z30.h, z29.h, #90"); + TEST_SINGLE(cadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "cadd z30.s, z30.s, z29.s, #90"); + TEST_SINGLE(cadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "cadd z30.d, z30.d, z29.d, #90"); + + TEST_SINGLE(cadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "cadd z30.b, z30.b, z29.b, #270"); + TEST_SINGLE(cadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "cadd z30.h, z30.h, z29.h, #270"); + TEST_SINGLE(cadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "cadd z30.s, z30.s, z29.s, #270"); + TEST_SINGLE(cadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "cadd z30.d, z30.d, z29.d, #270"); + + TEST_SINGLE(sqcadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "sqcadd z30.b, z30.b, z29.b, #90"); + TEST_SINGLE(sqcadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "sqcadd z30.h, z30.h, z29.h, #90"); + TEST_SINGLE(sqcadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "sqcadd z30.s, z30.s, z29.s, #90"); + 
TEST_SINGLE(sqcadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "sqcadd z30.d, z30.d, z29.d, #90"); + + TEST_SINGLE(sqcadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "sqcadd z30.b, z30.b, z29.b, #270"); TEST_SINGLE(sqcadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "sqcadd z30.h, z30.h, z29.h, #270"); TEST_SINGLE(sqcadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "sqcadd z30.s, z30.s, z29.s, #270"); TEST_SINGLE(sqcadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "sqcadd z30.d, z30.d, z29.d, #270"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer absolute difference and accumulate long") { - TEST_SINGLE(sabalb(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalb z28.h, z29.b, z30.b"); - TEST_SINGLE(sabalb(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalb z28.s, z29.h, z30.h"); - TEST_SINGLE(sabalb(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalb z28.d, z29.s, z30.s"); + TEST_SINGLE(sabalb(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalb z28.h, z29.b, z30.b"); + TEST_SINGLE(sabalb(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalb z28.s, z29.h, z30.h"); + TEST_SINGLE(sabalb(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalb z28.d, z29.s, z30.s"); - TEST_SINGLE(sabalt(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalt z28.h, z29.b, z30.b"); - TEST_SINGLE(sabalt(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalt z28.s, z29.h, z30.h"); - TEST_SINGLE(sabalt(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalt z28.d, z29.s, z30.s"); + TEST_SINGLE(sabalt(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalt z28.h, z29.b, z30.b"); + TEST_SINGLE(sabalt(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalt z28.s, z29.h, z30.h"); + TEST_SINGLE(sabalt(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, 
ZReg::z30), "sabalt z28.d, z29.s, z30.s"); - TEST_SINGLE(uabalb(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalb z28.h, z29.b, z30.b"); - TEST_SINGLE(uabalb(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalb z28.s, z29.h, z30.h"); - TEST_SINGLE(uabalb(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalb z28.d, z29.s, z30.s"); + TEST_SINGLE(uabalb(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalb z28.h, z29.b, z30.b"); + TEST_SINGLE(uabalb(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalb z28.s, z29.h, z30.h"); + TEST_SINGLE(uabalb(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalb z28.d, z29.s, z30.s"); - TEST_SINGLE(uabalt(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalt z28.h, z29.b, z30.b"); - TEST_SINGLE(uabalt(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalt z28.s, z29.h, z30.h"); - TEST_SINGLE(uabalt(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalt z28.d, z29.s, z30.s"); + TEST_SINGLE(uabalt(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalt z28.h, z29.b, z30.b"); + TEST_SINGLE(uabalt(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalt z28.s, z29.h, z30.h"); + TEST_SINGLE(uabalt(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalt z28.d, z29.s, z30.s"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer add/subtract long with carry") { TEST_SINGLE(adclb(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "adclb z28.s, z29.s, z30.s"); @@ -3137,76 +3109,76 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer add/subtract long TEST_SINGLE(sbclt(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sbclt z28.d, z29.d, z30.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise shift right and accumulate") { - TEST_SINGLE(ssra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "ssra z30.b, z29.b, #1"); - TEST_SINGLE(ssra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "ssra z30.b, z29.b, #8"); - 
TEST_SINGLE(ssra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "ssra z30.h, z29.h, #1"); - TEST_SINGLE(ssra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "ssra z30.h, z29.h, #16"); - TEST_SINGLE(ssra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "ssra z30.s, z29.s, #1"); - TEST_SINGLE(ssra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "ssra z30.s, z29.s, #32"); - TEST_SINGLE(ssra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "ssra z30.d, z29.d, #1"); - TEST_SINGLE(ssra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "ssra z30.d, z29.d, #64"); - - TEST_SINGLE(usra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "usra z30.b, z29.b, #1"); - TEST_SINGLE(usra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "usra z30.b, z29.b, #8"); - TEST_SINGLE(usra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "usra z30.h, z29.h, #1"); - TEST_SINGLE(usra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "usra z30.h, z29.h, #16"); - TEST_SINGLE(usra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "usra z30.s, z29.s, #1"); - TEST_SINGLE(usra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "usra z30.s, z29.s, #32"); - TEST_SINGLE(usra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "usra z30.d, z29.d, #1"); - TEST_SINGLE(usra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "usra z30.d, z29.d, #64"); - - TEST_SINGLE(srsra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "srsra z30.b, z29.b, #1"); - TEST_SINGLE(srsra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "srsra z30.b, z29.b, #8"); - TEST_SINGLE(srsra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "srsra z30.h, z29.h, #1"); + TEST_SINGLE(ssra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "ssra z30.b, z29.b, #1"); + TEST_SINGLE(ssra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "ssra z30.b, z29.b, #8"); + TEST_SINGLE(ssra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "ssra z30.h, z29.h, #1"); + TEST_SINGLE(ssra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "ssra z30.h, z29.h, #16"); + TEST_SINGLE(ssra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "ssra 
z30.s, z29.s, #1"); + TEST_SINGLE(ssra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "ssra z30.s, z29.s, #32"); + TEST_SINGLE(ssra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "ssra z30.d, z29.d, #1"); + TEST_SINGLE(ssra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "ssra z30.d, z29.d, #64"); + + TEST_SINGLE(usra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "usra z30.b, z29.b, #1"); + TEST_SINGLE(usra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "usra z30.b, z29.b, #8"); + TEST_SINGLE(usra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "usra z30.h, z29.h, #1"); + TEST_SINGLE(usra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "usra z30.h, z29.h, #16"); + TEST_SINGLE(usra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "usra z30.s, z29.s, #1"); + TEST_SINGLE(usra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "usra z30.s, z29.s, #32"); + TEST_SINGLE(usra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "usra z30.d, z29.d, #1"); + TEST_SINGLE(usra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "usra z30.d, z29.d, #64"); + + TEST_SINGLE(srsra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "srsra z30.b, z29.b, #1"); + TEST_SINGLE(srsra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "srsra z30.b, z29.b, #8"); + TEST_SINGLE(srsra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "srsra z30.h, z29.h, #1"); TEST_SINGLE(srsra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "srsra z30.h, z29.h, #16"); - TEST_SINGLE(srsra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "srsra z30.s, z29.s, #1"); + TEST_SINGLE(srsra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "srsra z30.s, z29.s, #1"); TEST_SINGLE(srsra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "srsra z30.s, z29.s, #32"); - TEST_SINGLE(srsra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "srsra z30.d, z29.d, #1"); + TEST_SINGLE(srsra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "srsra z30.d, z29.d, #1"); TEST_SINGLE(srsra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "srsra z30.d, z29.d, #64"); - TEST_SINGLE(ursra(SubRegSize::i8Bit, 
ZReg::z30, ZReg::z29, 1), "ursra z30.b, z29.b, #1"); - TEST_SINGLE(ursra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "ursra z30.b, z29.b, #8"); - TEST_SINGLE(ursra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "ursra z30.h, z29.h, #1"); + TEST_SINGLE(ursra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "ursra z30.b, z29.b, #1"); + TEST_SINGLE(ursra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "ursra z30.b, z29.b, #8"); + TEST_SINGLE(ursra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "ursra z30.h, z29.h, #1"); TEST_SINGLE(ursra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "ursra z30.h, z29.h, #16"); - TEST_SINGLE(ursra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "ursra z30.s, z29.s, #1"); + TEST_SINGLE(ursra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "ursra z30.s, z29.s, #1"); TEST_SINGLE(ursra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "ursra z30.s, z29.s, #32"); - TEST_SINGLE(ursra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "ursra z30.d, z29.d, #1"); + TEST_SINGLE(ursra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "ursra z30.d, z29.d, #1"); TEST_SINGLE(ursra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "ursra z30.d, z29.d, #64"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise shift and insert") { - TEST_SINGLE(sri(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "sri z30.b, z29.b, #1");\ - TEST_SINGLE(sri(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 4), "sri z30.b, z29.b, #4"); - TEST_SINGLE(sri(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "sri z30.b, z29.b, #8"); - TEST_SINGLE(sri(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "sri z30.h, z29.h, #1"); + TEST_SINGLE(sri(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "sri z30.b, z29.b, #1"); + TEST_SINGLE(sri(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 4), "sri z30.b, z29.b, #4"); + TEST_SINGLE(sri(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "sri z30.b, z29.b, #8"); + TEST_SINGLE(sri(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "sri z30.h, z29.h, #1"); TEST_SINGLE(sri(SubRegSize::i16Bit, 
ZReg::z30, ZReg::z29, 15), "sri z30.h, z29.h, #15"); TEST_SINGLE(sri(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "sri z30.h, z29.h, #16"); - TEST_SINGLE(sri(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "sri z30.s, z29.s, #1"); + TEST_SINGLE(sri(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "sri z30.s, z29.s, #1"); TEST_SINGLE(sri(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "sri z30.s, z29.s, #15"); TEST_SINGLE(sri(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "sri z30.s, z29.s, #32"); - TEST_SINGLE(sri(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "sri z30.d, z29.d, #1"); + TEST_SINGLE(sri(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "sri z30.d, z29.d, #1"); TEST_SINGLE(sri(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 31), "sri z30.d, z29.d, #31"); TEST_SINGLE(sri(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "sri z30.d, z29.d, #64"); - TEST_SINGLE(sli(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 0), "sli z30.b, z29.b, #0"); - TEST_SINGLE(sli(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 4), "sli z30.b, z29.b, #4"); - TEST_SINGLE(sli(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 7), "sli z30.b, z29.b, #7"); - TEST_SINGLE(sli(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "sli z30.h, z29.h, #0"); - TEST_SINGLE(sli(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "sli z30.h, z29.h, #7"); + TEST_SINGLE(sli(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 0), "sli z30.b, z29.b, #0"); + TEST_SINGLE(sli(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 4), "sli z30.b, z29.b, #4"); + TEST_SINGLE(sli(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 7), "sli z30.b, z29.b, #7"); + TEST_SINGLE(sli(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "sli z30.h, z29.h, #0"); + TEST_SINGLE(sli(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "sli z30.h, z29.h, #7"); TEST_SINGLE(sli(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 15), "sli z30.h, z29.h, #15"); - TEST_SINGLE(sli(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "sli z30.s, z29.s, #0"); + TEST_SINGLE(sli(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "sli z30.s, z29.s, 
#0"); TEST_SINGLE(sli(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "sli z30.s, z29.s, #15"); TEST_SINGLE(sli(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 31), "sli z30.s, z29.s, #31"); - TEST_SINGLE(sli(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "sli z30.d, z29.d, #0"); + TEST_SINGLE(sli(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "sli z30.d, z29.d, #0"); TEST_SINGLE(sli(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 31), "sli z30.d, z29.d, #31"); TEST_SINGLE(sli(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 63), "sli z30.d, z29.d, #63"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer absolute difference and accumulate") { - TEST_SINGLE(saba(SubRegSize::i8Bit, ZReg::z28, ZReg::z29, ZReg::z30), "saba z28.b, z29.b, z30.b"); + TEST_SINGLE(saba(SubRegSize::i8Bit, ZReg::z28, ZReg::z29, ZReg::z30), "saba z28.b, z29.b, z30.b"); TEST_SINGLE(saba(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "saba z28.h, z29.h, z30.h"); TEST_SINGLE(saba(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "saba z28.s, z29.s, z30.s"); TEST_SINGLE(saba(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "saba z28.d, z29.d, z30.d"); - TEST_SINGLE(uaba(SubRegSize::i8Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uaba z28.b, z29.b, z30.b"); + TEST_SINGLE(uaba(SubRegSize::i8Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uaba z28.b, z29.b, z30.b"); TEST_SINGLE(uaba(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uaba z28.h, z29.h, z30.h"); TEST_SINGLE(uaba(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uaba z28.s, z29.s, z30.s"); TEST_SINGLE(uaba(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uaba z28.d, z29.d, z30.d"); @@ -3215,32 +3187,32 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 saturating extract narrow TEST_SINGLE(sqxtnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "sqxtnb z30.b, z29.h"); TEST_SINGLE(sqxtnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sqxtnb z30.h, z29.s"); TEST_SINGLE(sqxtnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sqxtnb 
z30.s, z29.d"); - //TEST_SINGLE(sqxtnb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sqxtnb z30.d, z29.q"); + // TEST_SINGLE(sqxtnb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sqxtnb z30.d, z29.q"); TEST_SINGLE(sqxtnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "sqxtnt z30.b, z29.h"); TEST_SINGLE(sqxtnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sqxtnt z30.h, z29.s"); TEST_SINGLE(sqxtnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sqxtnt z30.s, z29.d"); - //TEST_SINGLE(sqxtnt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sqxtnt z30.d, z29.q"); + // TEST_SINGLE(sqxtnt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sqxtnt z30.d, z29.q"); TEST_SINGLE(uqxtnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "uqxtnb z30.b, z29.h"); TEST_SINGLE(uqxtnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "uqxtnb z30.h, z29.s"); TEST_SINGLE(uqxtnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "uqxtnb z30.s, z29.d"); - //TEST_SINGLE(uqxtnb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "uqxtnb z30.d, z29.q"); + // TEST_SINGLE(uqxtnb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "uqxtnb z30.d, z29.q"); TEST_SINGLE(uqxtnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "uqxtnt z30.b, z29.h"); TEST_SINGLE(uqxtnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "uqxtnt z30.h, z29.s"); TEST_SINGLE(uqxtnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "uqxtnt z30.s, z29.d"); - //TEST_SINGLE(uqxtnt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "uqxtnt z30.d, z29.q"); + // TEST_SINGLE(uqxtnt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "uqxtnt z30.d, z29.q"); TEST_SINGLE(sqxtunb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "sqxtunb z30.b, z29.h"); TEST_SINGLE(sqxtunb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sqxtunb z30.h, z29.s"); TEST_SINGLE(sqxtunb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sqxtunb z30.s, z29.d"); - //TEST_SINGLE(sqxtunb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sqxtunb z30.d, z29.q"); + // TEST_SINGLE(sqxtunb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sqxtunb z30.d, z29.q"); TEST_SINGLE(sqxtunt(SubRegSize::i8Bit, 
ZReg::z30, ZReg::z29), "sqxtunt z30.b, z29.h"); TEST_SINGLE(sqxtunt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sqxtunt z30.h, z29.s"); TEST_SINGLE(sqxtunt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sqxtunt z30.s, z29.d"); - //TEST_SINGLE(sqxtunt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sqxtunt z30.d, z29.q"); + // TEST_SINGLE(sqxtunt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sqxtunt z30.d, z29.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise shift right narrow") { TEST_SINGLE(sqshrunb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "sqshrunb z30.b, z29.h, #1"); @@ -3356,41 +3328,41 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise shift right narro TEST_SINGLE(uqrshrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "uqrshrnt z30.s, z29.d, #32"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer add/subtract narrow high part") { - TEST_SINGLE(addhnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnb z30.b, z29.h, z28.h"); - TEST_SINGLE(addhnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnb z30.h, z29.s, z28.s"); - TEST_SINGLE(addhnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnb z30.s, z29.d, z28.d"); + TEST_SINGLE(addhnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnb z30.b, z29.h, z28.h"); + TEST_SINGLE(addhnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnb z30.h, z29.s, z28.s"); + TEST_SINGLE(addhnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnb z30.s, z29.d, z28.d"); - TEST_SINGLE(addhnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnt z30.b, z29.h, z28.h"); - TEST_SINGLE(addhnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnt z30.h, z29.s, z28.s"); - TEST_SINGLE(addhnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnt z30.s, z29.d, z28.d"); + TEST_SINGLE(addhnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnt z30.b, z29.h, z28.h"); + TEST_SINGLE(addhnt(SubRegSize::i16Bit, 
ZReg::z30, ZReg::z29, ZReg::z28), "addhnt z30.h, z29.s, z28.s"); + TEST_SINGLE(addhnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnt z30.s, z29.d, z28.d"); - TEST_SINGLE(raddhnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnb z30.b, z29.h, z28.h"); + TEST_SINGLE(raddhnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnb z30.b, z29.h, z28.h"); TEST_SINGLE(raddhnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnb z30.h, z29.s, z28.s"); TEST_SINGLE(raddhnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnb z30.s, z29.d, z28.d"); - TEST_SINGLE(raddhnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnt z30.b, z29.h, z28.h"); + TEST_SINGLE(raddhnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnt z30.b, z29.h, z28.h"); TEST_SINGLE(raddhnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnt z30.h, z29.s, z28.s"); TEST_SINGLE(raddhnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnt z30.s, z29.d, z28.d"); - TEST_SINGLE(subhnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnb z30.b, z29.h, z28.h"); - TEST_SINGLE(subhnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnb z30.h, z29.s, z28.s"); - TEST_SINGLE(subhnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnb z30.s, z29.d, z28.d"); + TEST_SINGLE(subhnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnb z30.b, z29.h, z28.h"); + TEST_SINGLE(subhnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnb z30.h, z29.s, z28.s"); + TEST_SINGLE(subhnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnb z30.s, z29.d, z28.d"); - TEST_SINGLE(subhnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnt z30.b, z29.h, z28.h"); - TEST_SINGLE(subhnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnt z30.h, z29.s, z28.s"); - TEST_SINGLE(subhnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnt z30.s, z29.d, z28.d"); + 
TEST_SINGLE(subhnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnt z30.b, z29.h, z28.h"); + TEST_SINGLE(subhnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnt z30.h, z29.s, z28.s"); + TEST_SINGLE(subhnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnt z30.s, z29.d, z28.d"); - TEST_SINGLE(rsubhnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnb z30.b, z29.h, z28.h"); + TEST_SINGLE(rsubhnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnb z30.b, z29.h, z28.h"); TEST_SINGLE(rsubhnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnb z30.h, z29.s, z28.s"); TEST_SINGLE(rsubhnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnb z30.s, z29.d, z28.d"); - TEST_SINGLE(rsubhnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnt z30.b, z29.h, z28.h"); + TEST_SINGLE(rsubhnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnt z30.b, z29.h, z28.h"); TEST_SINGLE(rsubhnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnt z30.h, z29.s, z28.s"); TEST_SINGLE(rsubhnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnt z30.s, z29.d, z28.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 Histogram Computation") { - TEST_SINGLE(histcnt(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29, ZReg::z28), "histcnt z30.s, p6/z, z29.s, z28.s"); - TEST_SINGLE(histcnt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29, ZReg::z28), "histcnt z30.d, p6/z, z29.d, z28.d"); + TEST_SINGLE(histcnt(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29, ZReg::z28), "histcnt z30.s, p6/z, z29.s, z28.s"); + TEST_SINGLE(histcnt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29, ZReg::z28), "histcnt z30.d, p6/z, z29.d, z28.d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 Histogram Computation - Segment") { TEST_SINGLE(histseg(ZReg::z30, ZReg::z29, ZReg::z28), "histseg z30.b, z29.b, z28.b"); @@ -3469,15 +3441,15 @@ 
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point multiply-ad // TEST_SINGLE(bfmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlalb z30.s, z29.h, z28.h"); // TEST_SINGLE(bfmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlalb z30.s, z29.h, z28.h"); // TEST_SINGLE(bfmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlalb z30.s, z29.h, z28.h"); - + // TEST_SINGLE(bfmlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlalt z30.s, z29.h, z28.h"); // TEST_SINGLE(bfmlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlalt z30.s, z29.h, z28.h"); // TEST_SINGLE(bfmlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlalt z30.s, z29.h, z28.h"); - + // TEST_SINGLE(bfmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlslb z30.s, z29.h, z28.h"); // TEST_SINGLE(bfmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlslb z30.s, z29.h, z28.h"); // TEST_SINGLE(bfmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlslb z30.s, z29.h, z28.h"); - + // TEST_SINGLE(bfmlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlslt z30.s, z29.h, z28.h"); // TEST_SINGLE(bfmlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlslt z30.s, z29.h, z28.h"); // TEST_SINGLE(bfmlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlslt z30.s, z29.h, z28.h"); @@ -3487,112 +3459,112 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point arithmetic TEST_SINGLE(ftmad(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z28, 7), "ftmad z30.s, z30.s, z28.s, #7"); TEST_SINGLE(ftmad(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z28, 7), "ftmad z30.d, z30.d, z28.d, #7"); - //TEST_SINGLE(fadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.h, p6/m, z30.h, z28.h"); - 
TEST_SINGLE(fadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fsub(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fsub(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fmul(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fmul(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fsubr(SubRegSize::i32Bit, ZReg::z30, 
PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fsubr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fmaxnm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fmaxnm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fmaxnm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fmaxnm(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fminnm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fminnm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fminnm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fminnm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fminnm(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fmax(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fmax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fmax(SubRegSize::i32Bit, ZReg::z30, 
PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fmax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fmax(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fmin(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fmin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fmin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fmin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fmin(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fabd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fabd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fabd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fabd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fabd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fscale(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fscale(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fscale(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale 
z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fscale(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fscale(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fmulx(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fmulx(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fmulx(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fmulx(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fmulx(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fdiv(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fdiv(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fdiv(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.s, p6/m, z30.s, z28.s"); - TEST_SINGLE(fdiv(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fdiv(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.q, p6/m, z30.q, z28.q"); - - //TEST_SINGLE(fdivr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.b, p6/m, z30.b, z28.b"); - TEST_SINGLE(fdivr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.h, p6/m, z30.h, z28.h"); - TEST_SINGLE(fdivr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.s, p6/m, z30.s, z28.s"); - 
TEST_SINGLE(fdivr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.d, p6/m, z30.d, z28.d"); - //TEST_SINGLE(fdivr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.q, p6/m, z30.q, z28.q"); + // TEST_SINGLE(fadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fsub(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fsub(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fmul(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, 
PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fmul(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fsubr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fmaxnm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fmaxnm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fmaxnm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fmaxnm(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fminnm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fminnm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fminnm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fminnm(SubRegSize::i64Bit, ZReg::z30, 
PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fminnm(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fmax(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fmax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fmax(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fmax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fmax(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fmin(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fmin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fmin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fmin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fmin(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fabd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fabd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fabd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fabd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd 
z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fabd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fscale(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fscale(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fscale(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fscale(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fscale(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fmulx(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fmulx(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fmulx(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fmulx(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fmulx(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fdiv(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fdiv(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fdiv(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fdiv(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.d, p6/m, z30.d, 
z28.d"); + // TEST_SINGLE(fdiv(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.q, p6/m, z30.q, z28.q"); + + // TEST_SINGLE(fdivr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.b, p6/m, z30.b, z28.b"); + TEST_SINGLE(fdivr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.h, p6/m, z30.h, z28.h"); + TEST_SINGLE(fdivr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.s, p6/m, z30.s, z28.s"); + TEST_SINGLE(fdivr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.d, p6/m, z30.d, z28.d"); + // TEST_SINGLE(fdivr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.q, p6/m, z30.q, z28.q"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point arithmetic with immediate (predicated)") { - TEST_SINGLE(fadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fadd z30.h, p6/m, z30.h, #0.5"); - TEST_SINGLE(fadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fadd z30.s, p6/m, z30.s, #0.5"); - TEST_SINGLE(fadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fadd z30.d, p6/m, z30.d, #0.5"); - TEST_SINGLE(fadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fadd z30.h, p6/m, z30.h, #1.0"); - TEST_SINGLE(fadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fadd z30.s, p6/m, z30.s, #1.0"); - TEST_SINGLE(fadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fadd z30.d, p6/m, z30.d, #1.0"); - - TEST_SINGLE(fsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsub z30.h, p6/m, z30.h, #0.5"); - TEST_SINGLE(fsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsub z30.s, p6/m, z30.s, #0.5"); - TEST_SINGLE(fsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 
SVEFAddSubImm::_0_5), "fsub z30.d, p6/m, z30.d, #0.5"); - TEST_SINGLE(fsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsub z30.h, p6/m, z30.h, #1.0"); - TEST_SINGLE(fsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsub z30.s, p6/m, z30.s, #1.0"); - TEST_SINGLE(fsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsub z30.d, p6/m, z30.d, #1.0"); - - TEST_SINGLE(fsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsubr z30.h, p6/m, z30.h, #0.5"); - TEST_SINGLE(fsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsubr z30.s, p6/m, z30.s, #0.5"); - TEST_SINGLE(fsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsubr z30.d, p6/m, z30.d, #0.5"); - TEST_SINGLE(fsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsubr z30.h, p6/m, z30.h, #1.0"); - TEST_SINGLE(fsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsubr z30.s, p6/m, z30.s, #1.0"); - TEST_SINGLE(fsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsubr z30.d, p6/m, z30.d, #1.0"); - - TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_0_5), "fmul z30.h, p6/m, z30.h, #0.5"); - TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_0_5), "fmul z30.s, p6/m, z30.s, #0.5"); - TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_0_5), "fmul z30.d, p6/m, z30.d, #0.5"); - TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_2_0), "fmul z30.h, p6/m, z30.h, #2.0"); - TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_2_0), "fmul z30.s, p6/m, z30.s, #2.0"); - TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_2_0), "fmul z30.d, p6/m, z30.d, #2.0"); + TEST_SINGLE(fadd(SubRegSize::i16Bit, ZReg::z30, 
PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fadd z30.h, p6/m, z30.h, #0.5"); + TEST_SINGLE(fadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fadd z30.s, p6/m, z30.s, #0.5"); + TEST_SINGLE(fadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fadd z30.d, p6/m, z30.d, #0.5"); + TEST_SINGLE(fadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fadd z30.h, p6/m, z30.h, #1.0"); + TEST_SINGLE(fadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fadd z30.s, p6/m, z30.s, #1.0"); + TEST_SINGLE(fadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fadd z30.d, p6/m, z30.d, #1.0"); + + TEST_SINGLE(fsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsub z30.h, p6/m, z30.h, #0.5"); + TEST_SINGLE(fsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsub z30.s, p6/m, z30.s, #0.5"); + TEST_SINGLE(fsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsub z30.d, p6/m, z30.d, #0.5"); + TEST_SINGLE(fsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsub z30.h, p6/m, z30.h, #1.0"); + TEST_SINGLE(fsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsub z30.s, p6/m, z30.s, #1.0"); + TEST_SINGLE(fsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsub z30.d, p6/m, z30.d, #1.0"); + + TEST_SINGLE(fsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsubr z30.h, p6/m, z30.h, #0.5"); + TEST_SINGLE(fsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsubr z30.s, p6/m, z30.s, #0.5"); + TEST_SINGLE(fsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsubr z30.d, p6/m, z30.d, #0.5"); + TEST_SINGLE(fsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsubr z30.h, p6/m, z30.h, #1.0"); + 
TEST_SINGLE(fsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsubr z30.s, p6/m, z30.s, #1.0"); + TEST_SINGLE(fsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsubr z30.d, p6/m, z30.d, #1.0"); + + TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_0_5), "fmul z30.h, p6/m, z30.h, #0.5"); + TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_0_5), "fmul z30.s, p6/m, z30.s, #0.5"); + TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_0_5), "fmul z30.d, p6/m, z30.d, #0.5"); + TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_2_0), "fmul z30.h, p6/m, z30.h, #2.0"); + TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_2_0), "fmul z30.s, p6/m, z30.s, #2.0"); + TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_2_0), "fmul z30.d, p6/m, z30.d, #2.0"); TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmaxnm z30.h, p6/m, z30.h, #0.0"); TEST_SINGLE(fmaxnm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmaxnm z30.s, p6/m, z30.s, #0.0"); @@ -3608,462 +3580,324 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point arithmetic TEST_SINGLE(fminnm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fminnm z30.s, p6/m, z30.s, #1.0"); TEST_SINGLE(fminnm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fminnm z30.d, p6/m, z30.d, #1.0"); - TEST_SINGLE(fmax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmax z30.h, p6/m, z30.h, #0.0"); - TEST_SINGLE(fmax(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmax z30.s, p6/m, z30.s, #0.0"); - TEST_SINGLE(fmax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmax z30.d, p6/m, z30.d, #0.0"); - 
TEST_SINGLE(fmax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmax z30.h, p6/m, z30.h, #1.0"); - TEST_SINGLE(fmax(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmax z30.s, p6/m, z30.s, #1.0"); - TEST_SINGLE(fmax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmax z30.d, p6/m, z30.d, #1.0"); + TEST_SINGLE(fmax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmax z30.h, p6/m, z30.h, #0.0"); + TEST_SINGLE(fmax(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmax z30.s, p6/m, z30.s, #0.0"); + TEST_SINGLE(fmax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmax z30.d, p6/m, z30.d, #0.0"); + TEST_SINGLE(fmax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmax z30.h, p6/m, z30.h, #1.0"); + TEST_SINGLE(fmax(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmax z30.s, p6/m, z30.s, #1.0"); + TEST_SINGLE(fmax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmax z30.d, p6/m, z30.d, #1.0"); - TEST_SINGLE(fmin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmin z30.h, p6/m, z30.h, #0.0"); - TEST_SINGLE(fmin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmin z30.s, p6/m, z30.s, #0.0"); - TEST_SINGLE(fmin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmin z30.d, p6/m, z30.d, #0.0"); - TEST_SINGLE(fmin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmin z30.h, p6/m, z30.h, #1.0"); - TEST_SINGLE(fmin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmin z30.s, p6/m, z30.s, #1.0"); - TEST_SINGLE(fmin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmin z30.d, p6/m, z30.d, #1.0"); + TEST_SINGLE(fmin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmin z30.h, p6/m, 
z30.h, #0.0"); + TEST_SINGLE(fmin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmin z30.s, p6/m, z30.s, #0.0"); + TEST_SINGLE(fmin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmin z30.d, p6/m, z30.d, #0.0"); + TEST_SINGLE(fmin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmin z30.h, p6/m, z30.h, #1.0"); + TEST_SINGLE(fmin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmin z30.s, p6/m, z30.s, #1.0"); + TEST_SINGLE(fmin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmin z30.d, p6/m, z30.d, #1.0"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Memory - 32-bit Gather and Unsized Contiguous") { - TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ld1b {z30.s}, p6/z, [x30, z31.s, uxtw]"); - TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ld1b {z30.s}, p6/z, [x30, z31.s, sxtw]"); - TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ld1b {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ld1b {z30.d}, p6/z, [x30, z31.d, sxtw]"); - TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ld1b {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ld1b {z30.s}, p6/z, [z31.s]"); - TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), - "ld1b {z30.s}, p6/z, [z31.s, #31]"); - TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ld1b {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), - "ld1b {z30.d}, p6/z, 
[z31.d, #31]"); - - TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ld1sb {z30.s}, p6/z, [x30, z31.s, uxtw]"); - TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ld1sb {z30.s}, p6/z, [x30, z31.s, sxtw]"); - TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ld1sb {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ld1sb {z30.d}, p6/z, [x30, z31.d, sxtw]"); - TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ld1sb {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ld1sb {z30.s}, p6/z, [z31.s]"); - TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), - "ld1sb {z30.s}, p6/z, [z31.s, #31]"); - TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ld1sb {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), - "ld1sb {z30.d}, p6/z, [z31.d, #31]"); - - TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ld1d {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ld1d {z30.d}, p6/z, [x30, z31.d, sxtw]"); - TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 3)), - "ld1d {z30.d}, p6/z, [x30, z31.d, uxtw #3]"); - TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 3)), - "ld1d {z30.d}, p6/z, [x30, z31.d, sxtw #3]"); - TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), - 
SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 3)), - "ld1d {z30.d}, p6/z, [x30, z31.d, lsl #3]"); - TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ld1d {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ld1d {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 248)), - "ld1d {z30.d}, p6/z, [z31.d, #248]"); - - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), - "ld1h {z30.s}, p6/z, [x30, z31.s, uxtw #1]"); - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), - "ld1h {z30.s}, p6/z, [x30, z31.s, sxtw #1]"); - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), - "ld1h {z30.d}, p6/z, [x30, z31.d, uxtw #1]"); - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), - "ld1h {z30.d}, p6/z, [x30, z31.d, sxtw #1]"); - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)), - "ld1h {z30.d}, p6/z, [x30, z31.d, lsl #1]"); - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ld1h {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ld1h {z30.s}, p6/z, [x30, z31.s, uxtw]"); - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ld1h {z30.s}, p6/z, [x30, z31.s, sxtw]"); - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ld1h {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ld1h(ZReg::z30, 
PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ld1h {z30.d}, p6/z, [x30, z31.d, sxtw]"); - - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ld1h {z30.s}, p6/z, [z31.s]"); - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), - "ld1h {z30.s}, p6/z, [z31.s, #62]"); - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ld1h {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), - "ld1h {z30.d}, p6/z, [z31.d, #62]"); - - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), - "ld1sh {z30.s}, p6/z, [x30, z31.s, uxtw #1]"); - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), - "ld1sh {z30.s}, p6/z, [x30, z31.s, sxtw #1]"); - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), - "ld1sh {z30.d}, p6/z, [x30, z31.d, uxtw #1]"); - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), - "ld1sh {z30.d}, p6/z, [x30, z31.d, sxtw #1]"); - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)), - "ld1sh {z30.d}, p6/z, [x30, z31.d, lsl #1]"); - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ld1sh {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ld1sh {z30.s}, p6/z, [x30, z31.s, uxtw]"); - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ld1sh {z30.s}, p6/z, [x30, z31.s, sxtw]"); - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, 
SVEModType::MOD_UXTW, 0)), - "ld1sh {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ld1sh {z30.d}, p6/z, [x30, z31.d, sxtw]"); - - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ld1sh {z30.s}, p6/z, [z31.s]"); - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), - "ld1sh {z30.s}, p6/z, [z31.s, #62]"); - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ld1sh {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), - "ld1sh {z30.d}, p6/z, [z31.d, #62]"); - - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), - "ld1w {z30.s}, p6/z, [x30, z31.s, uxtw #2]"); - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), - "ld1w {z30.s}, p6/z, [x30, z31.s, sxtw #2]"); - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), - "ld1w {z30.d}, p6/z, [x30, z31.d, uxtw #2]"); - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), - "ld1w {z30.d}, p6/z, [x30, z31.d, sxtw #2]"); - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), - "ld1w {z30.d}, p6/z, [x30, z31.d, lsl #2]"); - - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ld1w {z30.s}, p6/z, [x30, z31.s, uxtw]"); - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ld1w {z30.s}, p6/z, [x30, z31.s, sxtw]"); - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ld1w {z30.d}, p6/z, [x30, 
z31.d, uxtw]"); - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ld1w {z30.d}, p6/z, [x30, z31.d, sxtw]"); - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ld1w {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ld1w {z30.s}, p6/z, [z31.s]"); - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), - "ld1w {z30.s}, p6/z, [z31.s, #124]"); - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ld1w {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), - "ld1w {z30.d}, p6/z, [z31.d, #124]"); - - TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ld1sw {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ld1sw {z30.d}, p6/z, [x30, z31.d, sxtw]"); - TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), - "ld1sw {z30.d}, p6/z, [x30, z31.d, uxtw #2]"); - TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), - "ld1sw {z30.d}, p6/z, [x30, z31.d, sxtw #2]"); - TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), - "ld1sw {z30.d}, p6/z, [x30, z31.d, lsl #2]"); - TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ld1sw {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ld1sw {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), - "ld1sw {z30.d}, 
p6/z, [z31.d, #124]"); - - TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ldff1b {z30.s}, p6/z, [x30, z31.s, uxtw]"); - TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ldff1b {z30.s}, p6/z, [x30, z31.s, sxtw]"); - TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ldff1b {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ldff1b {z30.d}, p6/z, [x30, z31.d, sxtw]"); - TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ldff1b {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ldff1b {z30.s}, p6/z, [z31.s]"); - TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), - "ldff1b {z30.s}, p6/z, [z31.s, #31]"); - TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ldff1b {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), - "ldff1b {z30.d}, p6/z, [z31.d, #31]"); - - TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ldff1sb {z30.s}, p6/z, [x30, z31.s, uxtw]"); - TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ldff1sb {z30.s}, p6/z, [x30, z31.s, sxtw]"); - TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ldff1sb {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ldff1sb {z30.d}, p6/z, [x30, z31.d, sxtw]"); - 
TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ldff1sb {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ldff1sb {z30.s}, p6/z, [z31.s]"); - TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), - "ldff1sb {z30.s}, p6/z, [z31.s, #31]"); - TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ldff1sb {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), - "ldff1sb {z30.d}, p6/z, [z31.d, #31]"); - - TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ldff1d {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ldff1d {z30.d}, p6/z, [x30, z31.d, sxtw]"); - TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 3)), - "ldff1d {z30.d}, p6/z, [x30, z31.d, uxtw #3]"); - TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 3)), - "ldff1d {z30.d}, p6/z, [x30, z31.d, sxtw #3]"); - TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 3)), - "ldff1d {z30.d}, p6/z, [x30, z31.d, lsl #3]"); - TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ldff1d {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ldff1d {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 248)), - "ldff1d {z30.d}, p6/z, [z31.d, #248]"); - - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 
1)), - "ldff1h {z30.s}, p6/z, [x30, z31.s, uxtw #1]"); - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), - "ldff1h {z30.s}, p6/z, [x30, z31.s, sxtw #1]"); - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), - "ldff1h {z30.d}, p6/z, [x30, z31.d, uxtw #1]"); - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), - "ldff1h {z30.d}, p6/z, [x30, z31.d, sxtw #1]"); - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)), - "ldff1h {z30.d}, p6/z, [x30, z31.d, lsl #1]"); - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ldff1h {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ldff1h {z30.s}, p6/z, [x30, z31.s, uxtw]"); - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ldff1h {z30.s}, p6/z, [x30, z31.s, sxtw]"); - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ldff1h {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ldff1h {z30.d}, p6/z, [x30, z31.d, sxtw]"); - - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ldff1h {z30.s}, p6/z, [z31.s]"); - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), - "ldff1h {z30.s}, p6/z, [z31.s, #62]"); - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ldff1h {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), - "ldff1h 
{z30.d}, p6/z, [z31.d, #62]"); - - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), - "ldff1sh {z30.s}, p6/z, [x30, z31.s, uxtw #1]"); - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), - "ldff1sh {z30.s}, p6/z, [x30, z31.s, sxtw #1]"); - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), - "ldff1sh {z30.d}, p6/z, [x30, z31.d, uxtw #1]"); - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), - "ldff1sh {z30.d}, p6/z, [x30, z31.d, sxtw #1]"); - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)), - "ldff1sh {z30.d}, p6/z, [x30, z31.d, lsl #1]"); - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ldff1sh {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ldff1sh {z30.s}, p6/z, [x30, z31.s, uxtw]"); - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ldff1sh {z30.s}, p6/z, [x30, z31.s, sxtw]"); - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ldff1sh {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ldff1sh {z30.d}, p6/z, [x30, z31.d, sxtw]"); - - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ldff1sh {z30.s}, p6/z, [z31.s]"); - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), - "ldff1sh {z30.s}, p6/z, [z31.s, #62]"); - TEST_SINGLE(ldff1sh(ZReg::z30, 
PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ldff1sh {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), - "ldff1sh {z30.d}, p6/z, [z31.d, #62]"); - - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), - "ldff1w {z30.s}, p6/z, [x30, z31.s, uxtw #2]"); - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), - "ldff1w {z30.s}, p6/z, [x30, z31.s, sxtw #2]"); - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), - "ldff1w {z30.d}, p6/z, [x30, z31.d, uxtw #2]"); - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), - "ldff1w {z30.d}, p6/z, [x30, z31.d, sxtw #2]"); - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), - "ldff1w {z30.d}, p6/z, [x30, z31.d, lsl #2]"); - - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ldff1w {z30.s}, p6/z, [x30, z31.s, uxtw]"); - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ldff1w {z30.s}, p6/z, [x30, z31.s, sxtw]"); - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ldff1w {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ldff1w {z30.d}, p6/z, [x30, z31.d, sxtw]"); - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ldff1w {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ldff1w {z30.s}, p6/z, [z31.s]"); - 
TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), - "ldff1w {z30.s}, p6/z, [z31.s, #124]"); - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ldff1w {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), - "ldff1w {z30.d}, p6/z, [z31.d, #124]"); - - TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "ldff1sw {z30.d}, p6/z, [x30, z31.d, uxtw]"); - TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "ldff1sw {z30.d}, p6/z, [x30, z31.d, sxtw]"); - TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), - "ldff1sw {z30.d}, p6/z, [x30, z31.d, uxtw #2]"); - TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), - "ldff1sw {z30.d}, p6/z, [x30, z31.d, sxtw #2]"); - TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), - "ldff1sw {z30.d}, p6/z, [x30, z31.d, lsl #2]"); - TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "ldff1sw {z30.d}, p6/z, [x30, z31.d]"); - - TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), - "ldff1sw {z30.d}, p6/z, [z31.d]"); - TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), - "ldff1sw {z30.d}, p6/z, [z31.d, #124]"); + TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ld1b {z30.s}, p6/z, [x30, z31.s, uxtw]"); + TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ld1b {z30.s}, p6/z, [x30, z31.s, sxtw]"); + TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), 
SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ld1b {z30.d}, p6/z, [x30, z31.d, uxtw]"); + TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ld1b {z30.d}, p6/z, [x30, z31.d, sxtw]"); + TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), + "ld1b {z30.d}, p6/z, [x30, z31.d]"); + + TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1b {z30.s}, p6/z, [z31.s]"); + TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ld1b {z30.s}, p6/z, [z31.s, #31]"); + TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1b {z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ld1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ld1b {z30.d}, p6/z, [z31.d, #31]"); + + TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ld1sb {z30.s}, p6/z, [x30, z31.s, uxtw]"); + TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ld1sb {z30.s}, p6/z, [x30, z31.s, sxtw]"); + TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ld1sb {z30.d}, p6/z, [x30, z31.d, uxtw]"); + TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ld1sb {z30.d}, p6/z, [x30, z31.d, sxtw]"); + TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), + "ld1sb {z30.d}, p6/z, [x30, z31.d]"); + + TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1sb {z30.s}, p6/z, [z31.s]"); + TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ld1sb {z30.s}, p6/z, [z31.s, #31]"); + TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1sb 
{z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ld1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ld1sb {z30.d}, p6/z, [z31.d, #31]"); + + TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "ld1d {z30.d}, p6/z, " + "[x30, z31.d, uxtw]"); + TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "ld1d {z30.d}, p6/z, " + "[x30, z31.d, sxtw]"); + TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 3)), "ld1d {z30.d}, p6/z, " + "[x30, z31.d, uxtw #3]"); + TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 3)), "ld1d {z30.d}, p6/z, " + "[x30, z31.d, sxtw #3]"); + TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 3)), "ld1d {z30.d}, p6/z, [x30, " + "z31.d, lsl #3]"); + TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "ld1d {z30.d}, p6/z, " + "[x30, z31.d]"); + + TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1d {z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 248)), "ld1d {z30.d}, p6/z, [z31.d, #248]"); + + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), + "ld1h {z30.s}, p6/z, [x30, z31.s, uxtw #1]"); + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), + "ld1h {z30.s}, p6/z, [x30, z31.s, sxtw #1]"); + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), + "ld1h {z30.d}, p6/z, [x30, z31.d, uxtw #1]"); + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), + "ld1h {z30.d}, p6/z, [x30, z31.d, sxtw #1]"); + 
TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)), + "ld1h {z30.d}, p6/z, [x30, z31.d, lsl #1]"); + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), + "ld1h {z30.d}, p6/z, [x30, z31.d]"); + + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ld1h {z30.s}, p6/z, [x30, z31.s, uxtw]"); + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ld1h {z30.s}, p6/z, [x30, z31.s, sxtw]"); + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ld1h {z30.d}, p6/z, [x30, z31.d, uxtw]"); + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ld1h {z30.d}, p6/z, [x30, z31.d, sxtw]"); + + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1h {z30.s}, p6/z, [z31.s]"); + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ld1h {z30.s}, p6/z, [z31.s, #62]"); + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1h {z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ld1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ld1h {z30.d}, p6/z, [z31.d, #62]"); + + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), + "ld1sh {z30.s}, p6/z, [x30, z31.s, uxtw #1]"); + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), + "ld1sh {z30.s}, p6/z, [x30, z31.s, sxtw #1]"); + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), + "ld1sh {z30.d}, p6/z, [x30, z31.d, uxtw #1]"); + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, 
SVEModType::MOD_SXTW, 1)), + "ld1sh {z30.d}, p6/z, [x30, z31.d, sxtw #1]"); + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)), + "ld1sh {z30.d}, p6/z, [x30, z31.d, lsl #1]"); + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), + "ld1sh {z30.d}, p6/z, [x30, z31.d]"); + + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ld1sh {z30.s}, p6/z, [x30, z31.s, uxtw]"); + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ld1sh {z30.s}, p6/z, [x30, z31.s, sxtw]"); + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ld1sh {z30.d}, p6/z, [x30, z31.d, uxtw]"); + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ld1sh {z30.d}, p6/z, [x30, z31.d, sxtw]"); + + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1sh {z30.s}, p6/z, [z31.s]"); + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ld1sh {z30.s}, p6/z, [z31.s, #62]"); + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1sh {z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ld1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ld1sh {z30.d}, p6/z, [z31.d, #62]"); + + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), + "ld1w {z30.s}, p6/z, [x30, z31.s, uxtw #2]"); + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), + "ld1w {z30.s}, p6/z, [x30, z31.s, sxtw #2]"); + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), + "ld1w {z30.d}, p6/z, [x30, z31.d, uxtw #2]"); + 
TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), + "ld1w {z30.d}, p6/z, [x30, z31.d, sxtw #2]"); + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), + "ld1w {z30.d}, p6/z, [x30, z31.d, lsl #2]"); + + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ld1w {z30.s}, p6/z, [x30, z31.s, uxtw]"); + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ld1w {z30.s}, p6/z, [x30, z31.s, sxtw]"); + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ld1w {z30.d}, p6/z, [x30, z31.d, uxtw]"); + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ld1w {z30.d}, p6/z, [x30, z31.d, sxtw]"); + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), + "ld1w {z30.d}, p6/z, [x30, z31.d]"); + + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1w {z30.s}, p6/z, [z31.s]"); + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), "ld1w {z30.s}, p6/z, [z31.s, #124]"); + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1w {z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ld1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), "ld1w {z30.d}, p6/z, [z31.d, #124]"); + + TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "ld1sw {z30.d}, p6/z, " + "[x30, z31.d, uxtw]"); + TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "ld1sw {z30.d}, p6/z, " + "[x30, z31.d, sxtw]"); + TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, 
SVEModType::MOD_UXTW, 2)), "ld1sw {z30.d}, p6/z, " + "[x30, z31.d, uxtw #2]"); + TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), "ld1sw {z30.d}, p6/z, " + "[x30, z31.d, sxtw #2]"); + TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), "ld1sw {z30.d}, p6/z, " + "[x30, z31.d, lsl #2]"); + TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "ld1sw {z30.d}, p6/z, " + "[x30, z31.d]"); + + TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1sw {z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), "ld1sw {z30.d}, p6/z, [z31.d, #124]"); + + TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ldff1b {z30.s}, p6/z, [x30, z31.s, uxtw]"); + TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ldff1b {z30.s}, p6/z, [x30, z31.s, sxtw]"); + TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ldff1b {z30.d}, p6/z, [x30, z31.d, uxtw]"); + TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ldff1b {z30.d}, p6/z, [x30, z31.d, sxtw]"); + TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), + "ldff1b {z30.d}, p6/z, [x30, z31.d]"); + + TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1b {z30.s}, p6/z, [z31.s]"); + TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ldff1b {z30.s}, p6/z, [z31.s, " + "#31]"); + TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1b {z30.d}, p6/z, [z31.d]"); + 
TEST_SINGLE(ldff1b(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ldff1b {z30.d}, p6/z, [z31.d, " + "#31]"); + + TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ldff1sb {z30.s}, p6/z, [x30, z31.s, uxtw]"); + TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ldff1sb {z30.s}, p6/z, [x30, z31.s, sxtw]"); + TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ldff1sb {z30.d}, p6/z, [x30, z31.d, uxtw]"); + TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ldff1sb {z30.d}, p6/z, [x30, z31.d, sxtw]"); + TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), + "ldff1sb {z30.d}, p6/z, [x30, z31.d]"); + + TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1sb {z30.s}, p6/z, [z31.s]"); + TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ldff1sb {z30.s}, p6/z, [z31.s, " + "#31]"); + TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1sb {z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ldff1sb(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ldff1sb {z30.d}, p6/z, [z31.d, " + "#31]"); + + TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "ldff1d {z30.d}, p6/z, " + "[x30, z31.d, uxtw]"); + TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "ldff1d {z30.d}, p6/z, " + "[x30, z31.d, sxtw]"); + TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 3)), "ldff1d {z30.d}, p6/z, " + "[x30, z31.d, uxtw #3]"); + TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), 
SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 3)), "ldff1d {z30.d}, p6/z, " + "[x30, z31.d, sxtw #3]"); + TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 3)), "ldff1d {z30.d}, p6/z, " + "[x30, z31.d, lsl #3]"); + TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "ldff1d {z30.d}, p6/z, " + "[x30, z31.d]"); + + TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1d {z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 248)), "ldff1d {z30.d}, p6/z, [z31.d, #248]"); + + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), + "ldff1h {z30.s}, p6/z, [x30, z31.s, uxtw #1]"); + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), + "ldff1h {z30.s}, p6/z, [x30, z31.s, sxtw #1]"); + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), + "ldff1h {z30.d}, p6/z, [x30, z31.d, uxtw #1]"); + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), + "ldff1h {z30.d}, p6/z, [x30, z31.d, sxtw #1]"); + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)), + "ldff1h {z30.d}, p6/z, [x30, z31.d, lsl #1]"); + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), + "ldff1h {z30.d}, p6/z, [x30, z31.d]"); + + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ldff1h {z30.s}, p6/z, [x30, z31.s, uxtw]"); + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ldff1h {z30.s}, p6/z, [x30, z31.s, sxtw]"); + 
TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ldff1h {z30.d}, p6/z, [x30, z31.d, uxtw]"); + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ldff1h {z30.d}, p6/z, [x30, z31.d, sxtw]"); + + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1h {z30.s}, p6/z, [z31.s]"); + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ldff1h {z30.s}, p6/z, [z31.s, " + "#62]"); + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1h {z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ldff1h(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ldff1h {z30.d}, p6/z, [z31.d, " + "#62]"); + + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), + "ldff1sh {z30.s}, p6/z, [x30, z31.s, uxtw #1]"); + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), + "ldff1sh {z30.s}, p6/z, [x30, z31.s, sxtw #1]"); + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), + "ldff1sh {z30.d}, p6/z, [x30, z31.d, uxtw #1]"); + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), + "ldff1sh {z30.d}, p6/z, [x30, z31.d, sxtw #1]"); + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)), + "ldff1sh {z30.d}, p6/z, [x30, z31.d, lsl #1]"); + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), + "ldff1sh {z30.d}, p6/z, [x30, z31.d]"); + + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ldff1sh {z30.s}, p6/z, [x30, z31.s, uxtw]"); + 
TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ldff1sh {z30.s}, p6/z, [x30, z31.s, sxtw]"); + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ldff1sh {z30.d}, p6/z, [x30, z31.d, uxtw]"); + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ldff1sh {z30.d}, p6/z, [x30, z31.d, sxtw]"); + + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1sh {z30.s}, p6/z, [z31.s]"); + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ldff1sh {z30.s}, p6/z, [z31.s, " + "#62]"); + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1sh {z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ldff1sh(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ldff1sh {z30.d}, p6/z, [z31.d, " + "#62]"); + + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), + "ldff1w {z30.s}, p6/z, [x30, z31.s, uxtw #2]"); + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), + "ldff1w {z30.s}, p6/z, [x30, z31.s, sxtw #2]"); + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), + "ldff1w {z30.d}, p6/z, [x30, z31.d, uxtw #2]"); + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), + "ldff1w {z30.d}, p6/z, [x30, z31.d, sxtw #2]"); + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), + "ldff1w {z30.d}, p6/z, [x30, z31.d, lsl #2]"); + + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ldff1w {z30.s}, p6/z, [x30, z31.s, uxtw]"); + 
TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ldff1w {z30.s}, p6/z, [x30, z31.s, sxtw]"); + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), + "ldff1w {z30.d}, p6/z, [x30, z31.d, uxtw]"); + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), + "ldff1w {z30.d}, p6/z, [x30, z31.d, sxtw]"); + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), + "ldff1w {z30.d}, p6/z, [x30, z31.d]"); + + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1w {z30.s}, p6/z, [z31.s]"); + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), "ldff1w {z30.s}, p6/z, [z31.s, " + "#124]"); + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1w {z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ldff1w(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), "ldff1w {z30.d}, p6/z, [z31.d, " + "#124]"); + + TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "ldff1sw {z30.d}, " + "p6/z, [x30, z31.d, " + "uxtw]"); + TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "ldff1sw {z30.d}, " + "p6/z, [x30, z31.d, " + "sxtw]"); + TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), "ldff1sw {z30.d}, " + "p6/z, [x30, z31.d, " + "uxtw #2]"); + TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), "ldff1sw {z30.d}, " + "p6/z, [x30, z31.d, " + "sxtw #2]"); + TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), "ldff1sw {z30.d}, p6/z, " + "[x30, z31.d, lsl #2]"); + 
TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "ldff1sw {z30.d}, " + "p6/z, [x30, z31.d]"); + + TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1sw {z30.d}, p6/z, [z31.d]"); + TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), "ldff1sw {z30.d}, p6/z, [z31.d, #124]"); TEST_SINGLE(ldr(PReg::p6, XReg::x29, 0), "ldr p6, [x29]"); TEST_SINGLE(ldr(PReg::p6, XReg::x29, -256), "ldr p6, [x29, #-256, mul vl]"); @@ -4075,133 +3909,148 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Memory - 32-bit Gather and } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load and broadcast element") { - TEST_SINGLE(ld1rb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rb {z30.b}, p6/z, [x29]"); - TEST_SINGLE(ld1rb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 31), "ld1rb {z30.b}, p6/z, [x29, #31]"); - TEST_SINGLE(ld1rb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 63), "ld1rb {z30.b}, p6/z, [x29, #63]"); + TEST_SINGLE(ld1rb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rb {z30.b}, p6/z, [x29]"); + TEST_SINGLE(ld1rb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 31), "ld1rb {z30.b}, p6/z, [x29, #31]"); + TEST_SINGLE(ld1rb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 63), "ld1rb {z30.b}, p6/z, [x29, #63]"); - TEST_SINGLE(ld1rb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rb {z30.h}, p6/z, [x29]"); - TEST_SINGLE(ld1rb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 31), "ld1rb {z30.h}, p6/z, [x29, #31]"); - TEST_SINGLE(ld1rb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 63), "ld1rb {z30.h}, p6/z, [x29, #63]"); + TEST_SINGLE(ld1rb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rb {z30.h}, p6/z, [x29]"); + TEST_SINGLE(ld1rb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), 
Reg::r29, 31), "ld1rb {z30.h}, p6/z, [x29, #31]"); + TEST_SINGLE(ld1rb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 63), "ld1rb {z30.h}, p6/z, [x29, #63]"); - TEST_SINGLE(ld1rb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rb {z30.s}, p6/z, [x29]"); - TEST_SINGLE(ld1rb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 31), "ld1rb {z30.s}, p6/z, [x29, #31]"); - TEST_SINGLE(ld1rb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 63), "ld1rb {z30.s}, p6/z, [x29, #63]"); + TEST_SINGLE(ld1rb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rb {z30.s}, p6/z, [x29]"); + TEST_SINGLE(ld1rb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 31), "ld1rb {z30.s}, p6/z, [x29, #31]"); + TEST_SINGLE(ld1rb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 63), "ld1rb {z30.s}, p6/z, [x29, #63]"); - TEST_SINGLE(ld1rb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rb {z30.d}, p6/z, [x29]"); - TEST_SINGLE(ld1rb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 31), "ld1rb {z30.d}, p6/z, [x29, #31]"); - TEST_SINGLE(ld1rb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 63), "ld1rb {z30.d}, p6/z, [x29, #63]"); + TEST_SINGLE(ld1rb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rb {z30.d}, p6/z, [x29]"); + TEST_SINGLE(ld1rb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 31), "ld1rb {z30.d}, p6/z, [x29, #31]"); + TEST_SINGLE(ld1rb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 63), "ld1rb {z30.d}, p6/z, [x29, #63]"); - TEST_SINGLE(ld1rsb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rsb {z30.h}, p6/z, [x29]"); + TEST_SINGLE(ld1rsb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rsb {z30.h}, p6/z, [x29]"); TEST_SINGLE(ld1rsb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 31), "ld1rsb {z30.h}, p6/z, [x29, #31]"); 
TEST_SINGLE(ld1rsb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 63), "ld1rsb {z30.h}, p6/z, [x29, #63]"); - TEST_SINGLE(ld1rsb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rsb {z30.s}, p6/z, [x29]"); + TEST_SINGLE(ld1rsb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rsb {z30.s}, p6/z, [x29]"); TEST_SINGLE(ld1rsb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 31), "ld1rsb {z30.s}, p6/z, [x29, #31]"); TEST_SINGLE(ld1rsb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 63), "ld1rsb {z30.s}, p6/z, [x29, #63]"); - TEST_SINGLE(ld1rsb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rsb {z30.d}, p6/z, [x29]"); + TEST_SINGLE(ld1rsb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rsb {z30.d}, p6/z, [x29]"); TEST_SINGLE(ld1rsb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 31), "ld1rsb {z30.d}, p6/z, [x29, #31]"); TEST_SINGLE(ld1rsb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 63), "ld1rsb {z30.d}, p6/z, [x29, #63]"); - TEST_SINGLE(ld1rh(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rh {z30.h}, p6/z, [x29]"); - TEST_SINGLE(ld1rh(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 64), "ld1rh {z30.h}, p6/z, [x29, #64]"); - TEST_SINGLE(ld1rh(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 126), "ld1rh {z30.h}, p6/z, [x29, #126]"); + TEST_SINGLE(ld1rh(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rh {z30.h}, p6/z, [x29]"); + TEST_SINGLE(ld1rh(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 64), "ld1rh {z30.h}, p6/z, [x29, #64]"); + TEST_SINGLE(ld1rh(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 126), "ld1rh {z30.h}, p6/z, [x29, #126]"); - TEST_SINGLE(ld1rh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rh {z30.s}, p6/z, [x29]"); - TEST_SINGLE(ld1rh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), 
Reg::r29, 64), "ld1rh {z30.s}, p6/z, [x29, #64]"); - TEST_SINGLE(ld1rh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 126), "ld1rh {z30.s}, p6/z, [x29, #126]"); + TEST_SINGLE(ld1rh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rh {z30.s}, p6/z, [x29]"); + TEST_SINGLE(ld1rh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 64), "ld1rh {z30.s}, p6/z, [x29, #64]"); + TEST_SINGLE(ld1rh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 126), "ld1rh {z30.s}, p6/z, [x29, #126]"); - TEST_SINGLE(ld1rh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rh {z30.d}, p6/z, [x29]"); - TEST_SINGLE(ld1rh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 64), "ld1rh {z30.d}, p6/z, [x29, #64]"); - TEST_SINGLE(ld1rh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 126), "ld1rh {z30.d}, p6/z, [x29, #126]"); + TEST_SINGLE(ld1rh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rh {z30.d}, p6/z, [x29]"); + TEST_SINGLE(ld1rh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 64), "ld1rh {z30.d}, p6/z, [x29, #64]"); + TEST_SINGLE(ld1rh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 126), "ld1rh {z30.d}, p6/z, [x29, #126]"); - TEST_SINGLE(ld1rsh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rsh {z30.s}, p6/z, [x29]"); - TEST_SINGLE(ld1rsh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 64), "ld1rsh {z30.s}, p6/z, [x29, #64]"); + TEST_SINGLE(ld1rsh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rsh {z30.s}, p6/z, [x29]"); + TEST_SINGLE(ld1rsh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 64), "ld1rsh {z30.s}, p6/z, [x29, #64]"); TEST_SINGLE(ld1rsh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 126), "ld1rsh {z30.s}, p6/z, [x29, #126]"); - TEST_SINGLE(ld1rsh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rsh {z30.d}, p6/z, [x29]"); - 
TEST_SINGLE(ld1rsh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 64), "ld1rsh {z30.d}, p6/z, [x29, #64]"); + TEST_SINGLE(ld1rsh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rsh {z30.d}, p6/z, [x29]"); + TEST_SINGLE(ld1rsh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 64), "ld1rsh {z30.d}, p6/z, [x29, #64]"); TEST_SINGLE(ld1rsh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 126), "ld1rsh {z30.d}, p6/z, [x29, #126]"); - TEST_SINGLE(ld1rw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rw {z30.s}, p6/z, [x29]"); - TEST_SINGLE(ld1rw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 128), "ld1rw {z30.s}, p6/z, [x29, #128]"); - TEST_SINGLE(ld1rw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 252), "ld1rw {z30.s}, p6/z, [x29, #252]"); + TEST_SINGLE(ld1rw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rw {z30.s}, p6/z, [x29]"); + TEST_SINGLE(ld1rw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 128), "ld1rw {z30.s}, p6/z, [x29, #128]"); + TEST_SINGLE(ld1rw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 252), "ld1rw {z30.s}, p6/z, [x29, #252]"); - TEST_SINGLE(ld1rw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rw {z30.d}, p6/z, [x29]"); - TEST_SINGLE(ld1rw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 128), "ld1rw {z30.d}, p6/z, [x29, #128]"); - TEST_SINGLE(ld1rw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 252), "ld1rw {z30.d}, p6/z, [x29, #252]"); + TEST_SINGLE(ld1rw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rw {z30.d}, p6/z, [x29]"); + TEST_SINGLE(ld1rw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 128), "ld1rw {z30.d}, p6/z, [x29, #128]"); + TEST_SINGLE(ld1rw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 252), "ld1rw {z30.d}, p6/z, [x29, #252]"); - TEST_SINGLE(ld1rsw(ZReg::z30, PReg::p6.Zeroing(), 
Reg::r29, 0), "ld1rsw {z30.d}, p6/z, [x29]"); + TEST_SINGLE(ld1rsw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rsw {z30.d}, p6/z, [x29]"); TEST_SINGLE(ld1rsw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 128), "ld1rsw {z30.d}, p6/z, [x29, #128]"); TEST_SINGLE(ld1rsw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 252), "ld1rsw {z30.d}, p6/z, [x29, #252]"); - TEST_SINGLE(ld1rd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rd {z30.d}, p6/z, [x29]"); + TEST_SINGLE(ld1rd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rd {z30.d}, p6/z, [x29]"); TEST_SINGLE(ld1rd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 256), "ld1rd {z30.d}, p6/z, [x29, #256]"); TEST_SINGLE(ld1rd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 504), "ld1rd {z30.d}, p6/z, [x29, #504]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load multiple structures (scalar plus scalar)") { - TEST_SINGLE(ld2b(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2b {z31.b, z0.b}, p6/z, [x29, x30]"); + TEST_SINGLE(ld2b(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2b {z31.b, z0.b}, p6/z, [x29, x30]"); TEST_SINGLE(ld2b(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2b {z26.b, z27.b}, p6/z, [x29, x30]"); - TEST_SINGLE(ld3b(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3b {z31.b, z0.b, z1.b}, p6/z, [x29, x30]"); - TEST_SINGLE(ld3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3b {z26.b, z27.b, z28.b}, p6/z, [x29, x30]"); - TEST_SINGLE(ld4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4b {z31.b, z0.b, z1.b, z2.b}, p6/z, [x29, x30]"); - TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4b {z26.b, z27.b, z28.b, z29.b}, p6/z, [x29, x30]"); - - TEST_SINGLE(ld2h(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2h {z31.h, z0.h}, p6/z, [x29, x30, lsl #1]"); + TEST_SINGLE(ld3b(ZReg::z31, ZReg::z0, ZReg::z1, 
PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3b {z31.b, z0.b, z1.b}, p6/z, [x29, x30]"); + TEST_SINGLE(ld3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3b {z26.b, z27.b, z28.b}, p6/z, [x29, " + "x30]"); + TEST_SINGLE(ld4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4b {z31.b, z0.b, z1.b, z2.b}, " + "p6/z, [x29, x30]"); + TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4b {z26.b, z27.b, z28.b, " + "z29.b}, p6/z, [x29, x30]"); + + TEST_SINGLE(ld2h(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2h {z31.h, z0.h}, p6/z, [x29, x30, lsl #1]"); TEST_SINGLE(ld2h(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2h {z26.h, z27.h}, p6/z, [x29, x30, lsl #1]"); - TEST_SINGLE(ld3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3h {z31.h, z0.h, z1.h}, p6/z, [x29, x30, lsl #1]"); - TEST_SINGLE(ld3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3h {z26.h, z27.h, z28.h}, p6/z, [x29, x30, lsl #1]"); - TEST_SINGLE(ld4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4h {z31.h, z0.h, z1.h, z2.h}, p6/z, [x29, x30, lsl #1]"); - TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x29, x30, lsl #1]"); - - TEST_SINGLE(ld2w(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2w {z31.s, z0.s}, p6/z, [x29, x30, lsl #2]"); + TEST_SINGLE(ld3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3h {z31.h, z0.h, z1.h}, p6/z, [x29, x30, lsl " + "#1]"); + TEST_SINGLE(ld3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3h {z26.h, z27.h, z28.h}, p6/z, [x29, x30, " + "lsl #1]"); + TEST_SINGLE(ld4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4h {z31.h, 
z0.h, z1.h, z2.h}, " + "p6/z, [x29, x30, lsl #1]"); + TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4h {z26.h, z27.h, z28.h, " + "z29.h}, p6/z, [x29, x30, lsl #1]"); + + TEST_SINGLE(ld2w(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2w {z31.s, z0.s}, p6/z, [x29, x30, lsl #2]"); TEST_SINGLE(ld2w(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2w {z26.s, z27.s}, p6/z, [x29, x30, lsl #2]"); - TEST_SINGLE(ld3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3w {z31.s, z0.s, z1.s}, p6/z, [x29, x30, lsl #2]"); - TEST_SINGLE(ld3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3w {z26.s, z27.s, z28.s}, p6/z, [x29, x30, lsl #2]"); - TEST_SINGLE(ld4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4w {z31.s, z0.s, z1.s, z2.s}, p6/z, [x29, x30, lsl #2]"); - TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4w {z26.s, z27.s, z28.s, z29.s}, p6/z, [x29, x30, lsl #2]"); - - TEST_SINGLE(ld2d(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2d {z31.d, z0.d}, p6/z, [x29, x30, lsl #3]"); + TEST_SINGLE(ld3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3w {z31.s, z0.s, z1.s}, p6/z, [x29, x30, lsl " + "#2]"); + TEST_SINGLE(ld3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3w {z26.s, z27.s, z28.s}, p6/z, [x29, x30, " + "lsl #2]"); + TEST_SINGLE(ld4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4w {z31.s, z0.s, z1.s, z2.s}, " + "p6/z, [x29, x30, lsl #2]"); + TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4w {z26.s, z27.s, z28.s, " + "z29.s}, p6/z, [x29, x30, lsl #2]"); + + TEST_SINGLE(ld2d(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2d {z31.d, z0.d}, 
p6/z, [x29, x30, lsl #3]"); TEST_SINGLE(ld2d(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2d {z26.d, z27.d}, p6/z, [x29, x30, lsl #3]"); - TEST_SINGLE(ld3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3d {z31.d, z0.d, z1.d}, p6/z, [x29, x30, lsl #3]"); - TEST_SINGLE(ld3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3d {z26.d, z27.d, z28.d}, p6/z, [x29, x30, lsl #3]"); - TEST_SINGLE(ld4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4d {z31.d, z0.d, z1.d, z2.d}, p6/z, [x29, x30, lsl #3]"); - TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4d {z26.d, z27.d, z28.d, z29.d}, p6/z, [x29, x30, lsl #3]"); + TEST_SINGLE(ld3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3d {z31.d, z0.d, z1.d}, p6/z, [x29, x30, lsl " + "#3]"); + TEST_SINGLE(ld3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3d {z26.d, z27.d, z28.d}, p6/z, [x29, x30, " + "lsl #3]"); + TEST_SINGLE(ld4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4d {z31.d, z0.d, z1.d, z2.d}, " + "p6/z, [x29, x30, lsl #3]"); + TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4d {z26.d, z27.d, z28.d, " + "z29.d}, p6/z, [x29, x30, lsl #3]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load and broadcast quadword (scalar plus immediate)") { - TEST_SINGLE(ld1rqb(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqb {z30.b}, p6/z, [x29]"); + TEST_SINGLE(ld1rqb(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqb {z30.b}, p6/z, [x29]"); TEST_SINGLE(ld1rqb(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -128), "ld1rqb {z30.b}, p6/z, [x29, #-128]"); - TEST_SINGLE(ld1rqb(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqb {z30.b}, p6/z, [x29, #112]"); + TEST_SINGLE(ld1rqb(ZReg::z30, 
PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqb {z30.b}, p6/z, [x29, #112]"); - TEST_SINGLE(ld1rob(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rob {z30.b}, p6/z, [x29]"); + TEST_SINGLE(ld1rob(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rob {z30.b}, p6/z, [x29]"); TEST_SINGLE(ld1rob(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -256), "ld1rob {z30.b}, p6/z, [x29, #-256]"); - TEST_SINGLE(ld1rob(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1rob {z30.b}, p6/z, [x29, #224]"); + TEST_SINGLE(ld1rob(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1rob {z30.b}, p6/z, [x29, #224]"); - TEST_SINGLE(ld1rqh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqh {z30.h}, p6/z, [x29]"); + TEST_SINGLE(ld1rqh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqh {z30.h}, p6/z, [x29]"); TEST_SINGLE(ld1rqh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -128), "ld1rqh {z30.h}, p6/z, [x29, #-128]"); - TEST_SINGLE(ld1rqh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqh {z30.h}, p6/z, [x29, #112]"); + TEST_SINGLE(ld1rqh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqh {z30.h}, p6/z, [x29, #112]"); - TEST_SINGLE(ld1roh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1roh {z30.h}, p6/z, [x29]"); + TEST_SINGLE(ld1roh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1roh {z30.h}, p6/z, [x29]"); TEST_SINGLE(ld1roh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -256), "ld1roh {z30.h}, p6/z, [x29, #-256]"); - TEST_SINGLE(ld1roh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1roh {z30.h}, p6/z, [x29, #224]"); + TEST_SINGLE(ld1roh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1roh {z30.h}, p6/z, [x29, #224]"); - TEST_SINGLE(ld1rqw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqw {z30.s}, p6/z, [x29]"); + TEST_SINGLE(ld1rqw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqw {z30.s}, p6/z, [x29]"); TEST_SINGLE(ld1rqw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -128), "ld1rqw {z30.s}, p6/z, [x29, #-128]"); - TEST_SINGLE(ld1rqw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqw 
{z30.s}, p6/z, [x29, #112]"); + TEST_SINGLE(ld1rqw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqw {z30.s}, p6/z, [x29, #112]"); - TEST_SINGLE(ld1row(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1row {z30.s}, p6/z, [x29]"); + TEST_SINGLE(ld1row(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1row {z30.s}, p6/z, [x29]"); TEST_SINGLE(ld1row(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -256), "ld1row {z30.s}, p6/z, [x29, #-256]"); - TEST_SINGLE(ld1row(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1row {z30.s}, p6/z, [x29, #224]"); + TEST_SINGLE(ld1row(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1row {z30.s}, p6/z, [x29, #224]"); - TEST_SINGLE(ld1rqd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqd {z30.d}, p6/z, [x29]"); + TEST_SINGLE(ld1rqd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqd {z30.d}, p6/z, [x29]"); TEST_SINGLE(ld1rqd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -128), "ld1rqd {z30.d}, p6/z, [x29, #-128]"); - TEST_SINGLE(ld1rqd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqd {z30.d}, p6/z, [x29, #112]"); + TEST_SINGLE(ld1rqd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqd {z30.d}, p6/z, [x29, #112]"); - TEST_SINGLE(ld1rod(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rod {z30.d}, p6/z, [x29]"); + TEST_SINGLE(ld1rod(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rod {z30.d}, p6/z, [x29]"); TEST_SINGLE(ld1rod(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -256), "ld1rod {z30.d}, p6/z, [x29, #-256]"); - TEST_SINGLE(ld1rod(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1rod {z30.d}, p6/z, [x29, #224]"); + TEST_SINGLE(ld1rod(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1rod {z30.d}, p6/z, [x29, #224]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load and broadcast quadword (scalar plus scalar)") { @@ -4231,65 +4080,89 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load and broadcast quadwor } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load multiple structures (scalar plus immediate)") 
{ - TEST_SINGLE(ld2b(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2b {z31.b, z0.b}, p6/z, [x29]"); + TEST_SINGLE(ld2b(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2b {z31.b, z0.b}, p6/z, [x29]"); TEST_SINGLE(ld2b(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 0), "ld2b {z26.b, z27.b}, p6/z, [x29]"); TEST_SINGLE(ld2b(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, -16), "ld2b {z26.b, z27.b}, p6/z, [x29, #-16, mul vl]"); TEST_SINGLE(ld2b(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 14), "ld2b {z26.b, z27.b}, p6/z, [x29, #14, mul vl]"); - TEST_SINGLE(ld2h(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2h {z31.h, z0.h}, p6/z, [x29]"); + TEST_SINGLE(ld2h(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2h {z31.h, z0.h}, p6/z, [x29]"); TEST_SINGLE(ld2h(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 0), "ld2h {z26.h, z27.h}, p6/z, [x29]"); TEST_SINGLE(ld2h(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, -16), "ld2h {z26.h, z27.h}, p6/z, [x29, #-16, mul vl]"); TEST_SINGLE(ld2h(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 14), "ld2h {z26.h, z27.h}, p6/z, [x29, #14, mul vl]"); - TEST_SINGLE(ld2w(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2w {z31.s, z0.s}, p6/z, [x29]"); + TEST_SINGLE(ld2w(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2w {z31.s, z0.s}, p6/z, [x29]"); TEST_SINGLE(ld2w(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 0), "ld2w {z26.s, z27.s}, p6/z, [x29]"); TEST_SINGLE(ld2w(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, -16), "ld2w {z26.s, z27.s}, p6/z, [x29, #-16, mul vl]"); TEST_SINGLE(ld2w(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 14), "ld2w {z26.s, z27.s}, p6/z, [x29, #14, mul vl]"); - TEST_SINGLE(ld2d(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2d {z31.d, z0.d}, p6/z, [x29]"); + TEST_SINGLE(ld2d(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2d {z31.d, z0.d}, p6/z, [x29]"); TEST_SINGLE(ld2d(ZReg::z26, 
ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 0), "ld2d {z26.d, z27.d}, p6/z, [x29]"); TEST_SINGLE(ld2d(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, -16), "ld2d {z26.d, z27.d}, p6/z, [x29, #-16, mul vl]"); TEST_SINGLE(ld2d(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 14), "ld2d {z26.d, z27.d}, p6/z, [x29, #14, mul vl]"); - TEST_SINGLE(ld3b(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, 0), "ld3b {z31.b, z0.b, z1.b}, p6/z, [x29]"); + TEST_SINGLE(ld3b(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, 0), "ld3b {z31.b, z0.b, z1.b}, p6/z, [x29]"); TEST_SINGLE(ld3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 0), "ld3b {z26.b, z27.b, z28.b}, p6/z, [x29]"); - TEST_SINGLE(ld3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, -24), "ld3b {z26.b, z27.b, z28.b}, p6/z, [x29, #-24, mul vl]"); - TEST_SINGLE(ld3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 21), "ld3b {z26.b, z27.b, z28.b}, p6/z, [x29, #21, mul vl]"); + TEST_SINGLE(ld3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, -24), "ld3b {z26.b, z27.b, z28.b}, p6/z, [x29, #-24, mul " + "vl]"); + TEST_SINGLE(ld3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 21), "ld3b {z26.b, z27.b, z28.b}, p6/z, [x29, #21, mul " + "vl]"); - TEST_SINGLE(ld3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, 0), "ld3h {z31.h, z0.h, z1.h}, p6/z, [x29]"); + TEST_SINGLE(ld3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, 0), "ld3h {z31.h, z0.h, z1.h}, p6/z, [x29]"); TEST_SINGLE(ld3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 0), "ld3h {z26.h, z27.h, z28.h}, p6/z, [x29]"); - TEST_SINGLE(ld3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, -24), "ld3h {z26.h, z27.h, z28.h}, p6/z, [x29, #-24, mul vl]"); - TEST_SINGLE(ld3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 21), "ld3h {z26.h, z27.h, z28.h}, p6/z, [x29, #21, mul vl]"); + 
TEST_SINGLE(ld3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, -24), "ld3h {z26.h, z27.h, z28.h}, p6/z, [x29, #-24, mul " + "vl]"); + TEST_SINGLE(ld3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 21), "ld3h {z26.h, z27.h, z28.h}, p6/z, [x29, #21, mul " + "vl]"); - TEST_SINGLE(ld3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, 0), "ld3w {z31.s, z0.s, z1.s}, p6/z, [x29]"); + TEST_SINGLE(ld3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, 0), "ld3w {z31.s, z0.s, z1.s}, p6/z, [x29]"); TEST_SINGLE(ld3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 0), "ld3w {z26.s, z27.s, z28.s}, p6/z, [x29]"); - TEST_SINGLE(ld3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, -24), "ld3w {z26.s, z27.s, z28.s}, p6/z, [x29, #-24, mul vl]"); - TEST_SINGLE(ld3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 21), "ld3w {z26.s, z27.s, z28.s}, p6/z, [x29, #21, mul vl]"); + TEST_SINGLE(ld3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, -24), "ld3w {z26.s, z27.s, z28.s}, p6/z, [x29, #-24, mul " + "vl]"); + TEST_SINGLE(ld3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 21), "ld3w {z26.s, z27.s, z28.s}, p6/z, [x29, #21, mul " + "vl]"); - TEST_SINGLE(ld3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, 0), "ld3d {z31.d, z0.d, z1.d}, p6/z, [x29]"); + TEST_SINGLE(ld3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, 0), "ld3d {z31.d, z0.d, z1.d}, p6/z, [x29]"); TEST_SINGLE(ld3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 0), "ld3d {z26.d, z27.d, z28.d}, p6/z, [x29]"); - TEST_SINGLE(ld3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, -24), "ld3d {z26.d, z27.d, z28.d}, p6/z, [x29, #-24, mul vl]"); - TEST_SINGLE(ld3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 21), "ld3d {z26.d, z27.d, z28.d}, p6/z, [x29, #21, mul vl]"); - - TEST_SINGLE(ld4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, 
PReg::p6.Zeroing(), Reg::r29, 0), "ld4b {z31.b, z0.b, z1.b, z2.b}, p6/z, [x29]"); - TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 0), "ld4b {z26.b, z27.b, z28.b, z29.b}, p6/z, [x29]"); - TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, -32), "ld4b {z26.b, z27.b, z28.b, z29.b}, p6/z, [x29, #-32, mul vl]"); - TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 28), "ld4b {z26.b, z27.b, z28.b, z29.b}, p6/z, [x29, #28, mul vl]"); - - TEST_SINGLE(ld4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, 0), "ld4h {z31.h, z0.h, z1.h, z2.h}, p6/z, [x29]"); - TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 0), "ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x29]"); - TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, -32), "ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x29, #-32, mul vl]"); - TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 28), "ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x29, #28, mul vl]"); - - TEST_SINGLE(ld4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, 0), "ld4w {z31.s, z0.s, z1.s, z2.s}, p6/z, [x29]"); - TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 0), "ld4w {z26.s, z27.s, z28.s, z29.s}, p6/z, [x29]"); - TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, -32), "ld4w {z26.s, z27.s, z28.s, z29.s}, p6/z, [x29, #-32, mul vl]"); - TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 28), "ld4w {z26.s, z27.s, z28.s, z29.s}, p6/z, [x29, #28, mul vl]"); - - TEST_SINGLE(ld4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, 0), "ld4d {z31.d, z0.d, z1.d, z2.d}, p6/z, [x29]"); - TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, 
PReg::p6.Zeroing(), Reg::r29, 0), "ld4d {z26.d, z27.d, z28.d, z29.d}, p6/z, [x29]"); - TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, -32), "ld4d {z26.d, z27.d, z28.d, z29.d}, p6/z, [x29, #-32, mul vl]"); - TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 28), "ld4d {z26.d, z27.d, z28.d, z29.d}, p6/z, [x29, #28, mul vl]"); + TEST_SINGLE(ld3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, -24), "ld3d {z26.d, z27.d, z28.d}, p6/z, [x29, #-24, mul " + "vl]"); + TEST_SINGLE(ld3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 21), "ld3d {z26.d, z27.d, z28.d}, p6/z, [x29, #21, mul " + "vl]"); + + TEST_SINGLE(ld4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, 0), "ld4b {z31.b, z0.b, z1.b, z2.b}, p6/z, " + "[x29]"); + TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 0), "ld4b {z26.b, z27.b, z28.b, z29.b}, p6/z, " + "[x29]"); + TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, -32), "ld4b {z26.b, z27.b, z28.b, z29.b}, " + "p6/z, [x29, #-32, mul vl]"); + TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 28), "ld4b {z26.b, z27.b, z28.b, z29.b}, " + "p6/z, [x29, #28, mul vl]"); + + TEST_SINGLE(ld4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, 0), "ld4h {z31.h, z0.h, z1.h, z2.h}, p6/z, " + "[x29]"); + TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 0), "ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, " + "[x29]"); + TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, -32), "ld4h {z26.h, z27.h, z28.h, z29.h}, " + "p6/z, [x29, #-32, mul vl]"); + TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 28), "ld4h {z26.h, z27.h, z28.h, z29.h}, " + "p6/z, [x29, #28, mul vl]"); + + 
TEST_SINGLE(ld4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, 0), "ld4w {z31.s, z0.s, z1.s, z2.s}, p6/z, " + "[x29]"); + TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 0), "ld4w {z26.s, z27.s, z28.s, z29.s}, p6/z, " + "[x29]"); + TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, -32), "ld4w {z26.s, z27.s, z28.s, z29.s}, " + "p6/z, [x29, #-32, mul vl]"); + TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 28), "ld4w {z26.s, z27.s, z28.s, z29.s}, " + "p6/z, [x29, #28, mul vl]"); + + TEST_SINGLE(ld4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, 0), "ld4d {z31.d, z0.d, z1.d, z2.d}, p6/z, " + "[x29]"); + TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 0), "ld4d {z26.d, z27.d, z28.d, z29.d}, p6/z, " + "[x29]"); + TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, -32), "ld4d {z26.d, z27.d, z28.d, z29.d}, " + "p6/z, [x29, #-32, mul vl]"); + TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 28), "ld4d {z26.d, z27.d, z28.d, z29.d}, " + "p6/z, [x29, #28, mul vl]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous load (scalar plus immediate)") { TEST_SINGLE(ld1b(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1b {z26.b}, p6/z, [x29]"); @@ -4311,38 +4184,38 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous load (scalar pl TEST_SINGLE(ld1sw(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sw {z26.d}, p6/z, [x29, #-8, mul vl]"); TEST_SINGLE(ld1sw(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sw {z26.d}, p6/z, [x29, #7, mul vl]"); - TEST_SINGLE(ld1w(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1w {z26.s}, p6/z, [x29]"); - TEST_SINGLE(ld1w(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1w {z26.d}, p6/z, [x29]"); + 
TEST_SINGLE(ld1w(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1w {z26.s}, p6/z, [x29]"); + TEST_SINGLE(ld1w(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1w {z26.d}, p6/z, [x29]"); TEST_SINGLE(ld1w(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1w {z26.s}, p6/z, [x29, #-8, mul vl]"); TEST_SINGLE(ld1w(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1w {z26.d}, p6/z, [x29, #-8, mul vl]"); - //TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1h {z26.b}, p6/z, [x29]"); + // TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1h {z26.b}, p6/z, [x29]"); TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1h {z26.h}, p6/z, [x29]"); TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1h {z26.s}, p6/z, [x29]"); TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1h {z26.d}, p6/z, [x29]"); - //TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1h {z26.b}, p6/z, [x29, #-8, mul vl]"); + // TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1h {z26.b}, p6/z, [x29, #-8, mul vl]"); TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1h {z26.h}, p6/z, [x29, #-8, mul vl]"); TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1h {z26.s}, p6/z, [x29, #-8, mul vl]"); TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1h {z26.d}, p6/z, [x29, #-8, mul vl]"); - //TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1h {z26.b}, p6/z, [x29, #7, mul vl]"); + // TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1h {z26.b}, p6/z, [x29, #7, mul vl]"); TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1h {z26.h}, p6/z, [x29, #7, mul vl]"); TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1h {z26.s}, p6/z, [x29, #7, mul vl]"); TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1h {z26.d}, p6/z, [x29, #7, mul vl]"); - //TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 
0), "ld1sh {z26.b}, p6/z, [x29]"); - //TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sh {z26.h}, p6/z, [x29]"); + // TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sh {z26.b}, p6/z, [x29]"); + // TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sh {z26.h}, p6/z, [x29]"); TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sh {z26.s}, p6/z, [x29]"); TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sh {z26.d}, p6/z, [x29]"); - //TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sh {z26.b}, p6/z, [x29, #-8, mul vl]"); - //TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sh {z26.h}, p6/z, [x29, #-8, mul vl]"); + // TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sh {z26.b}, p6/z, [x29, #-8, mul vl]"); + // TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sh {z26.h}, p6/z, [x29, #-8, mul vl]"); TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sh {z26.s}, p6/z, [x29, #-8, mul vl]"); TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sh {z26.d}, p6/z, [x29, #-8, mul vl]"); - //TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sh {z26.b}, p6/z, [x29, #7, mul vl]"); - //TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sh {z26.h}, p6/z, [x29, #7, mul vl]"); + // TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sh {z26.b}, p6/z, [x29, #7, mul vl]"); + // TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sh {z26.h}, p6/z, [x29, #7, mul vl]"); TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sh {z26.s}, p6/z, [x29, #7, mul vl]"); TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sh {z26.d}, p6/z, [x29, #7, mul vl]"); @@ -4350,17 +4223,17 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous load (scalar pl TEST_SINGLE(ld1sw(ZReg::z26, PReg::p6.Zeroing(), 
Reg::r29, -8), "ld1sw {z26.d}, p6/z, [x29, #-8, mul vl]"); TEST_SINGLE(ld1sw(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sw {z26.d}, p6/z, [x29, #7, mul vl]"); - //TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sb {z26.b}, p6/z, [x29]"); + // TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sb {z26.b}, p6/z, [x29]"); TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sb {z26.h}, p6/z, [x29]"); TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sb {z26.s}, p6/z, [x29]"); TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sb {z26.d}, p6/z, [x29]"); - //TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sb {z26.b}, p6/z, [x29, #-8, mul vl]"); + // TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sb {z26.b}, p6/z, [x29, #-8, mul vl]"); TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sb {z26.h}, p6/z, [x29, #-8, mul vl]"); TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sb {z26.s}, p6/z, [x29, #-8, mul vl]"); TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sb {z26.d}, p6/z, [x29, #-8, mul vl]"); - //TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sb {z26.b}, p6/z, [x29, #7, mul vl]"); + // TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sb {z26.b}, p6/z, [x29, #7, mul vl]"); TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sb {z26.h}, p6/z, [x29, #7, mul vl]"); TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sb {z26.s}, p6/z, [x29, #7, mul vl]"); TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sb {z26.d}, p6/z, [x29, #7, mul vl]"); @@ -4375,13 +4248,13 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous store (scalar p TEST_SINGLE(st1b(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1b {z26.s}, p6, [x29, x28]"); TEST_SINGLE(st1b(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1b 
{z26.d}, p6, [x29, x28]"); - //TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1h {z26.b}, p6, [x29, x28, lsl #1]"); + // TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1h {z26.b}, p6, [x29, x28, lsl #1]"); TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1h {z26.h}, p6, [x29, x28, lsl #1]"); TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1h {z26.s}, p6, [x29, x28, lsl #1]"); TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1h {z26.d}, p6, [x29, x28, lsl #1]"); - //TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1w {z26.b}, p6, [x29, x28, lsl #2]"); - //TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1w {z26.h}, p6, [x29, x28, lsl #2]"); + // TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1w {z26.b}, p6, [x29, x28, lsl #2]"); + // TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1w {z26.h}, p6, [x29, x28, lsl #2]"); TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1w {z26.s}, p6, [x29, x28, lsl #2]"); TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1w {z26.d}, p6, [x29, x28, lsl #2]"); @@ -4393,18 +4266,18 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous load (scalar pl TEST_SINGLE(ld1b(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1b {z26.s}, p6/z, [x29, x30]"); TEST_SINGLE(ld1b(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1b {z26.d}, p6/z, [x29, x30]"); - //TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sb {z26.b}, p6/z, [x29, x30]"); + // TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sb {z26.b}, p6/z, [x29, x30]"); TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sb {z26.h}, p6/z, [x29, x30]"); TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sb {z26.s}, p6/z, [x29, x30]"); TEST_SINGLE(ld1sb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sb {z26.d}, p6/z, [x29, 
x30]"); - //TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1h {z26.b}, p6/z, [x29, x30, lsl #1]"); + // TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1h {z26.b}, p6/z, [x29, x30, lsl #1]"); TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1h {z26.h}, p6/z, [x29, x30, lsl #1]"); TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1h {z26.s}, p6/z, [x29, x30, lsl #1]"); TEST_SINGLE(ld1h(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1h {z26.d}, p6/z, [x29, x30, lsl #1]"); - //TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sh {z26.b}, p6/z, [x29, x30, lsl #1]"); - //TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sh {z26.h}, p6/z, [x29, x30, lsl #1]"); + // TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sh {z26.b}, p6/z, [x29, x30, lsl #1]"); + // TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sh {z26.h}, p6/z, [x29, x30, lsl #1]"); TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sh {z26.s}, p6/z, [x29, x30, lsl #1]"); TEST_SINGLE(ld1sh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sh {z26.d}, p6/z, [x29, x30, lsl #1]"); @@ -4416,7 +4289,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous load (scalar pl TEST_SINGLE(ld1d(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1d {z26.d}, p6/z, [x29, x30, lsl #3]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous first-fault load (scalar plus scalar)") { - TEST_SINGLE(ldff1b(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1b {z26.b}, p6/z, [x29, x30]"); + TEST_SINGLE(ldff1b(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1b {z26.b}, p6/z, [x29, x30]"); TEST_SINGLE(ldff1b(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1b {z26.h}, p6/z, [x29, x30]"); TEST_SINGLE(ldff1b(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 
Reg::r30), "ldff1b {z26.s}, p6/z, [x29, x30]"); TEST_SINGLE(ldff1b(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1b {z26.d}, p6/z, [x29, x30]"); @@ -4491,11 +4364,11 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer convert to floatin TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "scvtf z30.h, p6/m, z29.s"); TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "scvtf z30.h, p6/m, z29.d"); - //TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "scvtf z30.s, p6/m, z29.h"); + // TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "scvtf z30.s, p6/m, z29.h"); TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "scvtf z30.s, p6/m, z29.s"); TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "scvtf z30.s, p6/m, z29.d"); - //TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "scvtf z30.d, p6/m, z29.h"); + // TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "scvtf z30.d, p6/m, z29.h"); TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "scvtf z30.d, p6/m, z29.s"); TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "scvtf z30.d, p6/m, z29.d"); @@ -4503,11 +4376,11 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer convert to floatin TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "ucvtf z30.h, p6/m, z29.s"); TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "ucvtf z30.h, p6/m, z29.d"); - //TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i32Bit, 
PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "ucvtf z30.s, p6/m, z29.h"); + // TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "ucvtf z30.s, p6/m, z29.h"); TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "ucvtf z30.s, p6/m, z29.s"); TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "ucvtf z30.s, p6/m, z29.d"); - //TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "ucvtf z30.d, p6/m, z29.h"); + // TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "ucvtf z30.d, p6/m, z29.h"); TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "ucvtf z30.d, p6/m, z29.s"); TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "ucvtf z30.d, p6/m, z29.d"); } @@ -4518,8 +4391,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point convert to TEST_SINGLE(flogb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "flogb z30.d, p6/m, z29.d"); TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "fcvtzs z30.h, p6/m, z29.h"); - //TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "fcvtzs z30.h, p6/m, z29.s"); - //TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "fcvtzs z30.h, p6/m, z29.d"); + // TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "fcvtzs z30.h, p6/m, z29.s"); + // TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "fcvtzs z30.h, p6/m, z29.d"); TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), 
"fcvtzs z30.s, p6/m, z29.h"); TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "fcvtzs z30.s, p6/m, z29.s"); @@ -4530,8 +4403,8 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point convert to TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "fcvtzs z30.d, p6/m, z29.d"); TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "fcvtzu z30.h, p6/m, z29.h"); - //TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "fcvtzu z30.h, p6/m, z29.s"); - //TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "fcvtzu z30.h, p6/m, z29.d"); + // TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "fcvtzu z30.h, p6/m, z29.s"); + // TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "fcvtzu z30.h, p6/m, z29.d"); TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "fcvtzu z30.s, p6/m, z29.h"); TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "fcvtzu z30.s, p6/m, z29.s"); @@ -4621,95 +4494,111 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point multiply-ac } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE store multiple structures (scalar plus scalar)") { - TEST_SINGLE(st2b(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, Reg::r30), "st2b {z31.b, z0.b}, p6, [x29, x30]"); - TEST_SINGLE(st2b(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, Reg::r30), "st2b {z26.b, z27.b}, p6, [x29, x30]"); - TEST_SINGLE(st3b(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, Reg::r30), "st3b {z31.b, z0.b, z1.b}, p6, [x29, x30]"); - TEST_SINGLE(st3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, Reg::r30), "st3b {z26.b, 
z27.b, z28.b}, p6, [x29, x30]"); - TEST_SINGLE(st4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, Reg::r30), "st4b {z31.b, z0.b, z1.b, z2.b}, p6, [x29, x30]"); - TEST_SINGLE(st4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, Reg::r30), "st4b {z26.b, z27.b, z28.b, z29.b}, p6, [x29, x30]"); - - TEST_SINGLE(st2h(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, Reg::r30), "st2h {z31.h, z0.h}, p6, [x29, x30, lsl #1]"); - TEST_SINGLE(st2h(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, Reg::r30), "st2h {z26.h, z27.h}, p6, [x29, x30, lsl #1]"); - TEST_SINGLE(st3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, Reg::r30), "st3h {z31.h, z0.h, z1.h}, p6, [x29, x30, lsl #1]"); - TEST_SINGLE(st3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, Reg::r30), "st3h {z26.h, z27.h, z28.h}, p6, [x29, x30, lsl #1]"); - TEST_SINGLE(st4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, Reg::r30), "st4h {z31.h, z0.h, z1.h, z2.h}, p6, [x29, x30, lsl #1]"); - TEST_SINGLE(st4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, Reg::r30), "st4h {z26.h, z27.h, z28.h, z29.h}, p6, [x29, x30, lsl #1]"); - - TEST_SINGLE(st2w(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, Reg::r30), "st2w {z31.s, z0.s}, p6, [x29, x30, lsl #2]"); - TEST_SINGLE(st2w(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, Reg::r30), "st2w {z26.s, z27.s}, p6, [x29, x30, lsl #2]"); - TEST_SINGLE(st3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, Reg::r30), "st3w {z31.s, z0.s, z1.s}, p6, [x29, x30, lsl #2]"); - TEST_SINGLE(st3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, Reg::r30), "st3w {z26.s, z27.s, z28.s}, p6, [x29, x30, lsl #2]"); - TEST_SINGLE(st4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, Reg::r30), "st4w {z31.s, z0.s, z1.s, z2.s}, p6, [x29, x30, lsl #2]"); - TEST_SINGLE(st4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, Reg::r30), "st4w {z26.s, z27.s, z28.s, z29.s}, p6, [x29, x30, lsl #2]"); - - TEST_SINGLE(st2d(ZReg::z31, 
ZReg::z0, PReg::p6, Reg::r29, Reg::r30), "st2d {z31.d, z0.d}, p6, [x29, x30, lsl #3]"); - TEST_SINGLE(st2d(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, Reg::r30), "st2d {z26.d, z27.d}, p6, [x29, x30, lsl #3]"); - TEST_SINGLE(st3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, Reg::r30), "st3d {z31.d, z0.d, z1.d}, p6, [x29, x30, lsl #3]"); - TEST_SINGLE(st3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, Reg::r30), "st3d {z26.d, z27.d, z28.d}, p6, [x29, x30, lsl #3]"); - TEST_SINGLE(st4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, Reg::r30), "st4d {z31.d, z0.d, z1.d, z2.d}, p6, [x29, x30, lsl #3]"); - TEST_SINGLE(st4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, Reg::r30), "st4d {z26.d, z27.d, z28.d, z29.d}, p6, [x29, x30, lsl #3]"); + TEST_SINGLE(st2b(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, Reg::r30), "st2b {z31.b, z0.b}, p6, [x29, x30]"); + TEST_SINGLE(st2b(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, Reg::r30), "st2b {z26.b, z27.b}, p6, [x29, x30]"); + TEST_SINGLE(st3b(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, Reg::r30), "st3b {z31.b, z0.b, z1.b}, p6, [x29, x30]"); + TEST_SINGLE(st3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, Reg::r30), "st3b {z26.b, z27.b, z28.b}, p6, [x29, x30]"); + TEST_SINGLE(st4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, Reg::r30), "st4b {z31.b, z0.b, z1.b, z2.b}, p6, [x29, " + "x30]"); + TEST_SINGLE(st4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, Reg::r30), "st4b {z26.b, z27.b, z28.b, z29.b}, p6, " + "[x29, x30]"); + + TEST_SINGLE(st2h(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, Reg::r30), "st2h {z31.h, z0.h}, p6, [x29, x30, lsl #1]"); + TEST_SINGLE(st2h(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, Reg::r30), "st2h {z26.h, z27.h}, p6, [x29, x30, lsl #1]"); + TEST_SINGLE(st3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, Reg::r30), "st3h {z31.h, z0.h, z1.h}, p6, [x29, x30, lsl #1]"); + TEST_SINGLE(st3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, 
Reg::r29, Reg::r30), "st3h {z26.h, z27.h, z28.h}, p6, [x29, x30, lsl #1]"); + TEST_SINGLE(st4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, Reg::r30), "st4h {z31.h, z0.h, z1.h, z2.h}, p6, [x29, x30, " + "lsl #1]"); + TEST_SINGLE(st4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, Reg::r30), "st4h {z26.h, z27.h, z28.h, z29.h}, p6, " + "[x29, x30, lsl #1]"); + + TEST_SINGLE(st2w(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, Reg::r30), "st2w {z31.s, z0.s}, p6, [x29, x30, lsl #2]"); + TEST_SINGLE(st2w(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, Reg::r30), "st2w {z26.s, z27.s}, p6, [x29, x30, lsl #2]"); + TEST_SINGLE(st3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, Reg::r30), "st3w {z31.s, z0.s, z1.s}, p6, [x29, x30, lsl #2]"); + TEST_SINGLE(st3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, Reg::r30), "st3w {z26.s, z27.s, z28.s}, p6, [x29, x30, lsl #2]"); + TEST_SINGLE(st4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, Reg::r30), "st4w {z31.s, z0.s, z1.s, z2.s}, p6, [x29, x30, " + "lsl #2]"); + TEST_SINGLE(st4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, Reg::r30), "st4w {z26.s, z27.s, z28.s, z29.s}, p6, " + "[x29, x30, lsl #2]"); + + TEST_SINGLE(st2d(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, Reg::r30), "st2d {z31.d, z0.d}, p6, [x29, x30, lsl #3]"); + TEST_SINGLE(st2d(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, Reg::r30), "st2d {z26.d, z27.d}, p6, [x29, x30, lsl #3]"); + TEST_SINGLE(st3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, Reg::r30), "st3d {z31.d, z0.d, z1.d}, p6, [x29, x30, lsl #3]"); + TEST_SINGLE(st3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, Reg::r30), "st3d {z26.d, z27.d, z28.d}, p6, [x29, x30, lsl #3]"); + TEST_SINGLE(st4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, Reg::r30), "st4d {z31.d, z0.d, z1.d, z2.d}, p6, [x29, x30, " + "lsl #3]"); + TEST_SINGLE(st4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, Reg::r30), "st4d {z26.d, 
z27.d, z28.d, z29.d}, p6, " + "[x29, x30, lsl #3]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE store multiple structures (scalar plus immediate)") { - TEST_SINGLE(st2b(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, 0), "st2b {z31.b, z0.b}, p6, [x29]"); + TEST_SINGLE(st2b(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, 0), "st2b {z31.b, z0.b}, p6, [x29]"); TEST_SINGLE(st2b(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 0), "st2b {z26.b, z27.b}, p6, [x29]"); TEST_SINGLE(st2b(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, -16), "st2b {z26.b, z27.b}, p6, [x29, #-16, mul vl]"); TEST_SINGLE(st2b(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 14), "st2b {z26.b, z27.b}, p6, [x29, #14, mul vl]"); - TEST_SINGLE(st2h(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, 0), "st2h {z31.h, z0.h}, p6, [x29]"); + TEST_SINGLE(st2h(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, 0), "st2h {z31.h, z0.h}, p6, [x29]"); TEST_SINGLE(st2h(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 0), "st2h {z26.h, z27.h}, p6, [x29]"); TEST_SINGLE(st2h(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, -16), "st2h {z26.h, z27.h}, p6, [x29, #-16, mul vl]"); TEST_SINGLE(st2h(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 14), "st2h {z26.h, z27.h}, p6, [x29, #14, mul vl]"); - TEST_SINGLE(st2w(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, 0), "st2w {z31.s, z0.s}, p6, [x29]"); + TEST_SINGLE(st2w(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, 0), "st2w {z31.s, z0.s}, p6, [x29]"); TEST_SINGLE(st2w(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 0), "st2w {z26.s, z27.s}, p6, [x29]"); TEST_SINGLE(st2w(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, -16), "st2w {z26.s, z27.s}, p6, [x29, #-16, mul vl]"); TEST_SINGLE(st2w(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 14), "st2w {z26.s, z27.s}, p6, [x29, #14, mul vl]"); - TEST_SINGLE(st2d(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, 0), "st2d {z31.d, z0.d}, p6, [x29]"); + TEST_SINGLE(st2d(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, 0), "st2d {z31.d, z0.d}, p6, [x29]"); TEST_SINGLE(st2d(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 0), "st2d {z26.d, 
z27.d}, p6, [x29]"); TEST_SINGLE(st2d(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, -16), "st2d {z26.d, z27.d}, p6, [x29, #-16, mul vl]"); TEST_SINGLE(st2d(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 14), "st2d {z26.d, z27.d}, p6, [x29, #14, mul vl]"); - TEST_SINGLE(st3b(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, 0), "st3b {z31.b, z0.b, z1.b}, p6, [x29]"); + TEST_SINGLE(st3b(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, 0), "st3b {z31.b, z0.b, z1.b}, p6, [x29]"); TEST_SINGLE(st3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 0), "st3b {z26.b, z27.b, z28.b}, p6, [x29]"); TEST_SINGLE(st3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, -24), "st3b {z26.b, z27.b, z28.b}, p6, [x29, #-24, mul vl]"); TEST_SINGLE(st3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 21), "st3b {z26.b, z27.b, z28.b}, p6, [x29, #21, mul vl]"); - TEST_SINGLE(st3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, 0), "st3h {z31.h, z0.h, z1.h}, p6, [x29]"); + TEST_SINGLE(st3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, 0), "st3h {z31.h, z0.h, z1.h}, p6, [x29]"); TEST_SINGLE(st3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 0), "st3h {z26.h, z27.h, z28.h}, p6, [x29]"); TEST_SINGLE(st3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, -24), "st3h {z26.h, z27.h, z28.h}, p6, [x29, #-24, mul vl]"); TEST_SINGLE(st3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 21), "st3h {z26.h, z27.h, z28.h}, p6, [x29, #21, mul vl]"); - TEST_SINGLE(st3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, 0), "st3w {z31.s, z0.s, z1.s}, p6, [x29]"); + TEST_SINGLE(st3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, 0), "st3w {z31.s, z0.s, z1.s}, p6, [x29]"); TEST_SINGLE(st3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 0), "st3w {z26.s, z27.s, z28.s}, p6, [x29]"); TEST_SINGLE(st3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, -24), "st3w {z26.s, z27.s, z28.s}, p6, [x29, #-24, mul vl]"); TEST_SINGLE(st3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, 
Reg::r29, 21), "st3w {z26.s, z27.s, z28.s}, p6, [x29, #21, mul vl]"); - TEST_SINGLE(st3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, 0), "st3d {z31.d, z0.d, z1.d}, p6, [x29]"); + TEST_SINGLE(st3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, 0), "st3d {z31.d, z0.d, z1.d}, p6, [x29]"); TEST_SINGLE(st3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 0), "st3d {z26.d, z27.d, z28.d}, p6, [x29]"); TEST_SINGLE(st3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, -24), "st3d {z26.d, z27.d, z28.d}, p6, [x29, #-24, mul vl]"); TEST_SINGLE(st3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 21), "st3d {z26.d, z27.d, z28.d}, p6, [x29, #21, mul vl]"); - TEST_SINGLE(st4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, 0), "st4b {z31.b, z0.b, z1.b, z2.b}, p6, [x29]"); + TEST_SINGLE(st4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, 0), "st4b {z31.b, z0.b, z1.b, z2.b}, p6, [x29]"); TEST_SINGLE(st4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 0), "st4b {z26.b, z27.b, z28.b, z29.b}, p6, [x29]"); - TEST_SINGLE(st4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, -32), "st4b {z26.b, z27.b, z28.b, z29.b}, p6, [x29, #-32, mul vl]"); - TEST_SINGLE(st4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 28), "st4b {z26.b, z27.b, z28.b, z29.b}, p6, [x29, #28, mul vl]"); + TEST_SINGLE(st4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, -32), "st4b {z26.b, z27.b, z28.b, z29.b}, p6, [x29, " + "#-32, mul vl]"); + TEST_SINGLE(st4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 28), "st4b {z26.b, z27.b, z28.b, z29.b}, p6, [x29, #28, " + "mul vl]"); - TEST_SINGLE(st4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, 0), "st4h {z31.h, z0.h, z1.h, z2.h}, p6, [x29]"); + TEST_SINGLE(st4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, 0), "st4h {z31.h, z0.h, z1.h, z2.h}, p6, [x29]"); TEST_SINGLE(st4h(ZReg::z26, ZReg::z27, ZReg::z28, 
ZReg::z29, PReg::p6, Reg::r29, 0), "st4h {z26.h, z27.h, z28.h, z29.h}, p6, [x29]"); - TEST_SINGLE(st4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, -32), "st4h {z26.h, z27.h, z28.h, z29.h}, p6, [x29, #-32, mul vl]"); - TEST_SINGLE(st4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 28), "st4h {z26.h, z27.h, z28.h, z29.h}, p6, [x29, #28, mul vl]"); + TEST_SINGLE(st4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, -32), "st4h {z26.h, z27.h, z28.h, z29.h}, p6, [x29, " + "#-32, mul vl]"); + TEST_SINGLE(st4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 28), "st4h {z26.h, z27.h, z28.h, z29.h}, p6, [x29, #28, " + "mul vl]"); - TEST_SINGLE(st4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, 0), "st4w {z31.s, z0.s, z1.s, z2.s}, p6, [x29]"); + TEST_SINGLE(st4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, 0), "st4w {z31.s, z0.s, z1.s, z2.s}, p6, [x29]"); TEST_SINGLE(st4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 0), "st4w {z26.s, z27.s, z28.s, z29.s}, p6, [x29]"); - TEST_SINGLE(st4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, -32), "st4w {z26.s, z27.s, z28.s, z29.s}, p6, [x29, #-32, mul vl]"); - TEST_SINGLE(st4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 28), "st4w {z26.s, z27.s, z28.s, z29.s}, p6, [x29, #28, mul vl]"); + TEST_SINGLE(st4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, -32), "st4w {z26.s, z27.s, z28.s, z29.s}, p6, [x29, " + "#-32, mul vl]"); + TEST_SINGLE(st4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 28), "st4w {z26.s, z27.s, z28.s, z29.s}, p6, [x29, #28, " + "mul vl]"); - TEST_SINGLE(st4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, 0), "st4d {z31.d, z0.d, z1.d, z2.d}, p6, [x29]"); + TEST_SINGLE(st4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, 0), "st4d {z31.d, z0.d, z1.d, z2.d}, p6, [x29]"); TEST_SINGLE(st4d(ZReg::z26, 
ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 0), "st4d {z26.d, z27.d, z28.d, z29.d}, p6, [x29]"); - TEST_SINGLE(st4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, -32), "st4d {z26.d, z27.d, z28.d, z29.d}, p6, [x29, #-32, mul vl]"); - TEST_SINGLE(st4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 28), "st4d {z26.d, z27.d, z28.d, z29.d}, p6, [x29, #28, mul vl]"); + TEST_SINGLE(st4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, -32), "st4d {z26.d, z27.d, z28.d, z29.d}, p6, [x29, " + "#-32, mul vl]"); + TEST_SINGLE(st4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 28), "st4d {z26.d, z27.d, z28.d, z29.d}, p6, [x29, #28, " + "mul vl]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous store (scalar plus immediate)") { TEST_SINGLE(st1b(ZReg::z26, PReg::p6, Reg::r29, 0), "st1b {z26.b}, p6, [x29]"); @@ -4727,33 +4616,33 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous store (scalar p TEST_SINGLE(st1b(ZReg::z26, PReg::p6, Reg::r29, 7), "st1b {z26.s}, p6, [x29, #7, mul vl]"); TEST_SINGLE(st1b(ZReg::z26, PReg::p6, Reg::r29, 7), "st1b {z26.d}, p6, [x29, #7, mul vl]"); - //TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, 0), "st1h {z26.b}, p6, [x29]"); + // TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, 0), "st1h {z26.b}, p6, [x29]"); TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, 0), "st1h {z26.h}, p6, [x29]"); TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, 0), "st1h {z26.s}, p6, [x29]"); TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, 0), "st1h {z26.d}, p6, [x29]"); - //TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, -8), "st1h {z26.b}, p6, [x29, #-8, mul vl]"); + // TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, -8), "st1h {z26.b}, p6, [x29, #-8, mul vl]"); TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, -8), "st1h {z26.h}, p6, [x29, #-8, mul vl]"); TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, -8), "st1h {z26.s}, p6, [x29, #-8, mul vl]"); 
TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, -8), "st1h {z26.d}, p6, [x29, #-8, mul vl]"); - //TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, 7), "st1h {z26.b}, p6, [x29, #7, mul vl]"); + // TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, 7), "st1h {z26.b}, p6, [x29, #7, mul vl]"); TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, 7), "st1h {z26.h}, p6, [x29, #7, mul vl]"); TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, 7), "st1h {z26.s}, p6, [x29, #7, mul vl]"); TEST_SINGLE(st1h(ZReg::z26, PReg::p6, Reg::r29, 7), "st1h {z26.d}, p6, [x29, #7, mul vl]"); - //TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, 0), "st1w {z26.b}, p6, [x29]"); - //TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, 0), "st1w {z26.h}, p6, [x29]"); + // TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, 0), "st1w {z26.b}, p6, [x29]"); + // TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, 0), "st1w {z26.h}, p6, [x29]"); TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, 0), "st1w {z26.s}, p6, [x29]"); TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, 0), "st1w {z26.d}, p6, [x29]"); - //TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, -8), "st1w {z26.b}, p6, [x29, #-8, mul vl]"); - //TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, -8), "st1w {z26.h}, p6, [x29, #-8, mul vl]"); + // TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, -8), "st1w {z26.b}, p6, [x29, #-8, mul vl]"); + // TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, -8), "st1w {z26.h}, p6, [x29, #-8, mul vl]"); TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, -8), "st1w {z26.s}, p6, [x29, #-8, mul vl]"); TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, -8), "st1w {z26.d}, p6, [x29, #-8, mul vl]"); - //TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, 7), "st1w {z26.b}, p6, [x29, #7, mul vl]"); - //TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, 7), "st1w {z26.h}, p6, [x29, #7, mul vl]"); + // TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, 7), "st1w {z26.b}, p6, [x29, #7, mul vl]"); + // TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, 7), "st1w 
{z26.h}, p6, [x29, #7, mul vl]"); TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, 7), "st1w {z26.s}, p6, [x29, #7, mul vl]"); TEST_SINGLE(st1w(ZReg::z26, PReg::p6, Reg::r29, 7), "st1w {z26.d}, p6, [x29, #7, mul vl]"); @@ -4763,137 +4652,124 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous store (scalar p } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Scatters") { - TEST_SINGLE(st1b(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "st1b {z30.s}, p6, [x30, z31.s, uxtw]"); - TEST_SINGLE(st1b(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "st1b {z30.s}, p6, [x30, z31.s, sxtw]"); - TEST_SINGLE(st1b(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "st1b {z30.d}, p6, [x30, z31.d, uxtw]"); - TEST_SINGLE(st1b(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "st1b {z30.d}, p6, [x30, z31.d, sxtw]"); - TEST_SINGLE(st1b(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "st1b {z30.d}, p6, [x30, z31.d]"); - - TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), - "st1b {z30.s}, p6, [z31.s]"); - TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 31)), - "st1b {z30.s}, p6, [z31.s, #31]"); - TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), - "st1b {z30.d}, p6, [z31.d]"); - TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 31)), - "st1b {z30.d}, p6, [z31.d, #31]"); - - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), - "st1h {z30.s}, p6, [x30, z31.s, uxtw #1]"); - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), - "st1h {z30.s}, p6, [x30, z31.s, sxtw #1]"); - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), - "st1h {z30.d}, p6, [x30, z31.d, uxtw 
#1]"); - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), - "st1h {z30.d}, p6, [x30, z31.d, sxtw #1]"); - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)), - "st1h {z30.d}, p6, [x30, z31.d, lsl #1]"); - - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "st1h {z30.s}, p6, [x30, z31.s, uxtw]"); - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "st1h {z30.s}, p6, [x30, z31.s, sxtw]"); - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "st1h {z30.d}, p6, [x30, z31.d, uxtw]"); - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "st1h {z30.d}, p6, [x30, z31.d, sxtw]"); - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "st1h {z30.d}, p6, [x30, z31.d]"); - - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), - "st1h {z30.s}, p6, [z31.s]"); - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 62)), - "st1h {z30.s}, p6, [z31.s, #62]"); - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), - "st1h {z30.d}, p6, [z31.d]"); - TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 62)), - "st1h {z30.d}, p6, [z31.d, #62]"); - - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), - "st1w {z30.s}, p6, [x30, z31.s, uxtw #2]"); - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), - "st1w {z30.s}, p6, [x30, z31.s, sxtw #2]"); - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), - "st1w {z30.d}, p6, [x30, z31.d, uxtw #2]"); - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, 
SVEModType::MOD_SXTW, 2)), - "st1w {z30.d}, p6, [x30, z31.d, sxtw #2]"); - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), - "st1w {z30.d}, p6, [x30, z31.d, lsl #2]"); - - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "st1w {z30.s}, p6, [x30, z31.s, uxtw]"); - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "st1w {z30.s}, p6, [x30, z31.s, sxtw]"); - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "st1w {z30.d}, p6, [x30, z31.d, uxtw]"); - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "st1w {z30.d}, p6, [x30, z31.d, sxtw]"); - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "st1w {z30.d}, p6, [x30, z31.d]"); - - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), - "st1w {z30.s}, p6, [z31.s]"); - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 124)), - "st1w {z30.s}, p6, [z31.s, #124]"); - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), - "st1w {z30.d}, p6, [z31.d]"); - TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 124)), - "st1w {z30.d}, p6, [z31.d, #124]"); - - TEST_SINGLE(st1d(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 3)), - "st1d {z30.d}, p6, [x30, z31.d, uxtw #3]"); - TEST_SINGLE(st1d(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 3)), - "st1d {z30.d}, p6, [x30, z31.d, sxtw #3]"); - TEST_SINGLE(st1d(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 3)), - "st1d {z30.d}, p6, [x30, z31.d, lsl #3]"); - - TEST_SINGLE(st1d(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), - "st1d {z30.d}, p6, [x30, z31.d, uxtw]"); - 
TEST_SINGLE(st1d(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), - "st1d {z30.d}, p6, [x30, z31.d, sxtw]"); - TEST_SINGLE(st1d(ZReg::z30, PReg::p6, - SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), - "st1d {z30.d}, p6, [x30, z31.d]"); - - TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), - "st1d {z30.d}, p6, [z31.d]"); - TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 248)), - "st1d {z30.d}, p6, [z31.d, #248]"); + TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1b {z30.s}, " + "p6, [x30, " + "z31.s, uxtw]"); + TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1b {z30.s}, " + "p6, [x30, " + "z31.s, sxtw]"); + TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1b {z30.d}, " + "p6, [x30, " + "z31.d, uxtw]"); + TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1b {z30.d}, " + "p6, [x30, " + "z31.d, sxtw]"); + TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "st1b {z30.d}, " + "p6, [x30, " + "z31.d]"); + + TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1b {z30.s}, p6, [z31.s]"); + TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 31)), "st1b {z30.s}, p6, [z31.s, #31]"); + TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1b {z30.d}, p6, [z31.d]"); + TEST_SINGLE(st1b(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 31)), "st1b {z30.d}, p6, [z31.d, #31]"); + + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), "st1h {z30.s}, " + "p6, [x30, " + "z31.s, uxtw " + "#1]"); + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), "st1h {z30.s}, " + "p6, [x30, " + "z31.s, sxtw " + 
"#1]"); + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), "st1h {z30.d}, " + "p6, [x30, " + "z31.d, uxtw " + "#1]"); + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), "st1h {z30.d}, " + "p6, [x30, " + "z31.d, sxtw " + "#1]"); + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)), "st1h {z30.d}, " + "p6, [x30, " + "z31.d, lsl #1]"); + + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1h {z30.s}, " + "p6, [x30, " + "z31.s, uxtw]"); + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1h {z30.s}, " + "p6, [x30, " + "z31.s, sxtw]"); + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1h {z30.d}, " + "p6, [x30, " + "z31.d, uxtw]"); + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1h {z30.d}, " + "p6, [x30, " + "z31.d, sxtw]"); + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "st1h {z30.d}, " + "p6, [x30, " + "z31.d]"); + + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1h {z30.s}, p6, [z31.s]"); + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 62)), "st1h {z30.s}, p6, [z31.s, #62]"); + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1h {z30.d}, p6, [z31.d]"); + TEST_SINGLE(st1h(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 62)), "st1h {z30.d}, p6, [z31.d, #62]"); + + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), "st1w {z30.s}, " + "p6, [x30, " + "z31.s, uxtw " + "#2]"); + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), "st1w {z30.s}, " + "p6, [x30, " + "z31.s, sxtw " + "#2]"); + 
TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), "st1w {z30.d}, " + "p6, [x30, " + "z31.d, uxtw " + "#2]"); + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), "st1w {z30.d}, " + "p6, [x30, " + "z31.d, sxtw " + "#2]"); + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), "st1w {z30.d}, " + "p6, [x30, " + "z31.d, lsl #2]"); + + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1w {z30.s}, " + "p6, [x30, " + "z31.s, uxtw]"); + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1w {z30.s}, " + "p6, [x30, " + "z31.s, sxtw]"); + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1w {z30.d}, " + "p6, [x30, " + "z31.d, uxtw]"); + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1w {z30.d}, " + "p6, [x30, " + "z31.d, sxtw]"); + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "st1w {z30.d}, " + "p6, [x30, " + "z31.d]"); + + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1w {z30.s}, p6, [z31.s]"); + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 124)), "st1w {z30.s}, p6, [z31.s, #124]"); + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1w {z30.d}, p6, [z31.d]"); + TEST_SINGLE(st1w(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 124)), "st1w {z30.d}, p6, [z31.d, #124]"); + + TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 3)), "st1d {z30.d}, p6, [x30, z31.d, " + "uxtw #3]"); + TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 3)), "st1d {z30.d}, p6, [x30, z31.d, " + "sxtw #3]"); + TEST_SINGLE(st1d(ZReg::z30, 
PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 3)), "st1d {z30.d}, p6, [x30, z31.d, lsl " + "#3]"); + + TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1d {z30.d}, p6, [x30, z31.d, " + "uxtw]"); + TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1d {z30.d}, p6, [x30, z31.d, " + "sxtw]"); + TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "st1d {z30.d}, p6, [x30, z31.d]"); + + TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1d {z30.d}, p6, [z31.d]"); + TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 248)), "st1d {z30.d}, p6, [z31.d, #248]"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Unsized Stores") { diff --git a/FEXCore/unittests/Emitter/Scalar_Tests.cpp b/FEXCore/unittests/Emitter/Scalar_Tests.cpp index 34cdac43f4..ebacc2fab8 100644 --- a/FEXCore/unittests/Emitter/Scalar_Tests.cpp +++ b/FEXCore/unittests/Emitter/Scalar_Tests.cpp @@ -64,227 +64,227 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar three } TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar two-register miscellaneous") { // Commented out lines showcase unallocated encodings. 
- TEST_SINGLE(suqadd(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "suqadd b30, b29"); + TEST_SINGLE(suqadd(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "suqadd b30, b29"); TEST_SINGLE(suqadd(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "suqadd h30, h29"); TEST_SINGLE(suqadd(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "suqadd s30, s29"); TEST_SINGLE(suqadd(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "suqadd d30, d29"); - TEST_SINGLE(sqabs(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "sqabs b30, b29"); + TEST_SINGLE(sqabs(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "sqabs b30, b29"); TEST_SINGLE(sqabs(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "sqabs h30, h29"); TEST_SINGLE(sqabs(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "sqabs s30, s29"); TEST_SINGLE(sqabs(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "sqabs d30, d29"); - //TEST_SINGLE(cmgt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "cmgt b30, b29, #0"); - //TEST_SINGLE(cmgt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmgt h30, h29, #0"); - //TEST_SINGLE(cmgt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmgt s30, s29, #0"); + // TEST_SINGLE(cmgt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "cmgt b30, b29, #0"); + // TEST_SINGLE(cmgt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmgt h30, h29, #0"); + // TEST_SINGLE(cmgt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmgt s30, s29, #0"); TEST_SINGLE(cmgt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "cmgt d30, d29, #0"); - //TEST_SINGLE(cmeq(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "cmeq b30, b29, #0"); - //TEST_SINGLE(cmeq(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmeq h30, h29, #0"); - //TEST_SINGLE(cmeq(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmeq s30, s29, #0"); + // TEST_SINGLE(cmeq(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "cmeq b30, b29, #0"); + // TEST_SINGLE(cmeq(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmeq h30, h29, #0"); + // TEST_SINGLE(cmeq(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmeq s30, s29, #0"); 
TEST_SINGLE(cmeq(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "cmeq d30, d29, #0"); - //TEST_SINGLE(cmlt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "cmlt b30, b29, #0"); - //TEST_SINGLE(cmlt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmlt h30, h29, #0"); - //TEST_SINGLE(cmlt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmlt s30, s29, #0"); + // TEST_SINGLE(cmlt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "cmlt b30, b29, #0"); + // TEST_SINGLE(cmlt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmlt h30, h29, #0"); + // TEST_SINGLE(cmlt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmlt s30, s29, #0"); TEST_SINGLE(cmlt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "cmlt d30, d29, #0"); - //TEST_SINGLE(abs(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "abs b30, b29"); - //TEST_SINGLE(abs(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "abs h30, h29"); - //TEST_SINGLE(abs(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "abs s30, s29"); + // TEST_SINGLE(abs(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "abs b30, b29"); + // TEST_SINGLE(abs(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "abs h30, h29"); + // TEST_SINGLE(abs(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "abs s30, s29"); TEST_SINGLE(abs(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "abs d30, d29"); - TEST_SINGLE(sqxtn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "sqxtn b30, h29"); + TEST_SINGLE(sqxtn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "sqxtn b30, h29"); TEST_SINGLE(sqxtn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "sqxtn h30, s29"); TEST_SINGLE(sqxtn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "sqxtn s30, d29"); - //TEST_SINGLE(sqxtn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "sqxtn d30, d29"); + // TEST_SINGLE(sqxtn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "sqxtn d30, d29"); - //TEST_SINGLE(fcvtns(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtns b30, b29"); - //TEST_SINGLE(fcvtns(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtns h30, h29"); + // 
TEST_SINGLE(fcvtns(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtns b30, b29"); + // TEST_SINGLE(fcvtns(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtns h30, h29"); TEST_SINGLE(fcvtns(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtns s30, s29"); TEST_SINGLE(fcvtns(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtns d30, d29"); - //TEST_SINGLE(fcvtms(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtms b30, b29"); - //TEST_SINGLE(fcvtms(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtms h30, h29"); + // TEST_SINGLE(fcvtms(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtms b30, b29"); + // TEST_SINGLE(fcvtms(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtms h30, h29"); TEST_SINGLE(fcvtms(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtms s30, s29"); TEST_SINGLE(fcvtms(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtms d30, d29"); - //TEST_SINGLE(fcvtas(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtas b30, b29"); - //TEST_SINGLE(fcvtas(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtas h30, h29"); + // TEST_SINGLE(fcvtas(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtas b30, b29"); + // TEST_SINGLE(fcvtas(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtas h30, h29"); TEST_SINGLE(fcvtas(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtas s30, s29"); TEST_SINGLE(fcvtas(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtas d30, d29"); - //TEST_SINGLE(scvtf(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "scvtf b30, b29"); - //TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "scvtf h30, h29"); + // TEST_SINGLE(scvtf(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "scvtf b30, b29"); + // TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "scvtf h30, h29"); TEST_SINGLE(scvtf(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "scvtf s30, s29"); TEST_SINGLE(scvtf(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "scvtf d30, d29"); - //TEST_SINGLE(fcmeq(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcmeq b30, b29"); - 
//TEST_SINGLE(fcmeq(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmeq h30, h29"); + // TEST_SINGLE(fcmeq(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcmeq b30, b29"); + // TEST_SINGLE(fcmeq(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmeq h30, h29"); TEST_SINGLE(fcmeq(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcmeq s30, s29, #0.0"); TEST_SINGLE(fcmeq(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcmeq d30, d29, #0.0"); - //TEST_SINGLE(fcmlt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcmlt b30, b29"); - //TEST_SINGLE(fcmlt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmlt h30, h29"); + // TEST_SINGLE(fcmlt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcmlt b30, b29"); + // TEST_SINGLE(fcmlt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmlt h30, h29"); TEST_SINGLE(fcmlt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcmlt s30, s29, #0.0"); TEST_SINGLE(fcmlt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcmlt d30, d29, #0.0"); - //TEST_SINGLE(fcvtps(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtps b30, b29"); - //TEST_SINGLE(fcvtps(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtps h30, h29"); + // TEST_SINGLE(fcvtps(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtps b30, b29"); + // TEST_SINGLE(fcvtps(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtps h30, h29"); TEST_SINGLE(fcvtps(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtps s30, s29"); TEST_SINGLE(fcvtps(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtps d30, d29"); - //TEST_SINGLE(fcvtzs(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtzs b30, b29"); - //TEST_SINGLE(fcvtzs(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtzs h30, h29"); + // TEST_SINGLE(fcvtzs(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtzs b30, b29"); + // TEST_SINGLE(fcvtzs(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtzs h30, h29"); TEST_SINGLE(fcvtzs(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtzs s30, s29"); TEST_SINGLE(fcvtzs(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), 
"fcvtzs d30, d29"); - //TEST_SINGLE(frecpe(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "frecpe b30, b29"); - //TEST_SINGLE(frecpe(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frecpe h30, h29"); + // TEST_SINGLE(frecpe(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "frecpe b30, b29"); + // TEST_SINGLE(frecpe(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frecpe h30, h29"); TEST_SINGLE(frecpe(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frecpe s30, s29"); TEST_SINGLE(frecpe(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frecpe d30, d29"); - //TEST_SINGLE(frecpx(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "frecpx b30, b29"); - //TEST_SINGLE(frecpx(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frecpx h30, h29"); + // TEST_SINGLE(frecpx(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "frecpx b30, b29"); + // TEST_SINGLE(frecpx(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frecpx h30, h29"); TEST_SINGLE(frecpx(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frecpx s30, s29"); TEST_SINGLE(frecpx(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frecpx d30, d29"); - TEST_SINGLE(usqadd(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "usqadd b30, b29"); + TEST_SINGLE(usqadd(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "usqadd b30, b29"); TEST_SINGLE(usqadd(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "usqadd h30, h29"); TEST_SINGLE(usqadd(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "usqadd s30, s29"); TEST_SINGLE(usqadd(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "usqadd d30, d29"); - TEST_SINGLE(sqneg(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "sqneg b30, b29"); + TEST_SINGLE(sqneg(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "sqneg b30, b29"); TEST_SINGLE(sqneg(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "sqneg h30, h29"); TEST_SINGLE(sqneg(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "sqneg s30, s29"); TEST_SINGLE(sqneg(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "sqneg d30, d29"); - //TEST_SINGLE(cmge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "cmge b30, b29"); 
- //TEST_SINGLE(cmge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmge h30, h29"); - //TEST_SINGLE(cmge(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmge s30, s29"); + // TEST_SINGLE(cmge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "cmge b30, b29"); + // TEST_SINGLE(cmge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmge h30, h29"); + // TEST_SINGLE(cmge(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmge s30, s29"); TEST_SINGLE(cmge(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "cmge d30, d29, #0"); - //TEST_SINGLE(cmle(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "cmle b30, b29"); - //TEST_SINGLE(cmle(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmle h30, h29"); - //TEST_SINGLE(cmle(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmle s30, s29"); + // TEST_SINGLE(cmle(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "cmle b30, b29"); + // TEST_SINGLE(cmle(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmle h30, h29"); + // TEST_SINGLE(cmle(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmle s30, s29"); TEST_SINGLE(cmle(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "cmle d30, d29, #0"); - //TEST_SINGLE(neg(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "neg b30, b29"); - //TEST_SINGLE(neg(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "neg h30, h29"); - //TEST_SINGLE(neg(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "neg s30, s29"); + // TEST_SINGLE(neg(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "neg b30, b29"); + // TEST_SINGLE(neg(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "neg h30, h29"); + // TEST_SINGLE(neg(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "neg s30, s29"); TEST_SINGLE(neg(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "neg d30, d29"); - TEST_SINGLE(sqxtun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "sqxtun b30, h29"); + TEST_SINGLE(sqxtun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "sqxtun b30, h29"); TEST_SINGLE(sqxtun(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "sqxtun h30, s29"); TEST_SINGLE(sqxtun(ScalarRegSize::i32Bit, VReg::v30, 
VReg::v29), "sqxtun s30, d29"); - //TEST_SINGLE(sqxtun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "sqxtun d30, d29"); + // TEST_SINGLE(sqxtun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "sqxtun d30, d29"); - TEST_SINGLE(uqxtn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "uqxtn b30, h29"); + TEST_SINGLE(uqxtn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "uqxtn b30, h29"); TEST_SINGLE(uqxtn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "uqxtn h30, s29"); TEST_SINGLE(uqxtn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "uqxtn s30, d29"); - //TEST_SINGLE(uqxtn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "uqxtn d30, d29"); + // TEST_SINGLE(uqxtn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "uqxtn d30, d29"); - //TEST_SINGLE(fcvtxn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtxn b30, b29"); - //TEST_SINGLE(fcvtxn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtxn h30, h29"); + // TEST_SINGLE(fcvtxn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtxn b30, b29"); + // TEST_SINGLE(fcvtxn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtxn h30, h29"); TEST_SINGLE(fcvtxn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtxn s30, d29"); - //TEST_SINGLE(fcvtxn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtxn d30, d29"); + // TEST_SINGLE(fcvtxn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtxn d30, d29"); - //TEST_SINGLE(fcvtnu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtnu b30, b29"); - //TEST_SINGLE(fcvtnu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtnu h30, h29"); + // TEST_SINGLE(fcvtnu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtnu b30, b29"); + // TEST_SINGLE(fcvtnu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtnu h30, h29"); TEST_SINGLE(fcvtnu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtnu s30, s29"); TEST_SINGLE(fcvtnu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtnu d30, d29"); - //TEST_SINGLE(fcvtmu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtmu b30, b29"); - //TEST_SINGLE(fcvtmu(ScalarRegSize::i16Bit, 
VReg::v30, VReg::v29), "fcvtmu h30, h29"); + // TEST_SINGLE(fcvtmu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtmu b30, b29"); + // TEST_SINGLE(fcvtmu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtmu h30, h29"); TEST_SINGLE(fcvtmu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtmu s30, s29"); TEST_SINGLE(fcvtmu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtmu d30, d29"); - //TEST_SINGLE(fcvtau(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtau b30, b29"); - //TEST_SINGLE(fcvtau(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtau h30, h29"); + // TEST_SINGLE(fcvtau(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtau b30, b29"); + // TEST_SINGLE(fcvtau(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtau h30, h29"); TEST_SINGLE(fcvtau(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtau s30, s29"); TEST_SINGLE(fcvtau(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtau d30, d29"); - //TEST_SINGLE(ucvtf(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "ucvtf b30, b29"); - //TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "ucvtf h30, h29"); + // TEST_SINGLE(ucvtf(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "ucvtf b30, b29"); + // TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "ucvtf h30, h29"); TEST_SINGLE(ucvtf(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "ucvtf s30, s29"); TEST_SINGLE(ucvtf(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "ucvtf d30, d29"); - //TEST_SINGLE(fcmge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcmge b30, b29"); - //TEST_SINGLE(fcmge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmge h30, h29"); + // TEST_SINGLE(fcmge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcmge b30, b29"); + // TEST_SINGLE(fcmge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmge h30, h29"); TEST_SINGLE(fcmge(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcmge s30, s29, #0.0"); TEST_SINGLE(fcmge(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcmge d30, d29, #0.0"); - 
//TEST_SINGLE(fcmle(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcmle b30, b29"); - //TEST_SINGLE(fcmle(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmle h30, h29"); + // TEST_SINGLE(fcmle(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcmle b30, b29"); + // TEST_SINGLE(fcmle(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmle h30, h29"); TEST_SINGLE(fcmle(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcmle s30, s29, #0.0"); TEST_SINGLE(fcmle(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcmle d30, d29, #0.0"); - //TEST_SINGLE(fcvtpu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtpu b30, b29"); - //TEST_SINGLE(fcvtpu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtpu h30, h29"); + // TEST_SINGLE(fcvtpu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtpu b30, b29"); + // TEST_SINGLE(fcvtpu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtpu h30, h29"); TEST_SINGLE(fcvtpu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtpu s30, s29"); TEST_SINGLE(fcvtpu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtpu d30, d29"); - //TEST_SINGLE(fcvtzu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtzu b30, b29"); - //TEST_SINGLE(fcvtzu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtzu h30, h29"); + // TEST_SINGLE(fcvtzu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcvtzu b30, b29"); + // TEST_SINGLE(fcvtzu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtzu h30, h29"); TEST_SINGLE(fcvtzu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtzu s30, s29"); TEST_SINGLE(fcvtzu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtzu d30, d29"); - //TEST_SINGLE(frsqrte(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "frsqrte b30, b29"); - //TEST_SINGLE(frsqrte(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frsqrte h30, h29"); + // TEST_SINGLE(frsqrte(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "frsqrte b30, b29"); + // TEST_SINGLE(frsqrte(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frsqrte h30, h29"); TEST_SINGLE(frsqrte(ScalarRegSize::i32Bit, VReg::v30, 
VReg::v29), "frsqrte s30, s29"); TEST_SINGLE(frsqrte(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frsqrte d30, d29"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar pairwise") { // Commented out lines showcase unallocated encodings. - //TEST_SINGLE(addp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "addp b30, b29"); - //TEST_SINGLE(addp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "addp h30, h29"); - //TEST_SINGLE(addp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "addp s30, s29"); + // TEST_SINGLE(addp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "addp b30, b29"); + // TEST_SINGLE(addp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "addp h30, h29"); + // TEST_SINGLE(addp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "addp s30, s29"); TEST_SINGLE(addp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "addp d30, v29.2d"); - TEST_SINGLE(fmaxnmp(HReg::h30, HReg::h29), "fmaxnmp h30, v29.2h"); - //TEST_SINGLE(fmaxnmp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fmaxnmp b30, b29"); - //TEST_SINGLE(fmaxnmp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fmaxnmp h30, h29"); + TEST_SINGLE(fmaxnmp(HReg::h30, HReg::h29), "fmaxnmp h30, v29.2h"); + // TEST_SINGLE(fmaxnmp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fmaxnmp b30, b29"); + // TEST_SINGLE(fmaxnmp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fmaxnmp h30, h29"); TEST_SINGLE(fmaxnmp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fmaxnmp s30, v29.2s"); TEST_SINGLE(fmaxnmp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fmaxnmp d30, v29.2d"); - TEST_SINGLE(faddp(HReg::h30, HReg::h29), "faddp h30, v29.2h"); - //TEST_SINGLE(faddp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "faddp b30, b29"); - //TEST_SINGLE(faddp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "faddp h30, h29"); + TEST_SINGLE(faddp(HReg::h30, HReg::h29), "faddp h30, v29.2h"); + // TEST_SINGLE(faddp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "faddp b30, b29"); + // TEST_SINGLE(faddp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), 
"faddp h30, h29"); TEST_SINGLE(faddp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "faddp s30, v29.2s"); TEST_SINGLE(faddp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "faddp d30, v29.2d"); - TEST_SINGLE(fmaxp(HReg::h30, HReg::h29), "fmaxp h30, v29.2h"); - //TEST_SINGLE(fmaxp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fmaxp b30, b29"); - //TEST_SINGLE(fmaxp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fmaxp h30, h29"); + TEST_SINGLE(fmaxp(HReg::h30, HReg::h29), "fmaxp h30, v29.2h"); + // TEST_SINGLE(fmaxp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fmaxp b30, b29"); + // TEST_SINGLE(fmaxp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fmaxp h30, h29"); TEST_SINGLE(fmaxp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fmaxp s30, v29.2s"); TEST_SINGLE(fmaxp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fmaxp d30, v29.2d"); - TEST_SINGLE(fminnmp(HReg::h30, HReg::h29), "fminnmp h30, v29.2h"); - //TEST_SINGLE(fminnmp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fminnmp b30, b29"); - //TEST_SINGLE(fminnmp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fminnmp h30, h29"); + TEST_SINGLE(fminnmp(HReg::h30, HReg::h29), "fminnmp h30, v29.2h"); + // TEST_SINGLE(fminnmp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fminnmp b30, b29"); + // TEST_SINGLE(fminnmp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fminnmp h30, h29"); TEST_SINGLE(fminnmp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fminnmp s30, v29.2s"); TEST_SINGLE(fminnmp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fminnmp d30, v29.2d"); - TEST_SINGLE(fminp(HReg::h30, HReg::h29), "fminp h30, v29.2h"); - //TEST_SINGLE(fminp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fminp b30, b29"); - //TEST_SINGLE(fminp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fminp h30, h29"); + TEST_SINGLE(fminp(HReg::h30, HReg::h29), "fminp h30, v29.2h"); + // TEST_SINGLE(fminp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fminp b30, b29"); + // TEST_SINGLE(fminp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fminp h30, 
h29"); TEST_SINGLE(fminp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fminp s30, v29.2s"); TEST_SINGLE(fminp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fminp d30, v29.2d"); } TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar three different") { // Commented out lines showcase unallocated encodings. - //TEST_SINGLE(sqdmlal(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlal v30.16b, v29.16b, v28.v16b"); - //TEST_SINGLE(sqdmlal(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlal v30.16b, v29.16b, v28.v16b"); + // TEST_SINGLE(sqdmlal(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlal v30.16b, v29.16b, v28.v16b"); + // TEST_SINGLE(sqdmlal(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlal v30.16b, v29.16b, v28.v16b"); TEST_SINGLE(sqdmlal(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlal s30, h29, h28"); TEST_SINGLE(sqdmlal(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlal d30, s29, s28"); - //TEST_SINGLE(sqdmlsl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlsl v30.16b, v29.16b, v28.v16b"); - //TEST_SINGLE(sqdmlsl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlsl v30.16b, v29.16b, v28.v16b"); + // TEST_SINGLE(sqdmlsl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlsl v30.16b, v29.16b, v28.v16b"); + // TEST_SINGLE(sqdmlsl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlsl v30.16b, v29.16b, v28.v16b"); TEST_SINGLE(sqdmlsl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlsl s30, h29, h28"); TEST_SINGLE(sqdmlsl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlsl d30, s29, s28"); - //TEST_SINGLE(sqdmull(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmull v30.16b, v29.16b, v28.v16b"); - //TEST_SINGLE(sqdmull(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmull v30.16b, v29.16b, v28.v16b"); + // TEST_SINGLE(sqdmull(ScalarRegSize::i8Bit, VReg::v30, 
VReg::v29, VReg::v28), "sqdmull v30.16b, v29.16b, v28.v16b"); + // TEST_SINGLE(sqdmull(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmull v30.16b, v29.16b, v28.v16b"); TEST_SINGLE(sqdmull(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmull s30, h29, h28"); TEST_SINGLE(sqdmull(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmull d30, s29, s28"); } @@ -299,19 +299,19 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar three TEST_SINGLE(sqsub(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqsub s30, s29, s28"); TEST_SINGLE(sqsub(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqsub d30, d29, d28"); - //TEST_SINGLE(cmgt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmgt b30, b29, b28"); - //TEST_SINGLE(cmgt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmgt h30, h29, h28"); - //TEST_SINGLE(cmgt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmgt s30, s29, s28"); + // TEST_SINGLE(cmgt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmgt b30, b29, b28"); + // TEST_SINGLE(cmgt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmgt h30, h29, h28"); + // TEST_SINGLE(cmgt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmgt s30, s29, s28"); TEST_SINGLE(cmgt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "cmgt d30, d29, d28"); - //TEST_SINGLE(cmge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmge b30, b29, b28"); - //TEST_SINGLE(cmge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmge h30, h29, h28"); - //TEST_SINGLE(cmge(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmge s30, s29, s28"); + // TEST_SINGLE(cmge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmge b30, b29, b28"); + // TEST_SINGLE(cmge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmge h30, h29, h28"); + // TEST_SINGLE(cmge(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmge s30, s29, 
s28"); TEST_SINGLE(cmge(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "cmge d30, d29, d28"); - //TEST_SINGLE(sshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sshl b30, b29, b28"); - //TEST_SINGLE(sshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sshl h30, h29, h28"); - //TEST_SINGLE(sshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sshl s30, s29, s28"); + // TEST_SINGLE(sshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sshl b30, b29, b28"); + // TEST_SINGLE(sshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sshl h30, h29, h28"); + // TEST_SINGLE(sshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sshl s30, s29, s28"); TEST_SINGLE(sshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sshl d30, d29, d28"); TEST_SINGLE(sqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqshl b30, b29, b28"); @@ -319,9 +319,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar three TEST_SINGLE(sqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqshl s30, s29, s28"); TEST_SINGLE(sqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqshl d30, d29, d28"); - //TEST_SINGLE(srshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "srshl b30, b29, b28"); - //TEST_SINGLE(srshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "srshl h30, h29, h28"); - //TEST_SINGLE(srshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "srshl s30, s29, s28"); + // TEST_SINGLE(srshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "srshl b30, b29, b28"); + // TEST_SINGLE(srshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "srshl h30, h29, h28"); + // TEST_SINGLE(srshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "srshl s30, s29, s28"); TEST_SINGLE(srshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "srshl d30, d29, d28"); TEST_SINGLE(sqrshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), 
"sqrshl b30, b29, b28"); @@ -329,38 +329,38 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar three TEST_SINGLE(sqrshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqrshl s30, s29, s28"); TEST_SINGLE(sqrshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqrshl d30, d29, d28"); - //TEST_SINGLE(add(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "add b30, b29, b28"); - //TEST_SINGLE(add(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "add h30, h29, h28"); - //TEST_SINGLE(add(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "add s30, s29, s28"); + // TEST_SINGLE(add(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "add b30, b29, b28"); + // TEST_SINGLE(add(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "add h30, h29, h28"); + // TEST_SINGLE(add(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "add s30, s29, s28"); TEST_SINGLE(add(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "add d30, d29, d28"); - //TEST_SINGLE(cmtst(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmtst b30, b29, b28"); - //TEST_SINGLE(cmtst(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmtst h30, h29, h28"); - //TEST_SINGLE(cmtst(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmtst s30, s29, s28"); + // TEST_SINGLE(cmtst(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmtst b30, b29, b28"); + // TEST_SINGLE(cmtst(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmtst h30, h29, h28"); + // TEST_SINGLE(cmtst(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmtst s30, s29, s28"); TEST_SINGLE(cmtst(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "cmtst d30, d29, d28"); - //TEST_SINGLE(sqdmulh(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmulh b30, b29, b28"); + // TEST_SINGLE(sqdmulh(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmulh b30, b29, b28"); TEST_SINGLE(sqdmulh(ScalarRegSize::i16Bit, 
VReg::v30, VReg::v29, VReg::v28), "sqdmulh h30, h29, h28"); TEST_SINGLE(sqdmulh(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmulh s30, s29, s28"); - //TEST_SINGLE(sqdmulh(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmulh d30, d29, d28"); + // TEST_SINGLE(sqdmulh(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmulh d30, d29, d28"); - //TEST_SINGLE(fmulx(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fmulx b30, b29, b28"); - //TEST_SINGLE(fmulx(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fmulx h30, h29, h28"); + // TEST_SINGLE(fmulx(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fmulx b30, b29, b28"); + // TEST_SINGLE(fmulx(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fmulx h30, h29, h28"); TEST_SINGLE(fmulx(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fmulx s30, s29, s28"); TEST_SINGLE(fmulx(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fmulx d30, d29, d28"); - //TEST_SINGLE(fcmeq(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fcmeq b30, b29, b28"); - //TEST_SINGLE(fcmeq(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fcmeq h30, h29, h28"); + // TEST_SINGLE(fcmeq(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fcmeq b30, b29, b28"); + // TEST_SINGLE(fcmeq(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fcmeq h30, h29, h28"); TEST_SINGLE(fcmeq(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fcmeq s30, s29, s28"); TEST_SINGLE(fcmeq(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fcmeq d30, d29, d28"); - //TEST_SINGLE(frecps(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "frecps b30, b29, b28"); - //TEST_SINGLE(frecps(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "frecps h30, h29, h28"); + // TEST_SINGLE(frecps(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "frecps b30, b29, b28"); + // TEST_SINGLE(frecps(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), 
"frecps h30, h29, h28"); TEST_SINGLE(frecps(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "frecps s30, s29, s28"); TEST_SINGLE(frecps(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "frecps d30, d29, d28"); - //TEST_SINGLE(frsqrts(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "frsqrts b30, b29, b28"); - //TEST_SINGLE(frsqrts(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "frsqrts h30, h29, h28"); + // TEST_SINGLE(frsqrts(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "frsqrts b30, b29, b28"); + // TEST_SINGLE(frsqrts(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "frsqrts h30, h29, h28"); TEST_SINGLE(frsqrts(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "frsqrts s30, s29, s28"); TEST_SINGLE(frsqrts(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "frsqrts d30, d29, d28"); @@ -374,19 +374,19 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar three TEST_SINGLE(uqsub(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "uqsub s30, s29, s28"); TEST_SINGLE(uqsub(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "uqsub d30, d29, d28"); - //TEST_SINGLE(cmhi(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmhi b30, b29, b28"); - //TEST_SINGLE(cmhi(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmhi h30, h29, h28"); - //TEST_SINGLE(cmhi(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmhi s30, s29, s28"); + // TEST_SINGLE(cmhi(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmhi b30, b29, b28"); + // TEST_SINGLE(cmhi(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmhi h30, h29, h28"); + // TEST_SINGLE(cmhi(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmhi s30, s29, s28"); TEST_SINGLE(cmhi(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "cmhi d30, d29, d28"); - //TEST_SINGLE(cmhs(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmhs b30, b29, b28"); - 
//TEST_SINGLE(cmhs(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmhs h30, h29, h28"); - //TEST_SINGLE(cmhs(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmhs s30, s29, s28"); + // TEST_SINGLE(cmhs(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmhs b30, b29, b28"); + // TEST_SINGLE(cmhs(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmhs h30, h29, h28"); + // TEST_SINGLE(cmhs(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmhs s30, s29, s28"); TEST_SINGLE(cmhs(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "cmhs d30, d29, d28"); - //TEST_SINGLE(ushl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "ushl b30, b29, b28"); - //TEST_SINGLE(ushl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "ushl h30, h29, h28"); - //TEST_SINGLE(ushl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "ushl s30, s29, s28"); + // TEST_SINGLE(ushl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "ushl b30, b29, b28"); + // TEST_SINGLE(ushl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "ushl h30, h29, h28"); + // TEST_SINGLE(ushl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "ushl s30, s29, s28"); TEST_SINGLE(ushl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "ushl d30, d29, d28"); TEST_SINGLE(uqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "uqshl b30, b29, b28"); @@ -394,9 +394,9 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar three TEST_SINGLE(uqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "uqshl s30, s29, s28"); TEST_SINGLE(uqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "uqshl d30, d29, d28"); - //TEST_SINGLE(urshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "urshl b30, b29, b28"); - //TEST_SINGLE(urshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "urshl h30, h29, h28"); - //TEST_SINGLE(urshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "urshl s30, 
s29, s28"); + // TEST_SINGLE(urshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "urshl b30, b29, b28"); + // TEST_SINGLE(urshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "urshl h30, h29, h28"); + // TEST_SINGLE(urshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "urshl s30, s29, s28"); TEST_SINGLE(urshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "urshl d30, d29, d28"); TEST_SINGLE(uqrshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "uqrshl b30, b29, b28"); @@ -404,43 +404,43 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar three TEST_SINGLE(uqrshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "uqrshl s30, s29, s28"); TEST_SINGLE(uqrshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "uqrshl d30, d29, d28"); - //TEST_SINGLE(sub(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sub b30, b29, b28"); - //TEST_SINGLE(sub(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sub h30, h29, h28"); - //TEST_SINGLE(sub(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sub s30, s29, s28"); + // TEST_SINGLE(sub(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sub b30, b29, b28"); + // TEST_SINGLE(sub(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sub h30, h29, h28"); + // TEST_SINGLE(sub(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sub s30, s29, s28"); TEST_SINGLE(sub(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sub d30, d29, d28"); - //TEST_SINGLE(cmeq(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmeq b30, b29, b28"); - //TEST_SINGLE(cmeq(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmeq h30, h29, h28"); - //TEST_SINGLE(cmeq(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmeq s30, s29, s28"); + // TEST_SINGLE(cmeq(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmeq b30, b29, b28"); + // TEST_SINGLE(cmeq(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), 
"cmeq h30, h29, h28"); + // TEST_SINGLE(cmeq(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmeq s30, s29, s28"); TEST_SINGLE(cmeq(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "cmeq d30, d29, d28"); - //TEST_SINGLE(sqrdmulh(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmulh b30, b29, b28"); + // TEST_SINGLE(sqrdmulh(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmulh b30, b29, b28"); TEST_SINGLE(sqrdmulh(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmulh h30, h29, h28"); TEST_SINGLE(sqrdmulh(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmulh s30, s29, s28"); - //TEST_SINGLE(sqrdmulh(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmulh d30, d29, d28"); + // TEST_SINGLE(sqrdmulh(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmulh d30, d29, d28"); - //TEST_SINGLE(fcmge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fcmge b30, b29, b28"); - //TEST_SINGLE(fcmge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fcmge h30, h29, h28"); + // TEST_SINGLE(fcmge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fcmge b30, b29, b28"); + // TEST_SINGLE(fcmge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fcmge h30, h29, h28"); TEST_SINGLE(fcmge(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fcmge s30, s29, s28"); TEST_SINGLE(fcmge(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fcmge d30, d29, d28"); - //TEST_SINGLE(facge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "facge b30, b29, b28"); - //TEST_SINGLE(facge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "facge h30, h29, h28"); + // TEST_SINGLE(facge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "facge b30, b29, b28"); + // TEST_SINGLE(facge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "facge h30, h29, h28"); TEST_SINGLE(facge(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "facge s30, s29, s28"); 
TEST_SINGLE(facge(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "facge d30, d29, d28"); - //TEST_SINGLE(fabd(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fabd b30, b29, b28"); - //TEST_SINGLE(fabd(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fabd h30, h29, h28"); + // TEST_SINGLE(fabd(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fabd b30, b29, b28"); + // TEST_SINGLE(fabd(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fabd h30, h29, h28"); TEST_SINGLE(fabd(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fabd s30, s29, s28"); TEST_SINGLE(fabd(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fabd d30, d29, d28"); - //TEST_SINGLE(fcmgt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fcmgt b30, b29, b28"); - //TEST_SINGLE(fcmgt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fcmgt h30, h29, h28"); + // TEST_SINGLE(fcmgt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fcmgt b30, b29, b28"); + // TEST_SINGLE(fcmgt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fcmgt h30, h29, h28"); TEST_SINGLE(fcmgt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fcmgt s30, s29, s28"); TEST_SINGLE(fcmgt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fcmgt d30, d29, d28"); - //TEST_SINGLE(facgt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "facgt b30, b29, b28"); - //TEST_SINGLE(facgt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "facgt h30, h29, h28"); + // TEST_SINGLE(facgt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "facgt b30, b29, b28"); + // TEST_SINGLE(facgt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "facgt h30, h29, h28"); TEST_SINGLE(facgt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "facgt s30, s29, s28"); TEST_SINGLE(facgt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "facgt d30, d29, d28"); } @@ -452,180 +452,180 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: 
Advanced SIMD scalar shift // TEST_SINGLE(sshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sshr h30, h29, #15"); // TEST_SINGLE(sshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sshr s30, s29, #1"); // TEST_SINGLE(sshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sshr s30, s29, #31"); - TEST_SINGLE(sshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sshr d30, d29, #1"); + TEST_SINGLE(sshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sshr d30, d29, #1"); TEST_SINGLE(sshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sshr d30, d29, #63"); - //TEST_SINGLE(ssra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "ssra b30, b29, #1"); - //TEST_SINGLE(ssra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "ssra b30, b29, #7"); - //TEST_SINGLE(ssra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "ssra h30, h29, #1"); - //TEST_SINGLE(ssra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "ssra h30, h29, #15"); - //TEST_SINGLE(ssra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "ssra s30, s29, #1"); - //TEST_SINGLE(ssra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "ssra s30, s29, #31"); - TEST_SINGLE(ssra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "ssra d30, d29, #1"); + // TEST_SINGLE(ssra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "ssra b30, b29, #1"); + // TEST_SINGLE(ssra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "ssra b30, b29, #7"); + // TEST_SINGLE(ssra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "ssra h30, h29, #1"); + // TEST_SINGLE(ssra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "ssra h30, h29, #15"); + // TEST_SINGLE(ssra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "ssra s30, s29, #1"); + // TEST_SINGLE(ssra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "ssra s30, s29, #31"); + TEST_SINGLE(ssra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "ssra d30, d29, #1"); TEST_SINGLE(ssra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "ssra d30, d29, #63"); - 
//TEST_SINGLE(srshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "srshr b30, b29, #1"); - //TEST_SINGLE(srshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "srshr b30, b29, #7"); - //TEST_SINGLE(srshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "srshr h30, h29, #1"); - //TEST_SINGLE(srshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "srshr h30, h29, #15"); - //TEST_SINGLE(srshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "srshr s30, s29, #1"); - //TEST_SINGLE(srshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "srshr s30, s29, #31"); - TEST_SINGLE(srshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "srshr d30, d29, #1"); + // TEST_SINGLE(srshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "srshr b30, b29, #1"); + // TEST_SINGLE(srshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "srshr b30, b29, #7"); + // TEST_SINGLE(srshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "srshr h30, h29, #1"); + // TEST_SINGLE(srshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "srshr h30, h29, #15"); + // TEST_SINGLE(srshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "srshr s30, s29, #1"); + // TEST_SINGLE(srshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "srshr s30, s29, #31"); + TEST_SINGLE(srshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "srshr d30, d29, #1"); TEST_SINGLE(srshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "srshr d30, d29, #63"); - //TEST_SINGLE(srsra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "srsra b30, b29, #1"); - //TEST_SINGLE(srsra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "srsra b30, b29, #7"); - //TEST_SINGLE(srsra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "srsra h30, h29, #1"); - //TEST_SINGLE(srsra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "srsra h30, h29, #15"); - //TEST_SINGLE(srsra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "srsra s30, s29, #1"); - //TEST_SINGLE(srsra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "srsra s30, s29, #31"); - 
TEST_SINGLE(srsra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "srsra d30, d29, #1"); + // TEST_SINGLE(srsra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "srsra b30, b29, #1"); + // TEST_SINGLE(srsra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "srsra b30, b29, #7"); + // TEST_SINGLE(srsra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "srsra h30, h29, #1"); + // TEST_SINGLE(srsra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "srsra h30, h29, #15"); + // TEST_SINGLE(srsra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "srsra s30, s29, #1"); + // TEST_SINGLE(srsra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "srsra s30, s29, #31"); + TEST_SINGLE(srsra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "srsra d30, d29, #1"); TEST_SINGLE(srsra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "srsra d30, d29, #63"); - //TEST_SINGLE(shl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "shl b30, b29, #1"); - //TEST_SINGLE(shl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "shl b30, b29, #7"); - //TEST_SINGLE(shl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "shl h30, h29, #1"); - //TEST_SINGLE(shl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "shl h30, h29, #15"); - //TEST_SINGLE(shl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "shl s30, s29, #1"); - //TEST_SINGLE(shl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "shl s30, s29, #31"); - TEST_SINGLE(shl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "shl d30, d29, #1"); + // TEST_SINGLE(shl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "shl b30, b29, #1"); + // TEST_SINGLE(shl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "shl b30, b29, #7"); + // TEST_SINGLE(shl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "shl h30, h29, #1"); + // TEST_SINGLE(shl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "shl h30, h29, #15"); + // TEST_SINGLE(shl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "shl s30, s29, #1"); + // TEST_SINGLE(shl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 
31), "shl s30, s29, #31"); + TEST_SINGLE(shl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "shl d30, d29, #1"); TEST_SINGLE(shl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "shl d30, d29, #63"); - TEST_SINGLE(sqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqshl b30, b29, #1"); - TEST_SINGLE(sqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqshl b30, b29, #7"); - TEST_SINGLE(sqshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqshl h30, h29, #1"); + TEST_SINGLE(sqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqshl b30, b29, #1"); + TEST_SINGLE(sqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqshl b30, b29, #7"); + TEST_SINGLE(sqshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqshl h30, h29, #1"); TEST_SINGLE(sqshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sqshl h30, h29, #15"); - TEST_SINGLE(sqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqshl s30, s29, #1"); + TEST_SINGLE(sqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqshl s30, s29, #1"); TEST_SINGLE(sqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sqshl s30, s29, #31"); - TEST_SINGLE(sqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqshl d30, d29, #1"); + TEST_SINGLE(sqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqshl d30, d29, #1"); TEST_SINGLE(sqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqshl d30, d29, #63"); - TEST_SINGLE(sqshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqshrn b30, h29, #1"); - TEST_SINGLE(sqshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqshrn b30, h29, #7"); - TEST_SINGLE(sqshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqshrn h30, s29, #1"); + TEST_SINGLE(sqshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqshrn b30, h29, #1"); + TEST_SINGLE(sqshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqshrn b30, h29, #7"); + TEST_SINGLE(sqshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqshrn h30, s29, #1"); TEST_SINGLE(sqshrn(ScalarRegSize::i16Bit, 
VReg::v30, VReg::v29, 15), "sqshrn h30, s29, #15"); - TEST_SINGLE(sqshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqshrn s30, d29, #1"); + TEST_SINGLE(sqshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqshrn s30, d29, #1"); TEST_SINGLE(sqshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sqshrn s30, d29, #31"); - //TEST_SINGLE(sqshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqshrn d30, d29, #1"); - //TEST_SINGLE(sqshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqshrn d30, d29, #63"); + // TEST_SINGLE(sqshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqshrn d30, d29, #1"); + // TEST_SINGLE(sqshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqshrn d30, d29, #63"); - TEST_SINGLE(sqrshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqrshrn b30, h29, #1"); - TEST_SINGLE(sqrshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqrshrn b30, h29, #7"); - TEST_SINGLE(sqrshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqrshrn h30, s29, #1"); + TEST_SINGLE(sqrshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqrshrn b30, h29, #1"); + TEST_SINGLE(sqrshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqrshrn b30, h29, #7"); + TEST_SINGLE(sqrshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqrshrn h30, s29, #1"); TEST_SINGLE(sqrshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sqrshrn h30, s29, #15"); - TEST_SINGLE(sqrshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqrshrn s30, d29, #1"); + TEST_SINGLE(sqrshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqrshrn s30, d29, #1"); TEST_SINGLE(sqrshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sqrshrn s30, d29, #31"); - //TEST_SINGLE(sqrshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqrshrn d30, d29, #1"); - //TEST_SINGLE(sqrshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqrshrn d30, d29, #63"); + // TEST_SINGLE(sqrshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqrshrn d30, d29, #1"); + // 
TEST_SINGLE(sqrshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqrshrn d30, d29, #63"); // TODO: Implement `SCVTF, FCVTZS` in emitter - //TEST_SINGLE(ushr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "ushr b30, b29, #1"); - //TEST_SINGLE(ushr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "ushr b30, b29, #7"); - //TEST_SINGLE(ushr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "ushr h30, h29, #1"); - //TEST_SINGLE(ushr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "ushr h30, h29, #15"); - //TEST_SINGLE(ushr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "ushr s30, s29, #1"); - //TEST_SINGLE(ushr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "ushr s30, s29, #31"); - TEST_SINGLE(ushr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "ushr d30, d29, #1"); + // TEST_SINGLE(ushr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "ushr b30, b29, #1"); + // TEST_SINGLE(ushr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "ushr b30, b29, #7"); + // TEST_SINGLE(ushr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "ushr h30, h29, #1"); + // TEST_SINGLE(ushr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "ushr h30, h29, #15"); + // TEST_SINGLE(ushr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "ushr s30, s29, #1"); + // TEST_SINGLE(ushr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "ushr s30, s29, #31"); + TEST_SINGLE(ushr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "ushr d30, d29, #1"); TEST_SINGLE(ushr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "ushr d30, d29, #63"); - //TEST_SINGLE(usra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "usra b30, b29, #1"); - //TEST_SINGLE(usra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "usra b30, b29, #7"); - //TEST_SINGLE(usra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "usra h30, h29, #1"); - //TEST_SINGLE(usra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "usra h30, h29, #15"); - //TEST_SINGLE(usra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "usra s30, s29, #1"); - 
//TEST_SINGLE(usra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "usra s30, s29, #31"); - TEST_SINGLE(usra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "usra d30, d29, #1"); + // TEST_SINGLE(usra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "usra b30, b29, #1"); + // TEST_SINGLE(usra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "usra b30, b29, #7"); + // TEST_SINGLE(usra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "usra h30, h29, #1"); + // TEST_SINGLE(usra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "usra h30, h29, #15"); + // TEST_SINGLE(usra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "usra s30, s29, #1"); + // TEST_SINGLE(usra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "usra s30, s29, #31"); + TEST_SINGLE(usra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "usra d30, d29, #1"); TEST_SINGLE(usra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "usra d30, d29, #63"); - //TEST_SINGLE(urshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "urshr b30, b29, #1"); - //TEST_SINGLE(urshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "urshr b30, b29, #7"); - //TEST_SINGLE(urshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "urshr h30, h29, #1"); - //TEST_SINGLE(urshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "urshr h30, h29, #15"); - //TEST_SINGLE(urshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "urshr s30, s29, #1"); - //TEST_SINGLE(urshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "urshr s30, s29, #31"); - TEST_SINGLE(urshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "urshr d30, d29, #1"); + // TEST_SINGLE(urshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "urshr b30, b29, #1"); + // TEST_SINGLE(urshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "urshr b30, b29, #7"); + // TEST_SINGLE(urshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "urshr h30, h29, #1"); + // TEST_SINGLE(urshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "urshr h30, h29, #15"); + // 
TEST_SINGLE(urshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "urshr s30, s29, #1"); + // TEST_SINGLE(urshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "urshr s30, s29, #31"); + TEST_SINGLE(urshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "urshr d30, d29, #1"); TEST_SINGLE(urshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "urshr d30, d29, #63"); - //TEST_SINGLE(ursra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "ursra b30, b29, #1"); - //TEST_SINGLE(ursra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "ursra b30, b29, #7"); - //TEST_SINGLE(ursra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "ursra h30, h29, #1"); - //TEST_SINGLE(ursra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "ursra h30, h29, #15"); - //TEST_SINGLE(ursra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "ursra s30, s29, #1"); - //TEST_SINGLE(ursra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "ursra s30, s29, #31"); - TEST_SINGLE(ursra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "ursra d30, d29, #1"); + // TEST_SINGLE(ursra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "ursra b30, b29, #1"); + // TEST_SINGLE(ursra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "ursra b30, b29, #7"); + // TEST_SINGLE(ursra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "ursra h30, h29, #1"); + // TEST_SINGLE(ursra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "ursra h30, h29, #15"); + // TEST_SINGLE(ursra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "ursra s30, s29, #1"); + // TEST_SINGLE(ursra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "ursra s30, s29, #31"); + TEST_SINGLE(ursra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "ursra d30, d29, #1"); TEST_SINGLE(ursra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "ursra d30, d29, #63"); - //TEST_SINGLE(sri(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sri b30, b29, #1"); - //TEST_SINGLE(sri(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sri b30, b29, #7"); - 
//TEST_SINGLE(sri(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sri h30, h29, #1"); - //TEST_SINGLE(sri(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sri h30, h29, #15"); - //TEST_SINGLE(sri(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sri s30, s29, #1"); - //TEST_SINGLE(sri(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sri s30, s29, #31"); - TEST_SINGLE(sri(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sri d30, d29, #1"); + // TEST_SINGLE(sri(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sri b30, b29, #1"); + // TEST_SINGLE(sri(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sri b30, b29, #7"); + // TEST_SINGLE(sri(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sri h30, h29, #1"); + // TEST_SINGLE(sri(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sri h30, h29, #15"); + // TEST_SINGLE(sri(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sri s30, s29, #1"); + // TEST_SINGLE(sri(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sri s30, s29, #31"); + TEST_SINGLE(sri(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sri d30, d29, #1"); TEST_SINGLE(sri(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sri d30, d29, #63"); - //TEST_SINGLE(sli(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sli b30, b29, #1"); - //TEST_SINGLE(sli(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sli b30, b29, #7"); - //TEST_SINGLE(sli(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sli h30, h29, #1"); - //TEST_SINGLE(sli(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sli h30, h29, #15"); - //TEST_SINGLE(sli(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sli s30, s29, #1"); - //TEST_SINGLE(sli(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sli s30, s29, #31"); - TEST_SINGLE(sli(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sli d30, d29, #1"); + // TEST_SINGLE(sli(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sli b30, b29, #1"); + // TEST_SINGLE(sli(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sli b30, b29, #7"); + // 
TEST_SINGLE(sli(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sli h30, h29, #1"); + // TEST_SINGLE(sli(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sli h30, h29, #15"); + // TEST_SINGLE(sli(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sli s30, s29, #1"); + // TEST_SINGLE(sli(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sli s30, s29, #31"); + TEST_SINGLE(sli(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sli d30, d29, #1"); TEST_SINGLE(sli(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sli d30, d29, #63"); - TEST_SINGLE(sqshlu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqshlu b30, b29, #1"); - TEST_SINGLE(sqshlu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqshlu b30, b29, #7"); - TEST_SINGLE(sqshlu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqshlu h30, h29, #1"); + TEST_SINGLE(sqshlu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqshlu b30, b29, #1"); + TEST_SINGLE(sqshlu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqshlu b30, b29, #7"); + TEST_SINGLE(sqshlu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqshlu h30, h29, #1"); TEST_SINGLE(sqshlu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sqshlu h30, h29, #15"); - TEST_SINGLE(sqshlu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqshlu s30, s29, #1"); + TEST_SINGLE(sqshlu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqshlu s30, s29, #1"); TEST_SINGLE(sqshlu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sqshlu s30, s29, #31"); - TEST_SINGLE(sqshlu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqshlu d30, d29, #1"); + TEST_SINGLE(sqshlu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqshlu d30, d29, #1"); TEST_SINGLE(sqshlu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqshlu d30, d29, #63"); - TEST_SINGLE(uqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "uqshl b30, b29, #1"); - TEST_SINGLE(uqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "uqshl b30, b29, #7"); - TEST_SINGLE(uqshl(ScalarRegSize::i16Bit, VReg::v30, 
VReg::v29, 1), "uqshl h30, h29, #1"); + TEST_SINGLE(uqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "uqshl b30, b29, #1"); + TEST_SINGLE(uqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "uqshl b30, b29, #7"); + TEST_SINGLE(uqshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "uqshl h30, h29, #1"); TEST_SINGLE(uqshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "uqshl h30, h29, #15"); - TEST_SINGLE(uqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "uqshl s30, s29, #1"); + TEST_SINGLE(uqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "uqshl s30, s29, #1"); TEST_SINGLE(uqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "uqshl s30, s29, #31"); - TEST_SINGLE(uqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "uqshl d30, d29, #1"); + TEST_SINGLE(uqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "uqshl d30, d29, #1"); TEST_SINGLE(uqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "uqshl d30, d29, #63"); - TEST_SINGLE(sqshrun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqshrun b30, h29, #1"); - TEST_SINGLE(sqshrun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqshrun b30, h29, #7"); - TEST_SINGLE(sqshrun(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqshrun h30, s29, #1"); + TEST_SINGLE(sqshrun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqshrun b30, h29, #1"); + TEST_SINGLE(sqshrun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqshrun b30, h29, #7"); + TEST_SINGLE(sqshrun(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqshrun h30, s29, #1"); TEST_SINGLE(sqshrun(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sqshrun h30, s29, #15"); - TEST_SINGLE(sqshrun(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqshrun s30, d29, #1"); + TEST_SINGLE(sqshrun(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqshrun s30, d29, #1"); TEST_SINGLE(sqshrun(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sqshrun s30, d29, #31"); - //TEST_SINGLE(sqshrun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqshrun d30, 
d29, #1"); - //TEST_SINGLE(sqshrun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqshrun d30, d29, #63"); + // TEST_SINGLE(sqshrun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqshrun d30, d29, #1"); + // TEST_SINGLE(sqshrun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqshrun d30, d29, #63"); - TEST_SINGLE(sqrshrun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqrshrun b30, h29, #1"); - TEST_SINGLE(sqrshrun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqrshrun b30, h29, #7"); - TEST_SINGLE(sqrshrun(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqrshrun h30, s29, #1"); + TEST_SINGLE(sqrshrun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqrshrun b30, h29, #1"); + TEST_SINGLE(sqrshrun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqrshrun b30, h29, #7"); + TEST_SINGLE(sqrshrun(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqrshrun h30, s29, #1"); TEST_SINGLE(sqrshrun(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sqrshrun h30, s29, #15"); - TEST_SINGLE(sqrshrun(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqrshrun s30, d29, #1"); + TEST_SINGLE(sqrshrun(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqrshrun s30, d29, #1"); TEST_SINGLE(sqrshrun(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sqrshrun s30, d29, #31"); - //TEST_SINGLE(sqrshrun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqrshrun d30, d29, #1"); - //TEST_SINGLE(sqrshrun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqrshrun d30, d29, #63"); + // TEST_SINGLE(sqrshrun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqrshrun d30, d29, #1"); + // TEST_SINGLE(sqrshrun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqrshrun d30, d29, #63"); - TEST_SINGLE(uqshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "uqshrn b30, h29, #1"); - TEST_SINGLE(uqshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "uqshrn b30, h29, #7"); - TEST_SINGLE(uqshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "uqshrn h30, s29, #1"); + 
TEST_SINGLE(uqshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "uqshrn b30, h29, #1"); + TEST_SINGLE(uqshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "uqshrn b30, h29, #7"); + TEST_SINGLE(uqshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "uqshrn h30, s29, #1"); TEST_SINGLE(uqshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "uqshrn h30, s29, #15"); - TEST_SINGLE(uqshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "uqshrn s30, d29, #1"); + TEST_SINGLE(uqshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "uqshrn s30, d29, #1"); TEST_SINGLE(uqshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "uqshrn s30, d29, #31"); - //TEST_SINGLE(uqshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "uqshrn d30, d29, #1"); - //TEST_SINGLE(uqshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "uqshrn d30, d29, #63"); + // TEST_SINGLE(uqshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "uqshrn d30, d29, #1"); + // TEST_SINGLE(uqshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "uqshrn d30, d29, #63"); - TEST_SINGLE(uqrshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "uqrshrn b30, h29, #1"); - TEST_SINGLE(uqrshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "uqrshrn b30, h29, #7"); - TEST_SINGLE(uqrshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "uqrshrn h30, s29, #1"); + TEST_SINGLE(uqrshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "uqrshrn b30, h29, #1"); + TEST_SINGLE(uqrshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "uqrshrn b30, h29, #7"); + TEST_SINGLE(uqrshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "uqrshrn h30, s29, #1"); TEST_SINGLE(uqrshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "uqrshrn h30, s29, #15"); - TEST_SINGLE(uqrshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "uqrshrn s30, d29, #1"); + TEST_SINGLE(uqrshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "uqrshrn s30, d29, #1"); TEST_SINGLE(uqrshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "uqrshrn s30, d29, #31"); 
- //TEST_SINGLE(uqrshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "uqrshrn d30, d29, #1"); - //TEST_SINGLE(uqrshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "uqrshrn d30, d29, #63"); + // TEST_SINGLE(uqrshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "uqrshrn d30, d29, #1"); + // TEST_SINGLE(uqrshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "uqrshrn d30, d29, #63"); // TODO: Implement `UCVTF, FCVTZU' in emitter } @@ -736,7 +736,7 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Floating-point data-process TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Floating-point compare") { // Commented out lines showcase unallocated encodings. - //TEST_SINGLE(fcmp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcmp b30, b29"); + // TEST_SINGLE(fcmp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcmp b30, b29"); TEST_SINGLE(fcmp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmp h30, h29"); TEST_SINGLE(fcmp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcmp s30, s29"); TEST_SINGLE(fcmp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcmp d30, d29"); diff --git a/Scripts/clang-format.py b/Scripts/clang-format.py new file mode 100644 index 0000000000..c23654eb2d --- /dev/null +++ b/Scripts/clang-format.py @@ -0,0 +1,70 @@ +import subprocess +import sys +import os +import re +import fnmatch + +# Wrapper globals +project_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..") +ignore_file_path = os.path.join(project_root, ".clang-format-ignore") +clang_format_command = "clang-format" + + +def glob_to_regex(pattern): + # Normalize directory separators + pattern = pattern.replace("\\", "/") + return fnmatch.translate(pattern) + + +def load_ignore_patterns(ignore_file_path): + with open(ignore_file_path, "r") as file: + lines = file.readlines() + + patterns = [] + for line in lines: + line = line.strip() + if line and not line.startswith("#"): # Ignore empty lines and comments + pattern = glob_to_regex(line) + 
patterns.append(re.compile(pattern)) + return patterns + + +def normalize_path(file_path): + absolute_path = os.path.abspath(file_path) + normalized_path = absolute_path.replace("\\", "/") + return normalized_path + + +def should_ignore(file_path, ignore_patterns): + normalized_path = normalize_path(file_path) + relative_path = os.path.relpath(normalized_path, start=project_root).replace( + "\\", "/" + ) + for pattern in ignore_patterns: + if pattern.match(relative_path): + return True + return False + + +def find_valid_file_paths(args): + return [arg for arg in args if os.path.isfile(arg)] + + +def main(): + ignore_patterns = load_ignore_patterns(ignore_file_path) + valid_paths = find_valid_file_paths(sys.argv[1:]) + + if len(valid_paths) != 1: + print("Error: Expected exactly one valid file path as argument.") + sys.exit(1) + + file_path = valid_paths[0] + if should_ignore(file_path, ignore_patterns): + print(f"Ignoring {file_path} based on ignore patterns.") + return + + subprocess.run([clang_format_command] + sys.argv[1:], check=True) + + +if __name__ == "__main__": + main() diff --git a/Source/Common/ArgumentLoader.cpp b/Source/Common/ArgumentLoader.cpp index 622e88f575..7a20f0abd8 100644 --- a/Source/Common/ArgumentLoader.cpp +++ b/Source/Common/ArgumentLoader.cpp @@ -11,55 +11,55 @@ #include namespace FEX::ArgLoader { - fextl::vector RemainingArgs; - fextl::vector ProgramArguments; +fextl::vector RemainingArgs; +fextl::vector ProgramArguments; - static fextl::string Version = "FEX-Emu (" GIT_DESCRIBE_STRING ") "; - void FEX::ArgLoader::ArgLoader::Load() { - optparse::OptionParser Parser{}; - Parser.version(Version); - optparse::OptionGroup CPUGroup(Parser, "CPU Core options"); - optparse::OptionGroup EmulationGroup(Parser, "Emulation options"); - optparse::OptionGroup DebugGroup(Parser, "Debug options"); - optparse::OptionGroup HacksGroup(Parser, "Hacks options"); - optparse::OptionGroup MiscGroup(Parser, "Miscellaneous options"); - optparse::OptionGroup 
LoggingGroup(Parser, "Logging options"); +static fextl::string Version = "FEX-Emu (" GIT_DESCRIBE_STRING ") "; +void FEX::ArgLoader::ArgLoader::Load() { + optparse::OptionParser Parser {}; + Parser.version(Version); + optparse::OptionGroup CPUGroup(Parser, "CPU Core options"); + optparse::OptionGroup EmulationGroup(Parser, "Emulation options"); + optparse::OptionGroup DebugGroup(Parser, "Debug options"); + optparse::OptionGroup HacksGroup(Parser, "Hacks options"); + optparse::OptionGroup MiscGroup(Parser, "Miscellaneous options"); + optparse::OptionGroup LoggingGroup(Parser, "Logging options"); #define BEFORE_PARSE #include - Parser.add_option_group(CPUGroup); - Parser.add_option_group(EmulationGroup); - Parser.add_option_group(DebugGroup); - Parser.add_option_group(HacksGroup); - Parser.add_option_group(MiscGroup); - Parser.add_option_group(LoggingGroup); + Parser.add_option_group(CPUGroup); + Parser.add_option_group(EmulationGroup); + Parser.add_option_group(DebugGroup); + Parser.add_option_group(HacksGroup); + Parser.add_option_group(MiscGroup); + Parser.add_option_group(LoggingGroup); - optparse::Values Options = Parser.parse_args(argc, argv); + optparse::Values Options = Parser.parse_args(argc, argv); - using int32 = int32_t; - using uint32 = uint32_t; + using int32 = int32_t; + using uint32 = uint32_t; #define AFTER_PARSE #include - RemainingArgs = Parser.args(); - ProgramArguments = Parser.parsed_args(); - } - - void LoadWithoutArguments(int _argc, char **_argv) { - // Skip argument 0, which will be the interpreter - for (int i = 1; i < _argc; ++i) { - RemainingArgs.emplace_back(_argv[i]); - } + RemainingArgs = Parser.args(); + ProgramArguments = Parser.parsed_args(); +} - // Put the interpreter in ProgramArguments - ProgramArguments.emplace_back(_argv[0]); +void LoadWithoutArguments(int _argc, char** _argv) { + // Skip argument 0, which will be the interpreter + for (int i = 1; i < _argc; ++i) { + RemainingArgs.emplace_back(_argv[i]); } - fextl::vector 
Get() { - return RemainingArgs; - } - fextl::vector GetParsedArgs() { - return ProgramArguments; - } + // Put the interpreter in ProgramArguments + ProgramArguments.emplace_back(_argv[0]); +} +fextl::vector Get() { + return RemainingArgs; } +fextl::vector GetParsedArgs() { + return ProgramArguments; +} + +} // namespace FEX::ArgLoader diff --git a/Source/Common/Config.cpp b/Source/Common/Config.cpp index 5a1aaef50e..992caf2545 100644 --- a/Source/Common/Config.cpp +++ b/Source/Common/Config.cpp @@ -39,35 +39,34 @@ namespace JSON { return &*alloc->json_objects->emplace(alloc->json_objects->end()); } - static void LoadJSonConfig(const fextl::string &Config, std::function Func) { + static void LoadJSonConfig(const fextl::string& Config, std::function Func) { fextl::vector Data; if (!FEXCore::FileLoading::LoadFile(Data, Config)) { return; } JsonAllocator Pool { - .PoolObject = { - .init = PoolInit, - .alloc = PoolAlloc, - }, + .PoolObject = + { + .init = PoolInit, + .alloc = PoolAlloc, + }, }; - json_t const *json = json_createWithPool(&Data.at(0), &Pool.PoolObject); + const json_t* json = json_createWithPool(&Data.at(0), &Pool.PoolObject); if (!json) { LogMan::Msg::EFmt("Couldn't create json"); return; } - json_t const* ConfigList = json_getProperty(json, "Config"); + const json_t* ConfigList = json_getProperty(json, "Config"); if (!ConfigList) { // This is a non-error if the configuration file exists but no Config section return; } - for (json_t const* ConfigItem = json_getChild(ConfigList); - ConfigItem != nullptr; - ConfigItem = json_getSibling(ConfigItem)) { + for (const json_t* ConfigItem = json_getChild(ConfigList); ConfigItem != nullptr; ConfigItem = json_getSibling(ConfigItem)) { const char* ConfigName = json_getName(ConfigItem); const char* ConfigString = json_getValue(ConfigItem); @@ -84,473 +83,444 @@ namespace JSON { Func(ConfigName, ConfigString); } } -} +} // namespace JSON - static const fextl::map ConfigToNameLookup = {{ +static const fextl::map 
ConfigToNameLookup = {{ #define OPT_BASE(type, group, enum, json, default) {FEXCore::Config::ConfigOption::CONFIG_##enum, #json}, #include - }}; - - void SaveLayerToJSON(const fextl::string& Filename, FEXCore::Config::Layer *const Layer) { - char Buffer[4096]; - char *Dest{}; - Dest = json_objOpen(Buffer, nullptr); - Dest = json_objOpen(Dest, "Config"); - for (auto &it : Layer->GetOptionMap()) { - auto &Name = ConfigToNameLookup.find(it.first)->second; - for (auto &var : it.second) { - Dest = json_str(Dest, Name.c_str(), var.c_str()); - } +}}; + +void SaveLayerToJSON(const fextl::string& Filename, FEXCore::Config::Layer* const Layer) { + char Buffer[4096]; + char* Dest {}; + Dest = json_objOpen(Buffer, nullptr); + Dest = json_objOpen(Dest, "Config"); + for (auto& it : Layer->GetOptionMap()) { + auto& Name = ConfigToNameLookup.find(it.first)->second; + for (auto& var : it.second) { + Dest = json_str(Dest, Name.c_str(), var.c_str()); } - Dest = json_objClose(Dest); - Dest = json_objClose(Dest); - json_end(Dest); + } + Dest = json_objClose(Dest); + Dest = json_objClose(Dest); + json_end(Dest); - auto File = FEXCore::File::File(Filename.c_str(), - FEXCore::File::FileModes::WRITE | - FEXCore::File::FileModes::CREATE | - FEXCore::File::FileModes::TRUNCATE); + auto File = FEXCore::File::File(Filename.c_str(), + FEXCore::File::FileModes::WRITE | FEXCore::File::FileModes::CREATE | FEXCore::File::FileModes::TRUNCATE); - if (File.IsValid()) { - File.Write(Buffer, strlen(Buffer)); - } + if (File.IsValid()) { + File.Write(Buffer, strlen(Buffer)); } +} - // Application loaders - class OptionMapper : public FEXCore::Config::Layer { - public: - explicit OptionMapper(FEXCore::Config::LayerType Layer); +// Application loaders +class OptionMapper : public FEXCore::Config::Layer { +public: + explicit OptionMapper(FEXCore::Config::LayerType Layer); - protected: - void MapNameToOption(const char *ConfigName, const char *ConfigString); - }; +protected: + void MapNameToOption(const char* 
ConfigName, const char* ConfigString); +}; - class MainLoader final : public OptionMapper { - public: - explicit MainLoader(FEXCore::Config::LayerType Type); - explicit MainLoader(fextl::string ConfigFile); - void Load() override; +class MainLoader final : public OptionMapper { +public: + explicit MainLoader(FEXCore::Config::LayerType Type); + explicit MainLoader(fextl::string ConfigFile); + void Load() override; - private: - fextl::string Config; - }; +private: + fextl::string Config; +}; - class AppLoader final : public OptionMapper { - public: - explicit AppLoader(const fextl::string& Filename, FEXCore::Config::LayerType Type); - void Load(); +class AppLoader final : public OptionMapper { +public: + explicit AppLoader(const fextl::string& Filename, FEXCore::Config::LayerType Type); + void Load(); - private: - fextl::string Config; - }; +private: + fextl::string Config; +}; - class EnvLoader final : public FEXCore::Config::Layer { - public: - explicit EnvLoader(char *const _envp[]); - void Load() override; +class EnvLoader final : public FEXCore::Config::Layer { +public: + explicit EnvLoader(char* const _envp[]); + void Load() override; - private: - char *const *envp; - }; +private: + char* const* envp; +}; - static const fextl::map> ConfigLookup = {{ +static const fextl::map> ConfigLookup = {{ #define OPT_BASE(type, group, enum, json, default) {#json, FEXCore::Config::ConfigOption::CONFIG_##enum}, #include - }}; +}}; - OptionMapper::OptionMapper(FEXCore::Config::LayerType Layer) - : FEXCore::Config::Layer(Layer) { - } +OptionMapper::OptionMapper(FEXCore::Config::LayerType Layer) + : FEXCore::Config::Layer(Layer) {} - void OptionMapper::MapNameToOption(const char *ConfigName, const char *ConfigString) { - auto it = ConfigLookup.find(ConfigName); - if (it != ConfigLookup.end()) { - const auto KeyOption = it->second; - const auto KeyName = std::string_view(ConfigName); - const auto Value_View = std::string_view(ConfigString); +void 
OptionMapper::MapNameToOption(const char* ConfigName, const char* ConfigString) { + auto it = ConfigLookup.find(ConfigName); + if (it != ConfigLookup.end()) { + const auto KeyOption = it->second; + const auto KeyName = std::string_view(ConfigName); + const auto Value_View = std::string_view(ConfigString); #define JSONLOADER #include - } } +} - static const fextl::vector> EnvConfigLookup = {{ +static const fextl::vector> EnvConfigLookup = {{ #define OPT_BASE(type, group, enum, json, default) {"FEX_" #enum, FEXCore::Config::ConfigOption::CONFIG_##enum}, #include - }}; +}}; - MainLoader::MainLoader(FEXCore::Config::LayerType Type) - : OptionMapper(Type) - , Config{FEXCore::Config::GetConfigFileLocation(Type == FEXCore::Config::LayerType::LAYER_GLOBAL_MAIN)} { - } +MainLoader::MainLoader(FEXCore::Config::LayerType Type) + : OptionMapper(Type) + , Config {FEXCore::Config::GetConfigFileLocation(Type == FEXCore::Config::LayerType::LAYER_GLOBAL_MAIN)} {} - MainLoader::MainLoader(fextl::string ConfigFile) - : OptionMapper(FEXCore::Config::LayerType::LAYER_MAIN) - , Config{std::move(ConfigFile)} { - } +MainLoader::MainLoader(fextl::string ConfigFile) + : OptionMapper(FEXCore::Config::LayerType::LAYER_MAIN) + , Config {std::move(ConfigFile)} {} - void MainLoader::Load() { - JSON::LoadJSonConfig(Config, [this](const char *Name, const char *ConfigString) { - MapNameToOption(Name, ConfigString); - }); - } +void MainLoader::Load() { + JSON::LoadJSonConfig(Config, [this](const char* Name, const char* ConfigString) { MapNameToOption(Name, ConfigString); }); +} - AppLoader::AppLoader(const fextl::string& Filename, FEXCore::Config::LayerType Type) - : OptionMapper(Type) { - const bool Global = Type == FEXCore::Config::LayerType::LAYER_GLOBAL_STEAM_APP || - Type == FEXCore::Config::LayerType::LAYER_GLOBAL_APP; - Config = FEXCore::Config::GetApplicationConfig(Filename, Global); +AppLoader::AppLoader(const fextl::string& Filename, FEXCore::Config::LayerType Type) + : OptionMapper(Type) 
{ + const bool Global = Type == FEXCore::Config::LayerType::LAYER_GLOBAL_STEAM_APP || Type == FEXCore::Config::LayerType::LAYER_GLOBAL_APP; + Config = FEXCore::Config::GetApplicationConfig(Filename, Global); - // Immediately load so we can reload the meta layer - Load(); - } + // Immediately load so we can reload the meta layer + Load(); +} - void AppLoader::Load() { - JSON::LoadJSonConfig(Config, [this](const char *Name, const char *ConfigString) { - MapNameToOption(Name, ConfigString); - }); - } +void AppLoader::Load() { + JSON::LoadJSonConfig(Config, [this](const char* Name, const char* ConfigString) { MapNameToOption(Name, ConfigString); }); +} - EnvLoader::EnvLoader(char *const _envp[]) - : FEXCore::Config::Layer(FEXCore::Config::LayerType::LAYER_ENVIRONMENT) - , envp {_envp} { - } +EnvLoader::EnvLoader(char* const _envp[]) + : FEXCore::Config::Layer(FEXCore::Config::LayerType::LAYER_ENVIRONMENT) + , envp {_envp} {} - void EnvLoader::Load() { - using EnvMapType = fextl::unordered_map; - EnvMapType EnvMap; +void EnvLoader::Load() { + using EnvMapType = fextl::unordered_map; + EnvMapType EnvMap; - for(const char *const *pvar=envp; pvar && *pvar; pvar++) { - std::string_view Var(*pvar); - size_t pos = Var.rfind('='); - if (fextl::string::npos == pos) - continue; + for (const char* const* pvar = envp; pvar && *pvar; pvar++) { + std::string_view Var(*pvar); + size_t pos = Var.rfind('='); + if (fextl::string::npos == pos) { + continue; + } - std::string_view Key = Var.substr(0,pos); - std::string_view Value_View {Var.substr(pos+1)}; - std::optional Value; + std::string_view Key = Var.substr(0, pos); + std::string_view Value_View {Var.substr(pos + 1)}; + std::optional Value; #define ENVLOADER #include - if (Value) { - EnvMap.insert_or_assign(Key, *Value); - } - else { - EnvMap.insert_or_assign(Key, Value_View); - } + if (Value) { + EnvMap.insert_or_assign(Key, *Value); + } else { + EnvMap.insert_or_assign(Key, Value_View); } + } - auto GetVar = [](EnvMapType &EnvMap, 
const std::string_view id) -> std::optional { - if (EnvMap.find(id) != EnvMap.end()) - return EnvMap.at(id); + auto GetVar = [](EnvMapType& EnvMap, const std::string_view id) -> std::optional { + if (EnvMap.find(id) != EnvMap.end()) { + return EnvMap.at(id); + } - // If envp[] was empty, search using std::getenv() - const char* vs = std::getenv(id.data()); - if (vs) { - return vs; - } - else { - return std::nullopt; - } - }; + // If envp[] was empty, search using std::getenv() + const char* vs = std::getenv(id.data()); + if (vs) { + return vs; + } else { + return std::nullopt; + } + }; - std::optional Value; + std::optional Value; - for (auto &it : EnvConfigLookup) { - if ((Value = GetVar(EnvMap, it.first)).has_value()) { - Set(it.second, fextl::string(*Value)); - } + for (auto& it : EnvConfigLookup) { + if ((Value = GetVar(EnvMap, it.first)).has_value()) { + Set(it.second, fextl::string(*Value)); } } +} - fextl::unique_ptr CreateGlobalMainLayer() { - return fextl::make_unique(FEXCore::Config::LayerType::LAYER_GLOBAL_MAIN); - } +fextl::unique_ptr CreateGlobalMainLayer() { + return fextl::make_unique(FEXCore::Config::LayerType::LAYER_GLOBAL_MAIN); +} - fextl::unique_ptr CreateMainLayer(fextl::string const *File) { - if (File) { - return fextl::make_unique(*File); - } - else { - return fextl::make_unique(FEXCore::Config::LayerType::LAYER_MAIN); - } +fextl::unique_ptr CreateMainLayer(const fextl::string* File) { + if (File) { + return fextl::make_unique(*File); + } else { + return fextl::make_unique(FEXCore::Config::LayerType::LAYER_MAIN); } +} - fextl::unique_ptr CreateAppLayer(const fextl::string& Filename, FEXCore::Config::LayerType Type) { - return fextl::make_unique(Filename, Type); - } +fextl::unique_ptr CreateAppLayer(const fextl::string& Filename, FEXCore::Config::LayerType Type) { + return fextl::make_unique(Filename, Type); +} - fextl::unique_ptr CreateEnvironmentLayer(char *const _envp[]) { - return fextl::make_unique(_envp); - } +fextl::unique_ptr 
CreateEnvironmentLayer(char* const _envp[]) { + return fextl::make_unique(_envp); +} - fextl::string RecoverGuestProgramFilename(fextl::string Program, bool ExecFDInterp, const std::string_view ProgramFDFromEnv) { - // If executed with a FEX FD then the Program argument might be empty. - // In this case we need to scan the FD node to recover the application binary that exists on disk. - // Only do this if the Program argument is empty, since we would prefer the application's expectation - // of application name. - if (!ProgramFDFromEnv.empty() && Program.empty()) { - // Get the `dev` node of the execveat fd string. - Program = "/dev/fd/"; - Program += ProgramFDFromEnv; - } +fextl::string RecoverGuestProgramFilename(fextl::string Program, bool ExecFDInterp, const std::string_view ProgramFDFromEnv) { + // If executed with a FEX FD then the Program argument might be empty. + // In this case we need to scan the FD node to recover the application binary that exists on disk. + // Only do this if the Program argument is empty, since we would prefer the application's expectation + // of application name. + if (!ProgramFDFromEnv.empty() && Program.empty()) { + // Get the `dev` node of the execveat fd string. + Program = "/dev/fd/"; + Program += ProgramFDFromEnv; + } - // If we were provided a relative path then we need to canonicalize it to become absolute. - // If the program name isn't resolved to an absolute path then glibc breaks inside it's `_dl_get_origin` function. - // This is because we rewrite `/proc/self/exe` to the absolute program path calculated in here. - if (!Program.starts_with('/')) { - char ExistsTempPath[PATH_MAX]; - char *RealPath = FHU::Filesystem::Absolute(Program.c_str(), ExistsTempPath); - if (RealPath) { - Program = RealPath; - } + // If we were provided a relative path then we need to canonicalize it to become absolute. + // If the program name isn't resolved to an absolute path then glibc breaks inside it's `_dl_get_origin` function. 
+ // This is because we rewrite `/proc/self/exe` to the absolute program path calculated in here. + if (!Program.starts_with('/')) { + char ExistsTempPath[PATH_MAX]; + char* RealPath = FHU::Filesystem::Absolute(Program.c_str(), ExistsTempPath); + if (RealPath) { + Program = RealPath; } + } - // If FEX was invoked through an FD path (either binfmt_misc or execveat) then we need to check the - // Program to see if it is a symlink to find the real path. - // - // binfmt_misc: Arg[0] is actually the execve `pathname` argument or `/dev/fd/` path - // - `pathname` with execve (See Side Note) - // - FD path with execveat and FD doesn't have an existing file on the disk - // - // ProgramFDFromEnv: Arg[0] is Application provided data or `/dev/fd/` from above fix-up. - // - execveat was either passed no arguments (argv=NULL) or the first argument is an empty string (argv[0]=""). - // - FD path with execveat and FD doesn't have an existing file on the disk - // - // Side Note: - // The `execve` syscall doesn't take an FD but binfmt_misc will give FEX an FD to execute still. - // Arg[0] will always contain the `pathname` argument provided to execve. - // It does not resolve symlinks, and it does not convert the path to absolute. - // - // Examples: - // - Regular execve. Application must exist on disk. - // execve binfmt_misc args layout: `FEXInterpreter ...` - // - Regular execveat with FD. FD is backed by application on disk. - // execveat binfmt_misc args layout: `FEXInterpreter ...` - // - Regular execveat with FD. FD points to file on disk that has been deleted. - // execveat binfmt_misc args layout: `FEXInterpreter /dev/fd/ ...` + // If FEX was invoked through an FD path (either binfmt_misc or execveat) then we need to check the + // Program to see if it is a symlink to find the real path. 
+ // + // binfmt_misc: Arg[0] is actually the execve `pathname` argument or `/dev/fd/` path + // - `pathname` with execve (See Side Note) + // - FD path with execveat and FD doesn't have an existing file on the disk + // + // ProgramFDFromEnv: Arg[0] is Application provided data or `/dev/fd/` from above fix-up. + // - execveat was either passed no arguments (argv=NULL) or the first argument is an empty string (argv[0]=""). + // - FD path with execveat and FD doesn't have an existing file on the disk + // + // Side Note: + // The `execve` syscall doesn't take an FD but binfmt_misc will give FEX an FD to execute still. + // Arg[0] will always contain the `pathname` argument provided to execve. + // It does not resolve symlinks, and it does not convert the path to absolute. + // + // Examples: + // - Regular execve. Application must exist on disk. + // execve binfmt_misc args layout: `FEXInterpreter ...` + // - Regular execveat with FD. FD is backed by application on disk. + // execveat binfmt_misc args layout: `FEXInterpreter ...` + // - Regular execveat with FD. FD points to file on disk that has been deleted. + // execveat binfmt_misc args layout: `FEXInterpreter /dev/fd/ ...` #ifndef _WIN32 - if (ExecFDInterp || !ProgramFDFromEnv.empty()) { - // Only in the case that FEX is executing an FD will the program argument potentially be a symlink. - // This symlink will be in the style of `/dev/fd/`. - // - // If the argument /is/ a symlink then resolve its path to get the original application name. - if (FHU::Symlinks::IsSymlink(Program)) { - char Filename[PATH_MAX]; - auto SymlinkPath = FHU::Symlinks::ResolveSymlink(Program, Filename); - if (SymlinkPath.starts_with('/')) { - // This file was executed through an FD. - // Remove the ` (deleted)` text if the file was deleted after the fact. - // Otherwise just get the symlink without the deleted text. 
- return fextl::string{SymlinkPath.substr(0, SymlinkPath.rfind(" (deleted)"))}; - } + if (ExecFDInterp || !ProgramFDFromEnv.empty()) { + // Only in the case that FEX is executing an FD will the program argument potentially be a symlink. + // This symlink will be in the style of `/dev/fd/`. + // + // If the argument /is/ a symlink then resolve its path to get the original application name. + if (FHU::Symlinks::IsSymlink(Program)) { + char Filename[PATH_MAX]; + auto SymlinkPath = FHU::Symlinks::ResolveSymlink(Program, Filename); + if (SymlinkPath.starts_with('/')) { + // This file was executed through an FD. + // Remove the ` (deleted)` text if the file was deleted after the fact. + // Otherwise just get the symlink without the deleted text. + return fextl::string {SymlinkPath.substr(0, SymlinkPath.rfind(" (deleted)"))}; } } + } #endif - return Program; - } + return Program; +} - ApplicationNames LoadConfig( - bool NoFEXArguments, - bool LoadProgramConfig, - int argc, - char **argv, - char **const envp, - bool ExecFDInterp, - const std::string_view ProgramFDFromEnv) { - FEX::Config::InitializeConfigs(); - FEXCore::Config::Initialize(); - FEXCore::Config::AddLayer(CreateGlobalMainLayer()); - FEXCore::Config::AddLayer(CreateMainLayer()); - - if (NoFEXArguments) { - FEX::ArgLoader::LoadWithoutArguments(argc, argv); - } - else { - FEXCore::Config::AddLayer(fextl::make_unique(argc, argv)); - } +ApplicationNames LoadConfig(bool NoFEXArguments, bool LoadProgramConfig, int argc, char** argv, char** const envp, bool ExecFDInterp, + const std::string_view ProgramFDFromEnv) { + FEX::Config::InitializeConfigs(); + FEXCore::Config::Initialize(); + FEXCore::Config::AddLayer(CreateGlobalMainLayer()); + FEXCore::Config::AddLayer(CreateMainLayer()); + + if (NoFEXArguments) { + FEX::ArgLoader::LoadWithoutArguments(argc, argv); + } else { + FEXCore::Config::AddLayer(fextl::make_unique(argc, argv)); + } - FEXCore::Config::AddLayer(CreateEnvironmentLayer(envp)); - 
FEXCore::Config::Load(); + FEXCore::Config::AddLayer(CreateEnvironmentLayer(envp)); + FEXCore::Config::Load(); - auto Args = FEX::ArgLoader::Get(); + auto Args = FEX::ArgLoader::Get(); - if (LoadProgramConfig) { - if (Args.empty()) { - // Early exit if we weren't passed an argument - return {}; - } + if (LoadProgramConfig) { + if (Args.empty()) { + // Early exit if we weren't passed an argument + return {}; + } - Args[0] = RecoverGuestProgramFilename(std::move(Args[0]), ExecFDInterp, ProgramFDFromEnv); - fextl::string& Program = Args[0]; - - bool Wine = false; - fextl::string ProgramName; - for (size_t CurrentProgramNameIndex = 0; CurrentProgramNameIndex < Args.size(); ++CurrentProgramNameIndex) { - auto CurrentProgramName = FHU::Filesystem::GetFilename(Args[CurrentProgramNameIndex]); - - if (CurrentProgramName == "wine-preloader" || - CurrentProgramName == "wine64-preloader") { - // Wine preloader is required to be in the format of `wine-preloader ` - // The preloader doesn't execve the executable, instead maps it directly itself - // Skip the next argument since we know it is wine (potentially with custom wine executable name) - ++CurrentProgramNameIndex; - Wine = true; - } - else if(CurrentProgramName == "wine" || - CurrentProgramName == "wine64") { - // Next argument, this isn't the program we want - // - // If we are running wine or wine64 then we should check the next argument for the application name instead. - // wine will change the active program name with `setprogname` or `prctl(PR_SET_NAME`. - // Since FEX needs this data far earlier than libraries we need a different check. - Wine = true; - } - else { - if (Wine == true) { - // If this was path separated with '\' then we need to check that. 
- auto WinSeparator = CurrentProgramName.find_last_of('\\'); - if (WinSeparator != CurrentProgramName.npos) { - // Used windows separators - CurrentProgramName = CurrentProgramName.substr(WinSeparator + 1); - } + Args[0] = RecoverGuestProgramFilename(std::move(Args[0]), ExecFDInterp, ProgramFDFromEnv); + fextl::string& Program = Args[0]; + + bool Wine = false; + fextl::string ProgramName; + for (size_t CurrentProgramNameIndex = 0; CurrentProgramNameIndex < Args.size(); ++CurrentProgramNameIndex) { + auto CurrentProgramName = FHU::Filesystem::GetFilename(Args[CurrentProgramNameIndex]); + + if (CurrentProgramName == "wine-preloader" || CurrentProgramName == "wine64-preloader") { + // Wine preloader is required to be in the format of `wine-preloader ` + // The preloader doesn't execve the executable, instead maps it directly itself + // Skip the next argument since we know it is wine (potentially with custom wine executable name) + ++CurrentProgramNameIndex; + Wine = true; + } else if (CurrentProgramName == "wine" || CurrentProgramName == "wine64") { + // Next argument, this isn't the program we want + // + // If we are running wine or wine64 then we should check the next argument for the application name instead. + // wine will change the active program name with `setprogname` or `prctl(PR_SET_NAME`. + // Since FEX needs this data far earlier than libraries we need a different check. + Wine = true; + } else { + if (Wine == true) { + // If this was path separated with '\' then we need to check that. 
+ auto WinSeparator = CurrentProgramName.find_last_of('\\'); + if (WinSeparator != CurrentProgramName.npos) { + // Used windows separators + CurrentProgramName = CurrentProgramName.substr(WinSeparator + 1); } - - ProgramName = CurrentProgramName; - - // Past any wine program names - break; } - } - FEXCore::Config::AddLayer(CreateAppLayer(ProgramName, FEXCore::Config::LayerType::LAYER_GLOBAL_APP)); - FEXCore::Config::AddLayer(CreateAppLayer(ProgramName, FEXCore::Config::LayerType::LAYER_LOCAL_APP)); + ProgramName = CurrentProgramName; - auto SteamID = getenv("SteamAppId"); - if (SteamID) { - // If a SteamID exists then let's search for Steam application configs as well. - // We want to key off both the SteamAppId number /and/ the executable since we may not want to thunk all binaries. - fextl::string SteamAppName = fextl::fmt::format("Steam_{}_{}", SteamID, ProgramName); - FEXCore::Config::AddLayer(CreateAppLayer(SteamAppName, FEXCore::Config::LayerType::LAYER_GLOBAL_STEAM_APP)); - FEXCore::Config::AddLayer(CreateAppLayer(SteamAppName, FEXCore::Config::LayerType::LAYER_LOCAL_STEAM_APP)); + // Past any wine program names + break; } + } - return ApplicationNames{std::move(Program), std::move(ProgramName)}; + FEXCore::Config::AddLayer(CreateAppLayer(ProgramName, FEXCore::Config::LayerType::LAYER_GLOBAL_APP)); + FEXCore::Config::AddLayer(CreateAppLayer(ProgramName, FEXCore::Config::LayerType::LAYER_LOCAL_APP)); + + auto SteamID = getenv("SteamAppId"); + if (SteamID) { + // If a SteamID exists then let's search for Steam application configs as well. + // We want to key off both the SteamAppId number /and/ the executable since we may not want to thunk all binaries. 
+ fextl::string SteamAppName = fextl::fmt::format("Steam_{}_{}", SteamID, ProgramName); + FEXCore::Config::AddLayer(CreateAppLayer(SteamAppName, FEXCore::Config::LayerType::LAYER_GLOBAL_STEAM_APP)); + FEXCore::Config::AddLayer(CreateAppLayer(SteamAppName, FEXCore::Config::LayerType::LAYER_LOCAL_STEAM_APP)); } - return {}; + + return ApplicationNames {std::move(Program), std::move(ProgramName)}; } + return {}; +} #ifndef _WIN32 - char const* FindUserHomeThroughUID() { - auto passwd = getpwuid(geteuid()); - if (passwd) { - return passwd->pw_dir; - } - return nullptr; +const char* FindUserHomeThroughUID() { + auto passwd = getpwuid(geteuid()); + if (passwd) { + return passwd->pw_dir; } + return nullptr; +} - const char *GetHomeDirectory() { - char const *HomeDir = getenv("HOME"); - - // Try to get home directory from uid - if (!HomeDir) { - HomeDir = FindUserHomeThroughUID(); - } +const char* GetHomeDirectory() { + const char* HomeDir = getenv("HOME"); - // try the PWD - if (!HomeDir) { - HomeDir = getenv("PWD"); - } + // Try to get home directory from uid + if (!HomeDir) { + HomeDir = FindUserHomeThroughUID(); + } - // Still doesn't exit? You get local - if (!HomeDir) { - HomeDir = "."; - } + // try the PWD + if (!HomeDir) { + HomeDir = getenv("PWD"); + } - return HomeDir; + // Still doesn't exit? 
You get local + if (!HomeDir) { + HomeDir = "."; } -#else - const char *GetHomeDirectory() { - const char *HomeObjectPath = getenv("WINEHOMEDIR"); - if (!HomeObjectPath) { - return nullptr; - } - // Skip over the \??\ prefix in the NT path since we want a DOS path - return HomeObjectPath + 4; + return HomeDir; +} +#else +const char* GetHomeDirectory() { + const char* HomeObjectPath = getenv("WINEHOMEDIR"); + if (!HomeObjectPath) { + return nullptr; } -#endif - fextl::string GetDataDirectory() { - fextl::string DataDir{}; + // Skip over the \??\ prefix in the NT path since we want a DOS path + return HomeObjectPath + 4; +} +#endif - char const *HomeDir = GetHomeDirectory(); - char const *DataXDG = getenv("XDG_DATA_HOME"); - char const *DataOverride = getenv("FEX_APP_DATA_LOCATION"); - if (DataOverride) { - // Data override will override the complete directory - DataDir = DataOverride; - } - else { - DataDir = DataXDG ?: HomeDir; - DataDir += "/.fex-emu/"; - } - return DataDir; +fextl::string GetDataDirectory() { + fextl::string DataDir {}; + + const char* HomeDir = GetHomeDirectory(); + const char* DataXDG = getenv("XDG_DATA_HOME"); + const char* DataOverride = getenv("FEX_APP_DATA_LOCATION"); + if (DataOverride) { + // Data override will override the complete directory + DataDir = DataOverride; + } else { + DataDir = DataXDG ?: HomeDir; + DataDir += "/.fex-emu/"; } + return DataDir; +} - fextl::string GetConfigDirectory(bool Global) { - fextl::string ConfigDir; - if (Global) { - ConfigDir = GLOBAL_DATA_DIRECTORY; +fextl::string GetConfigDirectory(bool Global) { + fextl::string ConfigDir; + if (Global) { + ConfigDir = GLOBAL_DATA_DIRECTORY; + } else { + const char* HomeDir = GetHomeDirectory(); + const char* ConfigXDG = getenv("XDG_CONFIG_HOME"); + const char* ConfigOverride = getenv("FEX_APP_CONFIG_LOCATION"); + if (ConfigOverride) { + // Config override completely overrides the config directory + ConfigDir = ConfigOverride; + } else { + ConfigDir = ConfigXDG ? 
ConfigXDG : HomeDir; + ConfigDir += "/.fex-emu/"; } - else { - char const *HomeDir = GetHomeDirectory(); - char const *ConfigXDG = getenv("XDG_CONFIG_HOME"); - char const *ConfigOverride = getenv("FEX_APP_CONFIG_LOCATION"); - if (ConfigOverride) { - // Config override completely overrides the config directory - ConfigDir = ConfigOverride; - } - else { - ConfigDir = ConfigXDG ? ConfigXDG : HomeDir; - ConfigDir += "/.fex-emu/"; - } - // Ensure the folder structure is created for our configuration - if (!FHU::Filesystem::Exists(ConfigDir) && - !FHU::Filesystem::CreateDirectories(ConfigDir)) { - // Let's go local in this case - return "./"; - } + // Ensure the folder structure is created for our configuration + if (!FHU::Filesystem::Exists(ConfigDir) && !FHU::Filesystem::CreateDirectories(ConfigDir)) { + // Let's go local in this case + return "./"; } - - return ConfigDir; } - fextl::string GetConfigFileLocation(bool Global) { - fextl::string ConfigFile{}; - if (Global) { - ConfigFile = GetConfigDirectory(true) + "Config.json"; - } - else { - const char *AppConfig = getenv("FEX_APP_CONFIG"); - if (AppConfig) { - // App config environment variable overwrites only the config file - ConfigFile = AppConfig; - } - else { - ConfigFile = GetConfigDirectory(false) + "Config.json"; - } + return ConfigDir; +} + +fextl::string GetConfigFileLocation(bool Global) { + fextl::string ConfigFile {}; + if (Global) { + ConfigFile = GetConfigDirectory(true) + "Config.json"; + } else { + const char* AppConfig = getenv("FEX_APP_CONFIG"); + if (AppConfig) { + // App config environment variable overwrites only the config file + ConfigFile = AppConfig; + } else { + ConfigFile = GetConfigDirectory(false) + "Config.json"; } - return ConfigFile; } + return ConfigFile; +} - void InitializeConfigs() { - FEXCore::Config::SetDataDirectory(GetDataDirectory()); - FEXCore::Config::SetConfigDirectory(GetConfigDirectory(false), false); - FEXCore::Config::SetConfigDirectory(GetConfigDirectory(true), true); 
- FEXCore::Config::SetConfigFileLocation(GetConfigFileLocation(false), false); - FEXCore::Config::SetConfigFileLocation(GetConfigFileLocation(true), true); - } +void InitializeConfigs() { + FEXCore::Config::SetDataDirectory(GetDataDirectory()); + FEXCore::Config::SetConfigDirectory(GetConfigDirectory(false), false); + FEXCore::Config::SetConfigDirectory(GetConfigDirectory(true), true); + FEXCore::Config::SetConfigFileLocation(GetConfigFileLocation(false), false); + FEXCore::Config::SetConfigFileLocation(GetConfigFileLocation(true), true); } +} // namespace FEX::Config diff --git a/Source/Common/EnvironmentLoader.cpp b/Source/Common/EnvironmentLoader.cpp index 2620b90741..e88e6100d9 100644 --- a/Source/Common/EnvironmentLoader.cpp +++ b/Source/Common/EnvironmentLoader.cpp @@ -3,11 +3,9 @@ namespace FEX::EnvLoader { - using string = std::string; - using string_view = std::string_view; +using string = std::string; +using string_view = std::string_view; - void Load(char *const envp[]) - { - } +void Load(char* const envp[]) {} -} +} // namespace FEX::EnvLoader diff --git a/Source/Common/FEXServerClient.cpp b/Source/Common/FEXServerClient.cpp index 6717c9cc39..424ecf0a09 100644 --- a/Source/Common/FEXServerClient.cpp +++ b/Source/Common/FEXServerClient.cpp @@ -24,351 +24,341 @@ #include namespace FEXServerClient { - int RequestPIDFDPacket(int ServerSocket, PacketType Type) { - FEXServerRequestPacket Req { - .Header { - .Type = Type, - }, +int RequestPIDFDPacket(int ServerSocket, PacketType Type) { + FEXServerRequestPacket Req { + .Header { + .Type = Type, + }, + }; + + int Result = write(ServerSocket, &Req, sizeof(Req.BasicRequest)); + if (Result != -1) { + // Wait for success response with SCM_RIGHTS + + FEXServerResultPacket Res {}; + struct iovec iov { + .iov_base = &Res, .iov_len = sizeof(Res), }; - int Result = write(ServerSocket, &Req, sizeof(Req.BasicRequest)); - if (Result != -1) { - // Wait for success response with SCM_RIGHTS - - FEXServerResultPacket Res{}; - 
struct iovec iov { - .iov_base = &Res, - .iov_len = sizeof(Res), - }; - - struct msghdr msg { - .msg_name = nullptr, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - }; - - // Setup the ancillary buffer. This is where we will be getting pipe FDs - // We only need 4 bytes for the FD - constexpr size_t CMSG_SIZE = CMSG_SPACE(sizeof(int)); - union AncillaryBuffer { - struct cmsghdr Header; - uint8_t Buffer[CMSG_SIZE]; - }; - AncillaryBuffer AncBuf{}; - - // Now link to our ancilllary buffer - msg.msg_control = AncBuf.Buffer; - msg.msg_controllen = CMSG_SIZE; - - ssize_t DataResult = recvmsg(ServerSocket, &msg, 0); - if (DataResult > 0) { - // Now that we have the data, we can extract the FD from the ancillary buffer - struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); - - // Do some error checking - if (cmsg == nullptr || - cmsg->cmsg_len != CMSG_LEN(sizeof(int)) || - cmsg->cmsg_level != SOL_SOCKET || - cmsg->cmsg_type != SCM_RIGHTS) { - // Couldn't get a socket - } - else { - // Check for Success. - // If type error was returned then the FEXServer doesn't have a log to pipe in to - if (Res.Header.Type == PacketType::TYPE_SUCCESS) { - // Now that we know the cmsg is sane, read the FD - int NewFD{}; - memcpy(&NewFD, CMSG_DATA(cmsg), sizeof(NewFD)); - return NewFD; - } + struct msghdr msg { + .msg_name = nullptr, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, + }; + + // Setup the ancillary buffer. 
This is where we will be getting pipe FDs + // We only need 4 bytes for the FD + constexpr size_t CMSG_SIZE = CMSG_SPACE(sizeof(int)); + union AncillaryBuffer { + struct cmsghdr Header; + uint8_t Buffer[CMSG_SIZE]; + }; + AncillaryBuffer AncBuf {}; + + // Now link to our ancilllary buffer + msg.msg_control = AncBuf.Buffer; + msg.msg_controllen = CMSG_SIZE; + + ssize_t DataResult = recvmsg(ServerSocket, &msg, 0); + if (DataResult > 0) { + // Now that we have the data, we can extract the FD from the ancillary buffer + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + + // Do some error checking + if (cmsg == nullptr || cmsg->cmsg_len != CMSG_LEN(sizeof(int)) || cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) { + // Couldn't get a socket + } else { + // Check for Success. + // If type error was returned then the FEXServer doesn't have a log to pipe in to + if (Res.Header.Type == PacketType::TYPE_SUCCESS) { + // Now that we know the cmsg is sane, read the FD + int NewFD {}; + memcpy(&NewFD, CMSG_DATA(cmsg), sizeof(NewFD)); + return NewFD; } } } - - return -1; } - static int ServerFD {-1}; - - fextl::string GetServerLockFolder() { - return FEXCore::Config::GetDataDirectory() + "Server/"; - } + return -1; +} - fextl::string GetServerLockFile() { - return GetServerLockFolder() + "Server.lock"; - } +static int ServerFD {-1}; - fextl::string GetServerRootFSLockFile() { - return GetServerLockFolder() + "RootFS.lock"; - } +fextl::string GetServerLockFolder() { + return FEXCore::Config::GetDataDirectory() + "Server/"; +} - fextl::string GetTempFolder() { - auto XDGRuntimeEnv = getenv("XDG_RUNTIME_DIR"); - if (XDGRuntimeEnv) { - // If the XDG runtime directory works then use that. - return XDGRuntimeEnv; - } - // Fallback to `/tmp/` if XDG_RUNTIME_DIR doesn't exist. - // Might not be ideal but we don't have much of a choice. 
- return fextl::string{std::filesystem::temp_directory_path().string()}; - } +fextl::string GetServerLockFile() { + return GetServerLockFolder() + "Server.lock"; +} - fextl::string GetServerMountFolder() { - // We need a FEXServer mount directory that has some tricky requirements. - // - We don't want to use `/tmp/` if possible. - // - systemd services use `PrivateTmp` feature to gives services their own tmp. - // - We will use this as a fallback path /only/. - // - Can't be `[$XDG_DATA_HOME,$HOME]/.fex-emu/` - // - Might be mounted with a filesystem (sshfs) which can't handle mount points inside it. - // - // Directories it can be in: - // - $XDG_RUNTIME_DIR if set - // - Is typically `/run/user//` - // - systemd `PrivateTmp` feature doesn't touch this. - // - If this path doesn't exist then fallback to `/tmp/` as a last resort. - // - pressure-vessel explicitly creates an internal XDG_RUNTIME_DIR inside its chroot. - // - This is okay since pressure-vessel rbinds the FEX rootfs from the host to `/run/pressure-vessel/interpreter-root`. - auto Folder = GetTempFolder(); - - if (FEXCore::Config::FindContainer() == "pressure-vessel") { - // In pressure-vessel the mount point changes location. - // This is due to pressure-vesssel being a chroot environment. - // It by default maps the host-filesystem to `/run/host/` so we need to redirect. - // After pressure-vessel is fully set up it will set the `FEX_ROOTFS` environment variable, - // which the FEXInterpreter will pick up on. - Folder = "/run/host/" + Folder; - } +fextl::string GetServerRootFSLockFile() { + return GetServerLockFolder() + "RootFS.lock"; +} - return Folder; +fextl::string GetTempFolder() { + auto XDGRuntimeEnv = getenv("XDG_RUNTIME_DIR"); + if (XDGRuntimeEnv) { + // If the XDG runtime directory works then use that. + return XDGRuntimeEnv; } + // Fallback to `/tmp/` if XDG_RUNTIME_DIR doesn't exist. + // Might not be ideal but we don't have much of a choice. 
+ return fextl::string {std::filesystem::temp_directory_path().string()}; +} - fextl::string GetServerSocketName() { - FEX_CONFIG_OPT(ServerSocketPath, SERVERSOCKETPATH); - if (ServerSocketPath().empty()) { - return fextl::fmt::format("{}.FEXServer.Socket", ::geteuid()); - } - return ServerSocketPath; +fextl::string GetServerMountFolder() { + // We need a FEXServer mount directory that has some tricky requirements. + // - We don't want to use `/tmp/` if possible. + // - systemd services use `PrivateTmp` feature to gives services their own tmp. + // - We will use this as a fallback path /only/. + // - Can't be `[$XDG_DATA_HOME,$HOME]/.fex-emu/` + // - Might be mounted with a filesystem (sshfs) which can't handle mount points inside it. + // + // Directories it can be in: + // - $XDG_RUNTIME_DIR if set + // - Is typically `/run/user//` + // - systemd `PrivateTmp` feature doesn't touch this. + // - If this path doesn't exist then fallback to `/tmp/` as a last resort. + // - pressure-vessel explicitly creates an internal XDG_RUNTIME_DIR inside its chroot. + // - This is okay since pressure-vessel rbinds the FEX rootfs from the host to `/run/pressure-vessel/interpreter-root`. + auto Folder = GetTempFolder(); + + if (FEXCore::Config::FindContainer() == "pressure-vessel") { + // In pressure-vessel the mount point changes location. + // This is due to pressure-vesssel being a chroot environment. + // It by default maps the host-filesystem to `/run/host/` so we need to redirect. + // After pressure-vessel is fully set up it will set the `FEX_ROOTFS` environment variable, + // which the FEXInterpreter will pick up on. 
+ Folder = "/run/host/" + Folder; } - int GetServerFD() { - return ServerFD; + return Folder; +} + +fextl::string GetServerSocketName() { + FEX_CONFIG_OPT(ServerSocketPath, SERVERSOCKETPATH); + if (ServerSocketPath().empty()) { + return fextl::fmt::format("{}.FEXServer.Socket", ::geteuid()); } + return ServerSocketPath; +} - int ConnectToServer(ConnectionOption ConnectionOption) { - auto ServerSocketName = GetServerSocketName(); +int GetServerFD() { + return ServerFD; +} - // Create the initial unix socket - int SocketFD = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); - if (SocketFD == -1) { - LogMan::Msg::EFmt("Couldn't open AF_UNIX socket {} {}", errno, strerror(errno)); - return -1; - } +int ConnectToServer(ConnectionOption ConnectionOption) { + auto ServerSocketName = GetServerSocketName(); - // AF_UNIX has a special feature for named socket paths. - // If the name of the socket begins with `\0` then it is an "abstract" socket address. - // The entirety of the name is used as a path to a socket that doesn't have any filesystem backing. - struct sockaddr_un addr{}; - addr.sun_family = AF_UNIX; - size_t SizeOfSocketString = std::min(ServerSocketName.size() + 1, sizeof(addr.sun_path) - 1); - addr.sun_path[0] = 0; // Abstract AF_UNIX sockets start with \0 - strncpy(addr.sun_path + 1, ServerSocketName.data(), SizeOfSocketString); - // Include final null character. 
- size_t SizeOfAddr = sizeof(addr.sun_family) + SizeOfSocketString; - - if (connect(SocketFD, reinterpret_cast(&addr), SizeOfAddr) == -1) { - if (ConnectionOption == ConnectionOption::Default || errno != ECONNREFUSED) { - LogMan::Msg::EFmt("Couldn't connect to FEXServer socket {} {} {}", ServerSocketName, errno, strerror(errno)); - } - close(SocketFD); - return -1; - } - - return SocketFD; + // Create the initial unix socket + int SocketFD = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (SocketFD == -1) { + LogMan::Msg::EFmt("Couldn't open AF_UNIX socket {} {}", errno, strerror(errno)); + return -1; } - bool SetupClient(char *InterpreterPath) { - ServerFD = FEXServerClient::ConnectToAndStartServer(InterpreterPath); - if (ServerFD == -1) { - return false; + // AF_UNIX has a special feature for named socket paths. + // If the name of the socket begins with `\0` then it is an "abstract" socket address. + // The entirety of the name is used as a path to a socket that doesn't have any filesystem backing. + struct sockaddr_un addr {}; + addr.sun_family = AF_UNIX; + size_t SizeOfSocketString = std::min(ServerSocketName.size() + 1, sizeof(addr.sun_path) - 1); + addr.sun_path[0] = 0; // Abstract AF_UNIX sockets start with \0 + strncpy(addr.sun_path + 1, ServerSocketName.data(), SizeOfSocketString); + // Include final null character. + size_t SizeOfAddr = sizeof(addr.sun_family) + SizeOfSocketString; + + if (connect(SocketFD, reinterpret_cast(&addr), SizeOfAddr) == -1) { + if (ConnectionOption == ConnectionOption::Default || errno != ECONNREFUSED) { + LogMan::Msg::EFmt("Couldn't connect to FEXServer socket {} {} {}", ServerSocketName, errno, strerror(errno)); } + close(SocketFD); + return -1; + } - // If we were started in a container then we want to use the rootfs that they provided. - // In the pressure-vessel case this is a combination of our rootfs and the steam soldier runtime. 
- if (FEXCore::Config::FindContainer() != "pressure-vessel") { - fextl::string RootFSPath = FEXServerClient::RequestRootFSPath(ServerFD); + return SocketFD; +} - //// If everything has passed then we can now update the rootfs path - FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_ROOTFS, RootFSPath); - } +bool SetupClient(char* InterpreterPath) { + ServerFD = FEXServerClient::ConnectToAndStartServer(InterpreterPath); + if (ServerFD == -1) { + return false; + } + + // If we were started in a container then we want to use the rootfs that they provided. + // In the pressure-vessel case this is a combination of our rootfs and the steam soldier runtime. + if (FEXCore::Config::FindContainer() != "pressure-vessel") { + fextl::string RootFSPath = FEXServerClient::RequestRootFSPath(ServerFD); - return true; + //// If everything has passed then we can now update the rootfs path + FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_ROOTFS, RootFSPath); } - int ConnectToAndStartServer(char *InterpreterPath) { - int ServerFD = ConnectToServer(ConnectionOption::NoPrintConnectionError); - if (ServerFD == -1) { - // Couldn't connect to the server. Start one + return true; +} - // Open some pipes for letting us know when the server is ready - int fds[2]{}; - if (pipe2(fds, 0) != 0) { - LogMan::Msg::EFmt("Couldn't open pipe"); - return -1; - } +int ConnectToAndStartServer(char* InterpreterPath) { + int ServerFD = ConnectToServer(ConnectionOption::NoPrintConnectionError); + if (ServerFD == -1) { + // Couldn't connect to the server. 
Start one - fextl::string FEXServerPath = FHU::Filesystem::ParentPath(InterpreterPath) + "/FEXServer"; - // Check if a local FEXServer next to FEXInterpreter exists - // If it does then it takes priority over the installed one - if (!FHU::Filesystem::Exists(FEXServerPath)) { - FEXServerPath = "FEXServer"; - } + // Open some pipes for letting us know when the server is ready + int fds[2] {}; + if (pipe2(fds, 0) != 0) { + LogMan::Msg::EFmt("Couldn't open pipe"); + return -1; + } - // Set-up our SIGCHLD handler to ignore the signal. - // This is early in the initialization stage so no handlers have been installed. - // - // We want to ignore the signal so that if FEXServer starts in daemon mode, it - // doesn't leave a zombie process around waiting for something to get the result. - struct sigaction action{}; - action.sa_handler = SIG_IGN, - sigaction(SIGCHLD, &action, &action); - - pid_t pid = fork(); - if (pid == 0) { - // Child - close(fds[0]); // Close read end of pipe - - const char *argv[2]; - - argv[0] = FEXServerPath.c_str(); - argv[1] = nullptr; - - if (execvp(argv[0], (char * const*)argv) == -1) { - // Let the parent know that we couldn't execute for some reason - uint64_t error{1}; - write(fds[1], &error, sizeof(error)); - - // Give a hopefully helpful error message for users - LogMan::Msg::EFmt("Couldn't execute: {}", argv[0]); - LogMan::Msg::EFmt("This means the squashFS rootfs won't be mounted."); - LogMan::Msg::EFmt("Expect errors!"); - // Destroy this fork - exit(1); - } + fextl::string FEXServerPath = FHU::Filesystem::ParentPath(InterpreterPath) + "/FEXServer"; + // Check if a local FEXServer next to FEXInterpreter exists + // If it does then it takes priority over the installed one + if (!FHU::Filesystem::Exists(FEXServerPath)) { + FEXServerPath = "FEXServer"; + } - FEX_UNREACHABLE; + // Set-up our SIGCHLD handler to ignore the signal. + // This is early in the initialization stage so no handlers have been installed. 
+ // + // We want to ignore the signal so that if FEXServer starts in daemon mode, it + // doesn't leave a zombie process around waiting for something to get the result. + struct sigaction action {}; + action.sa_handler = SIG_IGN, sigaction(SIGCHLD, &action, &action); + + pid_t pid = fork(); + if (pid == 0) { + // Child + close(fds[0]); // Close read end of pipe + + const char* argv[2]; + + argv[0] = FEXServerPath.c_str(); + argv[1] = nullptr; + + if (execvp(argv[0], (char* const*)argv) == -1) { + // Let the parent know that we couldn't execute for some reason + uint64_t error {1}; + write(fds[1], &error, sizeof(error)); + + // Give a hopefully helpful error message for users + LogMan::Msg::EFmt("Couldn't execute: {}", argv[0]); + LogMan::Msg::EFmt("This means the squashFS rootfs won't be mounted."); + LogMan::Msg::EFmt("Expect errors!"); + // Destroy this fork + exit(1); } - else { - // Parent - // Wait for the child to exit so we can check if it is mounted or not - close(fds[1]); // Close write end of the pipe - // Wait for a message from FEXServer - pollfd PollFD; - PollFD.fd = fds[0]; - PollFD.events = POLLIN | POLLOUT | POLLRDHUP | POLLERR | POLLHUP | POLLNVAL; + FEX_UNREACHABLE; + } else { + // Parent + // Wait for the child to exit so we can check if it is mounted or not + close(fds[1]); // Close write end of the pipe - // Wait for a result on the pipe that isn't EINTR - while (poll(&PollFD, 1, -1) == -1 && errno == EINTR); + // Wait for a message from FEXServer + pollfd PollFD; + PollFD.fd = fds[0]; + PollFD.events = POLLIN | POLLOUT | POLLRDHUP | POLLERR | POLLHUP | POLLNVAL; - for (size_t i = 0; i < 5; ++i) { - ServerFD = ConnectToServer(ConnectionOption::Default); + // Wait for a result on the pipe that isn't EINTR + while (poll(&PollFD, 1, -1) == -1 && errno == EINTR) + ; - if (ServerFD != -1) { - break; - } + for (size_t i = 0; i < 5; ++i) { + ServerFD = ConnectToServer(ConnectionOption::Default); - std::this_thread::sleep_for(std::chrono::seconds(1)); 
+ if (ServerFD != -1) { + break; } - if (ServerFD == -1) { - // Still couldn't connect to the socket. - LogMan::Msg::EFmt("Couldn't connect to FEXServer socket {} after launching the process", GetServerSocketName()); - } + std::this_thread::sleep_for(std::chrono::seconds(1)); } - // Restore the original SIGCHLD handler if it existed. - sigaction(SIGCHLD, &action, nullptr); + if (ServerFD == -1) { + // Still couldn't connect to the socket. + LogMan::Msg::EFmt("Couldn't connect to FEXServer socket {} after launching the process", GetServerSocketName()); + } } - return ServerFD; - } - /** - * @name Packet request functions - * @{ */ - void RequestServerKill(int ServerSocket) { - FEXServerRequestPacket Req { - .Header { - .Type = PacketType::TYPE_KILL, - }, - }; - - write(ServerSocket, &Req, sizeof(Req.BasicRequest)); + // Restore the original SIGCHLD handler if it existed. + sigaction(SIGCHLD, &action, nullptr); } + return ServerFD; +} - int RequestLogFD(int ServerSocket) { - return RequestPIDFDPacket(ServerSocket, PacketType::TYPE_GET_LOG_FD); - } +/** + * @name Packet request functions + * @{ */ +void RequestServerKill(int ServerSocket) { + FEXServerRequestPacket Req { + .Header { + .Type = PacketType::TYPE_KILL, + }, + }; + + write(ServerSocket, &Req, sizeof(Req.BasicRequest)); +} - fextl::string RequestRootFSPath(int ServerSocket) { - FEXServerRequestPacket Req { - .Header { - .Type = PacketType::TYPE_GET_ROOTFS_PATH, - }, - }; +int RequestLogFD(int ServerSocket) { + return RequestPIDFDPacket(ServerSocket, PacketType::TYPE_GET_LOG_FD); +} - int Result = write(ServerSocket, &Req, sizeof(Req.BasicRequest)); - if (Result != -1) { - // Wait for success response with data - fextl::vector Data(PATH_MAX + sizeof(FEXServerResultPacket)); - - ssize_t DataResult = recv(ServerSocket, Data.data(), Data.size(), 0); - if (DataResult >= sizeof(FEXServerResultPacket)) { - FEXServerResultPacket *ResultPacket = reinterpret_cast(Data.data()); - if (ResultPacket->Header.Type == 
PacketType::TYPE_GET_ROOTFS_PATH && - ResultPacket->MountPath.Length > 0) { - return fextl::string(ResultPacket->MountPath.Mount); - } +fextl::string RequestRootFSPath(int ServerSocket) { + FEXServerRequestPacket Req { + .Header { + .Type = PacketType::TYPE_GET_ROOTFS_PATH, + }, + }; + + int Result = write(ServerSocket, &Req, sizeof(Req.BasicRequest)); + if (Result != -1) { + // Wait for success response with data + fextl::vector Data(PATH_MAX + sizeof(FEXServerResultPacket)); + + ssize_t DataResult = recv(ServerSocket, Data.data(), Data.size(), 0); + if (DataResult >= sizeof(FEXServerResultPacket)) { + FEXServerResultPacket* ResultPacket = reinterpret_cast(Data.data()); + if (ResultPacket->Header.Type == PacketType::TYPE_GET_ROOTFS_PATH && ResultPacket->MountPath.Length > 0) { + return fextl::string(ResultPacket->MountPath.Mount); } } - - return {}; } - int RequestPIDFD(int ServerSocket) { - return RequestPIDFDPacket(ServerSocket, PacketType::TYPE_GET_PID_FD); - } + return {}; +} - /** @} */ - - /** - * @name FEX logging through FEXServer - * @{ */ - - void MsgHandler(int FD, LogMan::DebugLevels Level, char const *Message) { - size_t MsgLen = strlen(Message) + 1; - - Logging::PacketMsg Msg; - Msg.Header = Logging::FillHeader(Logging::PacketTypes::TYPE_MSG); - Msg.MessageLength = MsgLen; - Msg.Level = Level; - - const iovec vec[2] = { - { - .iov_base = &Msg, - .iov_len = sizeof(Msg), - }, - { - .iov_base = const_cast(Message), - .iov_len = Msg.MessageLength, - }, - }; +int RequestPIDFD(int ServerSocket) { + return RequestPIDFDPacket(ServerSocket, PacketType::TYPE_GET_PID_FD); +} - writev(FD, vec, 2); - } +/** @} */ - void AssertHandler(int FD, char const *Message) { - MsgHandler(FD, LogMan::DebugLevels::ASSERT, Message); - } - /** @} */ +/** + * @name FEX logging through FEXServer + * @{ */ + +void MsgHandler(int FD, LogMan::DebugLevels Level, const char* Message) { + size_t MsgLen = strlen(Message) + 1; + + Logging::PacketMsg Msg; + Msg.Header = 
Logging::FillHeader(Logging::PacketTypes::TYPE_MSG); + Msg.MessageLength = MsgLen; + Msg.Level = Level; + + const iovec vec[2] = { + { + .iov_base = &Msg, + .iov_len = sizeof(Msg), + }, + { + .iov_base = const_cast(Message), + .iov_len = Msg.MessageLength, + }, + }; + + writev(FD, vec, 2); +} + +void AssertHandler(int FD, const char* Message) { + MsgHandler(FD, LogMan::DebugLevels::ASSERT, Message); } +/** @} */ +} // namespace FEXServerClient diff --git a/Source/Common/FileFormatCheck.cpp b/Source/Common/FileFormatCheck.cpp index 6f81772cc4..dd37cda02e 100644 --- a/Source/Common/FileFormatCheck.cpp +++ b/Source/Common/FileFormatCheck.cpp @@ -7,75 +7,75 @@ #include namespace FEX::FormatCheck { - bool IsSquashFS(fextl::string const &Filename) { - // If it is a regular file then we need to check if it is a valid archive - struct SquashFSHeader { - uint32_t magic; - uint32_t inode_count; - uint32_t mtime; - uint32_t block_size; - uint32_t fragment_entry_count; - uint16_t compression_id; - uint16_t block_log; - uint16_t flags; - uint16_t id_count; - uint16_t version_major; - uint16_t version_minor; - uint64_t More[8]; // More things that don't matter to us - }; +bool IsSquashFS(const fextl::string& Filename) { + // If it is a regular file then we need to check if it is a valid archive + struct SquashFSHeader { + uint32_t magic; + uint32_t inode_count; + uint32_t mtime; + uint32_t block_size; + uint32_t fragment_entry_count; + uint16_t compression_id; + uint16_t block_log; + uint16_t flags; + uint16_t id_count; + uint16_t version_major; + uint16_t version_minor; + uint64_t More[8]; // More things that don't matter to us + }; - SquashFSHeader Header{}; - int fd = open(Filename.c_str(), O_RDONLY | O_CLOEXEC); - if (fd == -1) { - return false; - } - - if (pread(fd, reinterpret_cast(&Header), sizeof(SquashFSHeader), 0) != sizeof(SquashFSHeader)) { - close(fd); - return false; - } + SquashFSHeader Header {}; + int fd = open(Filename.c_str(), O_RDONLY | O_CLOEXEC); + if (fd 
== -1) { + return false; + } + if (pread(fd, reinterpret_cast(&Header), sizeof(SquashFSHeader), 0) != sizeof(SquashFSHeader)) { close(fd); - - // Make sure the cookie matches - if (Header.magic == 0x73717368) { - // Sanity check the version - uint32_t version = (uint32_t)Header.version_major << 16 | Header.version_minor; - if (version >= 0x00040000) { - // Everything is sane, we can add it - return true; - } - } return false; } - bool IsEroFS(fextl::string const &Filename) { - // v1 of EroFS has a 128byte header - // This lives within a fixed offset inside of the first superblock of the file - // Each superblock is 4096bytes - // - // We only care about the uint32_t at the start of this offset which is the cookie - struct EroFSHeader { - uint32_t Magic; - // Additional data after this if necessary in the future. - }; - - constexpr size_t HEADER_OFFSET = 1024; - constexpr uint32_t COOKIE_MAGIC_V1 = 0xE0F5E1E2; + close(fd); - EroFSHeader Header{}; - int fd = open(Filename.c_str(), O_RDONLY | O_CLOEXEC); - if (fd == -1) { - return false; + // Make sure the cookie matches + if (Header.magic == 0x73717368) { + // Sanity check the version + uint32_t version = (uint32_t)Header.version_major << 16 | Header.version_minor; + if (version >= 0x00040000) { + // Everything is sane, we can add it + return true; } + } + return false; +} - if (pread(fd, reinterpret_cast(&Header), sizeof(EroFSHeader), HEADER_OFFSET) != sizeof(EroFSHeader)) { - close(fd); - return false; - } +bool IsEroFS(const fextl::string& Filename) { + // v1 of EroFS has a 128byte header + // This lives within a fixed offset inside of the first superblock of the file + // Each superblock is 4096bytes + // + // We only care about the uint32_t at the start of this offset which is the cookie + struct EroFSHeader { + uint32_t Magic; + // Additional data after this if necessary in the future. 
+ }; - close(fd); + constexpr size_t HEADER_OFFSET = 1024; + constexpr uint32_t COOKIE_MAGIC_V1 = 0xE0F5E1E2; - return Header.Magic == COOKIE_MAGIC_V1; + EroFSHeader Header {}; + int fd = open(Filename.c_str(), O_RDONLY | O_CLOEXEC); + if (fd == -1) { + return false; } + + if (pread(fd, reinterpret_cast(&Header), sizeof(EroFSHeader), HEADER_OFFSET) != sizeof(EroFSHeader)) { + close(fd); + return false; + } + + close(fd); + + return Header.Magic == COOKIE_MAGIC_V1; } +} // namespace FEX::FormatCheck diff --git a/Source/Common/StringUtil.cpp b/Source/Common/StringUtil.cpp index d1ab07afd3..6072bf3fb0 100644 --- a/Source/Common/StringUtil.cpp +++ b/Source/Common/StringUtil.cpp @@ -2,19 +2,15 @@ #include "Common/StringUtil.h" namespace FEX::StringUtil { -void ltrim(fextl::string &s) { - s.erase(std::find_if(s.begin(), s.end(), [](int ch) { - return !std::isspace(ch); - })); +void ltrim(fextl::string& s) { + s.erase(std::find_if(s.begin(), s.end(), [](int ch) { return !std::isspace(ch); })); } -void rtrim(fextl::string &s) { - s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { - return !std::isspace(ch); - }).base(), s.end()); +void rtrim(fextl::string& s) { + s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); } -void trim(fextl::string &s) { +void trim(fextl::string& s) { ltrim(s); rtrim(s); } -} +} // namespace FEX::StringUtil diff --git a/Source/Tools/CodeSizeValidation/Main.cpp b/Source/Tools/CodeSizeValidation/Main.cpp index 0701f8bc92..c04943be75 100644 --- a/Source/Tools/CodeSizeValidation/Main.cpp +++ b/Source/Tools/CodeSizeValidation/Main.cpp @@ -10,207 +10,208 @@ #include namespace CodeSize { - class CodeSizeValidation final { - public: - struct InstructionStats { - uint64_t GuestCodeInstructions{}; - uint64_t HostCodeInstructions{}; +class CodeSizeValidation final { +public: + struct InstructionStats { + uint64_t GuestCodeInstructions {}; + uint64_t HostCodeInstructions {}; - uint64_t HeaderSize{}; - 
uint64_t TailSize{}; - }; + uint64_t HeaderSize {}; + uint64_t TailSize {}; + }; - using CodeLines = fextl::vector; - using InstructionData = std::pair; + using CodeLines = fextl::vector; + using InstructionData = std::pair; - bool ParseMessage(char const *Message); + bool ParseMessage(const char* Message); - InstructionData *GetDataForRIP(uint64_t RIP) { - return &RIPToStats[RIP]; - } + InstructionData* GetDataForRIP(uint64_t RIP) { + return &RIPToStats[RIP]; + } - bool InfoPrintingDisabled() const { - return SetupInfoDisabled; - } + bool InfoPrintingDisabled() const { + return SetupInfoDisabled; + } - void CalculateBaseStats(FEXCore::Context::Context *CTX, FEXCore::Core::InternalThreadState *Thread); - private: - void ClearStats() { - RIPToStats.clear(); - } + void CalculateBaseStats(FEXCore::Context::Context* CTX, FEXCore::Core::InternalThreadState* Thread); +private: + void ClearStats() { + RIPToStats.clear(); + } - void SetBaseStats(InstructionStats const &NewBase) { - BaseStats = NewBase; - } + void SetBaseStats(const InstructionStats& NewBase) { + BaseStats = NewBase; + } - void CalculateDifferenceBetweenStats(InstructionData *Nop, InstructionData *Fence); + void CalculateDifferenceBetweenStats(InstructionData* Nop, InstructionData* Fence); - uint64_t CurrentRIPParse{}; - bool ConsumingDisassembly{}; - InstructionData *CurrentStats{}; - InstructionStats BaseStats{}; + uint64_t CurrentRIPParse {}; + bool ConsumingDisassembly {}; + InstructionData* CurrentStats {}; + InstructionStats BaseStats {}; - fextl::unordered_map RIPToStats; - bool SetupInfoDisabled{}; - }; + fextl::unordered_map RIPToStats; + bool SetupInfoDisabled {}; +}; - constexpr std::string_view RIPMessage = "RIP: 0x"; - constexpr std::string_view GuestCodeMessage = "Guest Code instructions: "; - constexpr std::string_view HostCodeMessage = "Host Code instructions: "; - constexpr std::string_view DisassembleBeginMessage = "Disassemble Begin"; - constexpr std::string_view DisassembleEndMessage = 
"Disassemble End"; - constexpr std::string_view BlowUpMsg = "Blow-up Amt: "; - - static std::string_view SanitizeDisassembly(std::string_view Message) { - auto it = Message.find(" (addr"); - // If it contains an address calculation, strip it out. - Message = Message.substr(0, it); - if (Message.find("adrp ") != std::string_view::npos || - Message.find("adr ") != std::string_view::npos) { - Message = Message.substr(0, Message.find(" #")); - } - return Message; - } - - bool CodeSizeValidation::ParseMessage(char const *Message) { - // std::string_view doesn't have contains until c++23. - std::string_view MessageView {Message}; - if (MessageView.find(RIPMessage) != MessageView.npos) { - // New RIP found - std::string_view RIPView = std::string_view{Message + RIPMessage.size()}; - std::from_chars(RIPView.data(), RIPView.end(), CurrentRIPParse, 16); - CurrentStats = &RIPToStats[CurrentRIPParse]; - return false; - } +constexpr std::string_view RIPMessage = "RIP: 0x"; +constexpr std::string_view GuestCodeMessage = "Guest Code instructions: "; +constexpr std::string_view HostCodeMessage = "Host Code instructions: "; +constexpr std::string_view DisassembleBeginMessage = "Disassemble Begin"; +constexpr std::string_view DisassembleEndMessage = "Disassemble End"; +constexpr std::string_view BlowUpMsg = "Blow-up Amt: "; + +static std::string_view SanitizeDisassembly(std::string_view Message) { + auto it = Message.find(" (addr"); + // If it contains an address calculation, strip it out. 
+ Message = Message.substr(0, it); + if (Message.find("adrp ") != std::string_view::npos || Message.find("adr ") != std::string_view::npos) { + Message = Message.substr(0, Message.find(" #")); + } + return Message; +} - if (MessageView.find(GuestCodeMessage) != MessageView.npos) { - std::string_view CodeSizeView = std::string_view{Message + GuestCodeMessage.size()}; - std::from_chars(CodeSizeView.data(), CodeSizeView.end(), CurrentStats->first.GuestCodeInstructions); - return false; - } - if (MessageView.find(HostCodeMessage) != MessageView.npos) { - std::string_view CodeSizeView = std::string_view{Message + HostCodeMessage.size()}; - std::from_chars(CodeSizeView.data(), CodeSizeView.end(), CurrentStats->first.HostCodeInstructions); +bool CodeSizeValidation::ParseMessage(const char* Message) { + // std::string_view doesn't have contains until c++23. + std::string_view MessageView {Message}; + if (MessageView.find(RIPMessage) != MessageView.npos) { + // New RIP found + std::string_view RIPView = std::string_view {Message + RIPMessage.size()}; + std::from_chars(RIPView.data(), RIPView.end(), CurrentRIPParse, 16); + CurrentStats = &RIPToStats[CurrentRIPParse]; + return false; + } + + if (MessageView.find(GuestCodeMessage) != MessageView.npos) { + std::string_view CodeSizeView = std::string_view {Message + GuestCodeMessage.size()}; + std::from_chars(CodeSizeView.data(), CodeSizeView.end(), CurrentStats->first.GuestCodeInstructions); + return false; + } + if (MessageView.find(HostCodeMessage) != MessageView.npos) { + std::string_view CodeSizeView = std::string_view {Message + HostCodeMessage.size()}; + std::from_chars(CodeSizeView.data(), CodeSizeView.end(), CurrentStats->first.HostCodeInstructions); + + CurrentStats->first.HostCodeInstructions -= BaseStats.HostCodeInstructions; + return false; + } + if (MessageView.find(DisassembleBeginMessage) != MessageView.npos) { + ConsumingDisassembly = true; + // Just so the output isn't a mess. 
+ return false; + } + if (MessageView.find(DisassembleEndMessage) != MessageView.npos) { + ConsumingDisassembly = false; + // Just so the output isn't a mess. - CurrentStats->first.HostCodeInstructions -= BaseStats.HostCodeInstructions; - return false; + // Remove the header and tails. + if (BaseStats.HeaderSize) { + CurrentStats->second.erase(CurrentStats->second.begin(), CurrentStats->second.begin() + BaseStats.HeaderSize); } - if (MessageView.find(DisassembleBeginMessage) != MessageView.npos) { - ConsumingDisassembly = true; - // Just so the output isn't a mess. - return false; + if (BaseStats.TailSize) { + CurrentStats->second.erase(CurrentStats->second.end() - BaseStats.TailSize, CurrentStats->second.end()); } - if (MessageView.find(DisassembleEndMessage) != MessageView.npos) { - ConsumingDisassembly = false; - // Just so the output isn't a mess. + return false; + } - // Remove the header and tails. - if (BaseStats.HeaderSize) { - CurrentStats->second.erase(CurrentStats->second.begin(), CurrentStats->second.begin() + BaseStats.HeaderSize); - } - if (BaseStats.TailSize) { - CurrentStats->second.erase(CurrentStats->second.end() - BaseStats.TailSize, CurrentStats->second.end()); - } - return false; - } + if (MessageView.find(BlowUpMsg) != MessageView.npos) { + return false; + } - if (MessageView.find(BlowUpMsg) != MessageView.npos) { - return false; - } + if (ConsumingDisassembly) { + // Currently consuming disassembly. Each line will be a single line of disassembly. + CurrentStats->second.push_back(fextl::string(SanitizeDisassembly(Message))); + return false; + } - if (ConsumingDisassembly) { - // Currently consuming disassembly. Each line will be a single line of disassembly. - CurrentStats->second.push_back(fextl::string(SanitizeDisassembly(Message))); - return false; - } + return true; +} - return true; - } - - void CodeSizeValidation::CalculateDifferenceBetweenStats(InstructionData *Nop, InstructionData *Fence) { - // Expected format. 
- // adr x0, #-0x4 (addr 0x7fffe9880054) - // str x0, [x28, #184] - // dmb sy - // ldr x0, pc+8 (addr 0x7fffe988006c) - // blr x0 - // unallocated (Unallocated) - // udf #0x7fff - // unallocated (Unallocated) - // udf #0x0 - // - // First two lines are the header. - // Next comes the implementation (0 instruction size for nop, 1 instruction for fence) - // After that is the tail. - - const auto &NOPCode = Nop->second; - const auto &FENCECode = Fence->second; - - LOGMAN_THROW_A_FMT(NOPCode.size() < FENCECode.size(), "NOP code must be smaller than fence!"); - for (size_t i = 0; i < NOPCode.size(); ++i) { - const auto &NOPLine = NOPCode.at(i); - const auto &FENCELine = FENCECode.at(i); - - const auto NOPmnemonic = std::string_view(NOPLine.data(), NOPLine.find(' ')); - const auto FENCEmnemonic = std::string_view(FENCELine.data(), FENCELine.find(' ')); - - if (NOPmnemonic != FENCEmnemonic) { - // Headersize of a block is now `i` number of instructions. - Nop->first.HeaderSize = i; - - // Tail size is going to be the remaining size - Nop->first.TailSize = NOPCode.size() - i; - break; - } +void CodeSizeValidation::CalculateDifferenceBetweenStats(InstructionData* Nop, InstructionData* Fence) { + // Expected format. + // adr x0, #-0x4 (addr 0x7fffe9880054) + // str x0, [x28, #184] + // dmb sy + // ldr x0, pc+8 (addr 0x7fffe988006c) + // blr x0 + // unallocated (Unallocated) + // udf #0x7fff + // unallocated (Unallocated) + // udf #0x0 + // + // First two lines are the header. + // Next comes the implementation (0 instruction size for nop, 1 instruction for fence) + // After that is the tail. 
+ + const auto& NOPCode = Nop->second; + const auto& FENCECode = Fence->second; + + LOGMAN_THROW_A_FMT(NOPCode.size() < FENCECode.size(), "NOP code must be smaller than fence!"); + for (size_t i = 0; i < NOPCode.size(); ++i) { + const auto& NOPLine = NOPCode.at(i); + const auto& FENCELine = FENCECode.at(i); + + const auto NOPmnemonic = std::string_view(NOPLine.data(), NOPLine.find(' ')); + const auto FENCEmnemonic = std::string_view(FENCELine.data(), FENCELine.find(' ')); + + if (NOPmnemonic != FENCEmnemonic) { + // Headersize of a block is now `i` number of instructions. + Nop->first.HeaderSize = i; + + // Tail size is going to be the remaining size + Nop->first.TailSize = NOPCode.size() - i; + break; } - - SetBaseStats(Nop->first); } - void CodeSizeValidation::CalculateBaseStats(FEXCore::Context::Context *CTX, FEXCore::Core::InternalThreadState *Thread) { - SetupInfoDisabled = true; + SetBaseStats(Nop->first); +} - // Known hardcoded instructions that will generate blocks of particular sizes. - // NOP will never generate any instructions. - constexpr static uint8_t NOP[] = { - 0x90, - }; +void CodeSizeValidation::CalculateBaseStats(FEXCore::Context::Context* CTX, FEXCore::Core::InternalThreadState* Thread) { + SetupInfoDisabled = true; - // MFENCE will always generate a block with one instruction. - constexpr static uint8_t MFENCE[] = { - 0x0f, 0xae, 0xf0, - }; + // Known hardcoded instructions that will generate blocks of particular sizes. + // NOP will never generate any instructions. + constexpr static uint8_t NOP[] = { + 0x90, + }; - // Compile the NOP. - CTX->CompileRIP(Thread, (uint64_t)NOP); - // Gather the stats for the NOP. - auto NOPStats = GetDataForRIP((uint64_t)NOP); + // MFENCE will always generate a block with one instruction. + constexpr static uint8_t MFENCE[] = { + 0x0f, + 0xae, + 0xf0, + }; - // Compile MFence - CTX->CompileRIP(Thread, (uint64_t)MFENCE); + // Compile the NOP. 
+ CTX->CompileRIP(Thread, (uint64_t)NOP); + // Gather the stats for the NOP. + auto NOPStats = GetDataForRIP((uint64_t)NOP); - // Get MFence stats. - auto MFENCEStats = GetDataForRIP((uint64_t)MFENCE); + // Compile MFence + CTX->CompileRIP(Thread, (uint64_t)MFENCE); - // Now scan the difference in disasembly between NOP and MFENCE to remove the header and tail. - // Just searching for first instruction change. + // Get MFence stats. + auto MFENCEStats = GetDataForRIP((uint64_t)MFENCE); - CalculateDifferenceBetweenStats(NOPStats, MFENCEStats); - // Now that the stats have been cleared. Clear our currentStats. - ClearStats(); + // Now scan the difference in disasembly between NOP and MFENCE to remove the header and tail. + // Just searching for first instruction change. - // Invalidate the code ranges to be safe. - auto CodeInvalidationlk = FEXCore::GuardSignalDeferringSection(CTX->GetCodeInvalidationMutex(), Thread); - CTX->InvalidateGuestCodeRange(Thread, (uint64_t)NOP, sizeof(NOP)); - CTX->InvalidateGuestCodeRange(Thread, (uint64_t)MFENCE, sizeof(MFENCE)); - SetupInfoDisabled = false; - } + CalculateDifferenceBetweenStats(NOPStats, MFENCEStats); + // Now that the stats have been cleared. Clear our currentStats. + ClearStats(); - static CodeSizeValidation Validation{}; + // Invalidate the code ranges to be safe. 
+ auto CodeInvalidationlk = FEXCore::GuardSignalDeferringSection(CTX->GetCodeInvalidationMutex(), Thread); + CTX->InvalidateGuestCodeRange(Thread, (uint64_t)NOP, sizeof(NOP)); + CTX->InvalidateGuestCodeRange(Thread, (uint64_t)MFENCE, sizeof(MFENCE)); + SetupInfoDisabled = false; } -void MsgHandler(LogMan::DebugLevels Level, char const *Message) { - const char *CharLevel{LogMan::DebugLevelStr(Level)}; +static CodeSizeValidation Validation {}; +} // namespace CodeSize + +void MsgHandler(LogMan::DebugLevels Level, const char* Message) { + const char* CharLevel {LogMan::DebugLevelStr(Level)}; if (Level == LogMan::INFO) { // Disassemble information is sent through the Info log level. @@ -225,7 +226,7 @@ void MsgHandler(LogMan::DebugLevels Level, char const *Message) { fextl::fmt::print("[{}] {}\n", CharLevel, Message); } -void AssertHandler(char const *Message) { +void AssertHandler(const char* Message) { fextl::fmt::print("[ASSERT] {}\n", Message); // make sure buffers are flushed @@ -243,7 +244,7 @@ struct TestInfo { struct TestHeader { uint64_t Bitness; - uint64_t NumTests{}; + uint64_t NumTests {}; uint64_t EnabledHostFeatures; uint64_t DisabledHostFeatures; uint64_t EnvironmentVariableCount; @@ -251,15 +252,15 @@ struct TestHeader { }; static fextl::vector TestData; -static TestHeader const *TestHeaderData{}; -static TestInfo const *TestsStart{}; -static fextl::vector> EnvironmentVariables{}; +static const TestHeader* TestHeaderData {}; +static const TestInfo* TestsStart {}; +static fextl::vector> EnvironmentVariables {}; -static bool TestInstructions(FEXCore::Context::Context *CTX, FEXCore::Core::InternalThreadState *Thread, const char *UpdatedInstructionCountsPath) { +static bool TestInstructions(FEXCore::Context::Context* CTX, FEXCore::Core::InternalThreadState* Thread, const char* UpdatedInstructionCountsPath) { LogMan::Msg::IFmt("Compiling code"); // Tell FEXCore to compile all the instructions upfront. 
- TestInfo const *CurrentTest = TestsStart; + const TestInfo* CurrentTest = TestsStart; for (size_t i = 0; i < TestHeaderData->NumTests; ++i) { uint64_t CodeRIP = (uint64_t)&CurrentTest->Code[0]; LogMan::Msg::IFmt("Compiling instruction '{}'", CurrentTest->TestInst); @@ -268,7 +269,7 @@ static bool TestInstructions(FEXCore::Context::Context *CTX, FEXCore::Core::Inte CTX->CompileRIPCount(Thread, CodeRIP, CurrentTest->x86InstCount); // Go to the next test. - CurrentTest = reinterpret_cast(&CurrentTest->Code[CurrentTest->CodeSize]); + CurrentTest = reinterpret_cast< const TestInfo*>(&CurrentTest->Code[CurrentTest->CodeSize]); } bool TestsPassed {true}; @@ -283,8 +284,7 @@ static bool TestInstructions(FEXCore::Context::Context *CTX, FEXCore::Core::Inte LogMan::Msg::IFmt("Testing instruction '{}': {} host instructions", CurrentTest->TestInst, INSTStats->first.HostCodeInstructions); // Show the code if the count of instructions changed to something we didn't expect. - bool ShouldShowCode = - INSTStats->first.HostCodeInstructions != CurrentTest->ExpectedInstructionCount; + bool ShouldShowCode = INSTStats->first.HostCodeInstructions != CurrentTest->ExpectedInstructionCount; if (ShouldShowCode) { for (auto Line : INSTStats->second) { @@ -294,21 +294,23 @@ static bool TestInstructions(FEXCore::Context::Context *CTX, FEXCore::Core::Inte if (INSTStats->first.HostCodeInstructions != CurrentTest->ExpectedInstructionCount) { LogMan::Msg::EFmt("Fail: '{}': {} host instructions", CurrentTest->TestInst, INSTStats->first.HostCodeInstructions); - LogMan::Msg::EFmt("Fail: Test took {} instructions but we expected {} instructions!", INSTStats->first.HostCodeInstructions, CurrentTest->ExpectedInstructionCount); + LogMan::Msg::EFmt("Fail: Test took {} instructions but we expected {} instructions!", INSTStats->first.HostCodeInstructions, + CurrentTest->ExpectedInstructionCount); // Fail the test if the instruction count has changed at all. TestsPassed = false; } // Go to the next test. 
- CurrentTest = reinterpret_cast(&CurrentTest->Code[CurrentTest->CodeSize]); + CurrentTest = reinterpret_cast< const TestInfo*>(&CurrentTest->Code[CurrentTest->CodeSize]); } if (UpdatedInstructionCountsPath) { // Unlink the file. unlink(UpdatedInstructionCountsPath); - FEXCore::File::File FD(UpdatedInstructionCountsPath, FEXCore::File::FileModes::WRITE | FEXCore::File::FileModes::CREATE | FEXCore::File::FileModes::TRUNCATE); + FEXCore::File::File FD(UpdatedInstructionCountsPath, + FEXCore::File::FileModes::WRITE | FEXCore::File::FileModes::CREATE | FEXCore::File::FileModes::TRUNCATE); if (!FD.IsValid()) { // If we couldn't open the file then early exit this. @@ -332,7 +334,7 @@ static bool TestInstructions(FEXCore::Context::Context *CTX, FEXCore::Core::Inte FD.Write(fextl::fmt::format("\t\t\"ExpectedArm64ASM\": [\n", INSTStats->first.HostCodeInstructions)); for (auto it = INSTStats->second.begin(); it != INSTStats->second.end(); ++it) { - const auto &Line = *it; + const auto& Line = *it; const auto NextIt = it + 1; FD.Write(fextl::fmt::format("\t\t\t\"{}\"{}\n", Line, NextIt != INSTStats->second.end() ? "," : "")); } @@ -341,7 +343,7 @@ static bool TestInstructions(FEXCore::Context::Context *CTX, FEXCore::Core::Inte FD.Write(fextl::fmt::format("\t}},\n", CurrentTest->TestInst)); // Go to the next test. - CurrentTest = reinterpret_cast(&CurrentTest->Code[CurrentTest->CodeSize]); + CurrentTest = reinterpret_cast< const TestInfo*>(&CurrentTest->Code[CurrentTest->CodeSize]); } // Print a null member @@ -352,15 +354,15 @@ static bool TestInstructions(FEXCore::Context::Context *CTX, FEXCore::Core::Inte return TestsPassed; } -bool LoadTests(const char *Path) { +bool LoadTests(const char* Path) { if (!FEXCore::FileLoading::LoadFile(TestData, Path)) { return false; } - TestHeaderData = reinterpret_cast(TestData.data()); + TestHeaderData = reinterpret_cast< const TestHeader*>(TestData.data()); // Need to walk past the environment variables to get to the actual tests. 
- const uint8_t *Data = TestHeaderData->Data; + const uint8_t* Data = TestHeaderData->Data; for (size_t i = 0; i < TestHeaderData->EnvironmentVariableCount; ++i) { // Environment variables are a pair of null terminated strings. Data += strlen(reinterpret_cast(Data)) + 1; @@ -386,7 +388,7 @@ class TestEnvLoader final : public FEXCore::Config::Layer { void Load() override { fextl::unordered_map EnvMap; - const uint8_t *Data = TestHeaderData->Data; + const uint8_t* Data = TestHeaderData->Data; for (size_t i = 0; i < TestHeaderData->EnvironmentVariableCount; ++i) { // Environment variables are a pair of null terminated strings. const std::string_view Key = reinterpret_cast(Data); @@ -401,21 +403,21 @@ class TestEnvLoader final : public FEXCore::Config::Layer { if (Value) { EnvMap.insert_or_assign(Key, *Value); - } - else { + } else { EnvMap.insert_or_assign(Key, Value_View); } } auto GetVar = [&](const std::string_view id) -> std::optional { const auto it = EnvMap.find(id); - if (it == EnvMap.end()) + if (it == EnvMap.end()) { return std::nullopt; + } return it->second; }; - for (auto &it : EnvConfigLookup) { + for (auto& it : EnvConfigLookup) { if (auto Value = GetVar(it.first); Value) { Set(it.second, *Value); } @@ -425,9 +427,9 @@ class TestEnvLoader final : public FEXCore::Config::Layer { private: fextl::vector> Env; }; -} +} // namespace -int main(int argc, char **argv, char **const envp) { +int main(int argc, char** argv, char** const envp) { FEXCore::Allocator::GLIBCScopedFault GLIBFaultScope; LogMan::Throw::InstallHandler(AssertHandler); LogMan::Msg::InstallHandler(MsgHandler); @@ -453,7 +455,9 @@ int main(int argc, char **argv, char **const envp) { // IRJIT. Only works on JITs. FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_CORE, fextl::fmt::format("{}", static_cast(FEXCore::Config::CONFIG_IRJIT))); // Enable block disassembly. 
- FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_DISASSEMBLE, fextl::fmt::format("{}", static_cast(FEXCore::Config::Disassemble::BLOCKS | FEXCore::Config::Disassemble::STATS))); + FEXCore::Config::EraseSet( + FEXCore::Config::CONFIG_DISASSEMBLE, + fextl::fmt::format("{}", static_cast(FEXCore::Config::Disassemble::BLOCKS | FEXCore::Config::Disassemble::STATS))); // Choose bitness. FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_IS64BIT_MODE, TestHeaderData->Bitness == 64 ? "1" : "0"); // Disable telemetry, it can affect instruction counts. @@ -466,18 +470,18 @@ int main(int argc, char **argv, char **const envp) { FEATURE_SVE128 = (1U << 0), FEATURE_SVE256 = (1U << 1), FEATURE_CLZERO = (1U << 2), - FEATURE_RNG = (1U << 3), - FEATURE_FCMA = (1U << 4), - FEATURE_CSSC = (1U << 5), - FEATURE_AFP = (1U << 6), - FEATURE_RPRES = (1U << 7), - FEATURE_FLAGM = (1U << 8), + FEATURE_RNG = (1U << 3), + FEATURE_FCMA = (1U << 4), + FEATURE_CSSC = (1U << 5), + FEATURE_AFP = (1U << 6), + FEATURE_RPRES = (1U << 7), + FEATURE_FLAGM = (1U << 8), FEATURE_FLAGM2 = (1U << 9), FEATURE_CRYPTO = (1U << 10), }; uint64_t SVEWidth = 0; - uint64_t HostFeatureControl{}; + uint64_t HostFeatureControl {}; if (TestHeaderData->EnabledHostFeatures & FEATURE_SVE128) { HostFeatureControl |= static_cast(FEXCore::Config::HostFeatures::ENABLESVE); SVEWidth = 128; diff --git a/Source/Tools/CommonTools/DummyHandlers.cpp b/Source/Tools/CommonTools/DummyHandlers.cpp index db5b58c172..73285ed1ee 100644 --- a/Source/Tools/CommonTools/DummyHandlers.cpp +++ b/Source/Tools/CommonTools/DummyHandlers.cpp @@ -2,19 +2,19 @@ #include "DummyHandlers.h" namespace FEX::DummyHandlers { - thread_local FEXCore::Core::InternalThreadState *TLSThread; +thread_local FEXCore::Core::InternalThreadState* TLSThread; - void DummySignalDelegator::RegisterTLSState(FEXCore::Core::InternalThreadState *Thread) { - TLSThread = Thread; - } +void DummySignalDelegator::RegisterTLSState(FEXCore::Core::InternalThreadState* Thread) { + 
TLSThread = Thread; +} - void DummySignalDelegator::UninstallTLSState(FEXCore::Core::InternalThreadState *Thread) { - TLSThread = nullptr; - } +void DummySignalDelegator::UninstallTLSState(FEXCore::Core::InternalThreadState* Thread) { + TLSThread = nullptr; +} - FEXCore::Core::InternalThreadState *DummySignalDelegator::GetTLSThread() { - return TLSThread; - } +FEXCore::Core::InternalThreadState* DummySignalDelegator::GetTLSThread() { + return TLSThread; +} fextl::unique_ptr CreateSyscallHandler() { return fextl::make_unique(); @@ -23,4 +23,4 @@ fextl::unique_ptr CreateSyscallHandler() { fextl::unique_ptr CreateSignalDelegator() { return fextl::make_unique(); } -} +} // namespace FEX::DummyHandlers diff --git a/Source/Tools/CommonTools/Linux/Utils/ELFContainer.cpp b/Source/Tools/CommonTools/Linux/Utils/ELFContainer.cpp index d6f4a8ff71..017ad19608 100644 --- a/Source/Tools/CommonTools/Linux/Utils/ELFContainer.cpp +++ b/Source/Tools/CommonTools/Linux/Utils/ELFContainer.cpp @@ -26,31 +26,27 @@ desc: Loads and parses an elf to memory. Also handles some loading & logic. 
namespace ELFLoader { - static ELFContainer::ELFType CheckELFType(uint8_t* Data) { - if (Data[EI_MAG0] != ELFMAG0 || - Data[EI_MAG1] != ELFMAG1 || - Data[EI_MAG2] != ELFMAG2 || - Data[EI_MAG3] != ELFMAG3) { - return ELFContainer::ELFType::TYPE_NONE; - } +static ELFContainer::ELFType CheckELFType(uint8_t* Data) { + if (Data[EI_MAG0] != ELFMAG0 || Data[EI_MAG1] != ELFMAG1 || Data[EI_MAG2] != ELFMAG2 || Data[EI_MAG3] != ELFMAG3) { + return ELFContainer::ELFType::TYPE_NONE; + } - if (Data[EI_CLASS] == ELFCLASS32) { - Elf32_Ehdr *Header = reinterpret_cast(Data); - if (Header->e_machine == EM_386) { - return ELFContainer::ELFType::TYPE_X86_32; - } + if (Data[EI_CLASS] == ELFCLASS32) { + Elf32_Ehdr* Header = reinterpret_cast(Data); + if (Header->e_machine == EM_386) { + return ELFContainer::ELFType::TYPE_X86_32; } - else if (Data[EI_CLASS] == ELFCLASS64) { - Elf64_Ehdr *Header = reinterpret_cast(Data); - if (Header->e_machine == EM_X86_64) { - return ELFContainer::ELFType::TYPE_X86_64; - } + } else if (Data[EI_CLASS] == ELFCLASS64) { + Elf64_Ehdr* Header = reinterpret_cast(Data); + if (Header->e_machine == EM_X86_64) { + return ELFContainer::ELFType::TYPE_X86_64; } - - return ELFContainer::ELFType::TYPE_OTHER_ELF; } -ELFContainer::ELFType ELFContainer::GetELFType(fextl::string const &Filename) { + return ELFContainer::ELFType::TYPE_OTHER_ELF; +} + +ELFContainer::ELFType ELFContainer::GetELFType(const fextl::string& Filename) { // Open the Filename to determine if it is a shebang file. int FD = open(Filename.c_str(), O_RDONLY | O_CLOEXEC); if (FD == -1) { @@ -69,7 +65,7 @@ ELFContainer::ELFType ELFContainer::GetELFType(int FD) { // We can't use dup since that makes the FD have the same underlying state backing both FDs. // We need to first determine the file size through fstat. - struct stat buf{}; + struct stat buf {}; if (fstat(FD, &buf) == -1) { // Couldn't get size. 
return ELFType::TYPE_NONE; @@ -94,7 +90,7 @@ ELFContainer::ELFType ELFContainer::GetELFType(int FD) { return CheckELFType(reinterpret_cast(&RawFile.at(0))); } -ELFContainer::ELFContainer(fextl::string const &Filename, fextl::string const &RootFS, bool CustomInterpreter) { +ELFContainer::ELFContainer(const fextl::string& Filename, const fextl::string& RootFS, bool CustomInterpreter) { Loaded = true; if (!LoadELF(Filename)) { LogMan::Msg::EFmt("Couldn't Load ELF file"); @@ -106,16 +102,15 @@ ELFContainer::ELFContainer(fextl::string const &Filename, fextl::string const &R // If we we are dynamic application then we have an interpreter program header // We need to load that ELF instead if it exists // We are no longer dynamic since we are executing the interpreter - const char *RawString{}; + const char* RawString {}; if (Mode == MODE_32BIT) { RawString = &RawFile.at(InterpreterHeader._32->p_offset); - } - else { + } else { RawString = &RawFile.at(InterpreterHeader._64->p_offset); } fextl::string RootFSLink = RootFS + RawString; char Filename[PATH_MAX]; - while(FHU::Symlinks::IsSymlink(RootFSLink)) { + while (FHU::Symlinks::IsSymlink(RootFSLink)) { // Do some special handling if the RootFS's linker is a symlink // Ubuntu's rootFS by default provides an absolute location symlink to the linker // Resolve this around back to the rootfs @@ -123,22 +118,19 @@ ELFContainer::ELFContainer(fextl::string const &Filename, fextl::string const &R if (FHU::Filesystem::IsAbsolute(SymlinkTarget)) { RootFSLink = RootFS; RootFSLink += SymlinkTarget; - } - else { + } else { break; } } if (LoadELF(RootFSLink)) { // Found the interpreter in the rootfs - } - else if (!LoadELF(RawString)) { + } else if (!LoadELF(RawString)) { LogMan::Msg::EFmt("Failed to find guest ELF's interpter '{}'", RawString); LogMan::Msg::EFmt("Did you forget to set an x86 rootfs? 
Currently '{}'", RootFS); Loaded = false; return; } - } - else if (InterpreterHeader._64) { + } else if (InterpreterHeader._64) { GetDynamicLibs(); } @@ -148,14 +140,14 @@ ELFContainer::ELFContainer(fextl::string const &Filename, fextl::string const &R // Print Information // PrintHeader(); - //PrintSectionHeaders(); - //PrintProgramHeaders(); - //PrintSymbolTable(); - //PrintRelocationTable(); - //PrintInitArray(); - //PrintDynamicTable(); - - //LOGMAN_THROW_AA_FMT(InterpreterHeader == nullptr, "Can only handle static programs"); + // PrintSectionHeaders(); + // PrintProgramHeaders(); + // PrintSymbolTable(); + // PrintRelocationTable(); + // PrintInitArray(); + // PrintDynamicTable(); + + // LOGMAN_THROW_AA_FMT(InterpreterHeader == nullptr, "Can only handle static programs"); } ELFContainer::~ELFContainer() { @@ -168,7 +160,7 @@ ELFContainer::~ELFContainer() { RawFile.clear(); } -bool ELFContainer::LoadELF(fextl::string const &Filename) { +bool ELFContainer::LoadELF(const fextl::string& Filename) { if (!FEXCore::FileLoading::LoadFile(RawFile, Filename)) { return false; } @@ -178,20 +170,16 @@ bool ELFContainer::LoadELF(fextl::string const &Filename) { SectionHeaders.clear(); ProgramHeaders.clear(); - uint8_t *Ident = reinterpret_cast(&RawFile.at(0)); + uint8_t* Ident = reinterpret_cast(&RawFile.at(0)); - if (Ident[EI_MAG0] != ELFMAG0 || - Ident[EI_MAG1] != ELFMAG1 || - Ident[EI_MAG2] != ELFMAG2 || - Ident[EI_MAG3] != ELFMAG3) { + if (Ident[EI_MAG0] != ELFMAG0 || Ident[EI_MAG1] != ELFMAG1 || Ident[EI_MAG2] != ELFMAG2 || Ident[EI_MAG3] != ELFMAG3) { LogMan::Msg::EFmt("ELF missing magic cookie"); return false; } if (Ident[EI_CLASS] == ELFCLASS32) { return LoadELF_32(); - } - else if (Ident[EI_CLASS] == ELFCLASS64) { + } else if (Ident[EI_CLASS] == ELFCLASS64) { return LoadELF_64(); } @@ -202,8 +190,7 @@ bool ELFContainer::LoadELF(fextl::string const &Filename) { bool ELFContainer::LoadELF_32() { Mode = MODE_32BIT; - memcpy(&Header, reinterpret_cast(&RawFile.at(0)), 
- sizeof(Elf32_Ehdr)); + memcpy(&Header, reinterpret_cast(&RawFile.at(0)), sizeof(Elf32_Ehdr)); LOGMAN_THROW_AA_FMT(Header._32.e_phentsize == sizeof(Elf32_Phdr), "PH Entry size wasn't correct size"); LOGMAN_THROW_AA_FMT(Header._32.e_shentsize == sizeof(Elf32_Shdr), "PH Entry size wasn't correct size"); @@ -215,10 +202,8 @@ bool ELFContainer::LoadELF_32() { SectionHeaders.resize(Header._32.e_shnum); ProgramHeaders.resize(Header._32.e_phnum); - Elf32_Shdr *RawShdrs = - reinterpret_cast(&RawFile.at(Header._32.e_shoff)); - Elf32_Phdr *RawPhdrs = - reinterpret_cast(&RawFile.at(Header._32.e_phoff)); + Elf32_Shdr* RawShdrs = reinterpret_cast(&RawFile.at(Header._32.e_shoff)); + Elf32_Phdr* RawPhdrs = reinterpret_cast(&RawFile.at(Header._32.e_phoff)); for (uint32_t i = 0; i < Header._32.e_shnum; ++i) { SectionHeaders[i]._32 = &RawShdrs[i]; @@ -228,7 +213,7 @@ bool ELFContainer::LoadELF_32() { ProgramHeaders[i]._32 = &RawPhdrs[i]; if (ProgramHeaders[i]._32->p_type == PT_INTERP) { InterpreterHeader = ProgramHeaders[i]; - DynamicLinker = reinterpret_cast(&RawFile.at(InterpreterHeader._32->p_offset)); + DynamicLinker = reinterpret_cast(&RawFile.at(InterpreterHeader._32->p_offset)); } } @@ -243,8 +228,7 @@ bool ELFContainer::LoadELF_32() { bool ELFContainer::LoadELF_64() { Mode = MODE_64BIT; - memcpy(&Header, reinterpret_cast(&RawFile.at(0)), - sizeof(Elf64_Ehdr)); + memcpy(&Header, reinterpret_cast(&RawFile.at(0)), sizeof(Elf64_Ehdr)); LOGMAN_THROW_AA_FMT(Header._64.e_phentsize == 56, "PH Entry size wasn't 56"); LOGMAN_THROW_AA_FMT(Header._64.e_shentsize == 64, "PH Entry size wasn't 64"); @@ -256,10 +240,8 @@ bool ELFContainer::LoadELF_64() { SectionHeaders.resize(Header._64.e_shnum); ProgramHeaders.resize(Header._64.e_phnum); - Elf64_Shdr *RawShdrs = - reinterpret_cast(&RawFile.at(Header._64.e_shoff)); - Elf64_Phdr *RawPhdrs = - reinterpret_cast(&RawFile.at(Header._64.e_phoff)); + Elf64_Shdr* RawShdrs = reinterpret_cast(&RawFile.at(Header._64.e_shoff)); + Elf64_Phdr* RawPhdrs 
= reinterpret_cast(&RawFile.at(Header._64.e_phoff)); for (uint32_t i = 0; i < Header._64.e_shnum; ++i) { SectionHeaders[i]._64 = &RawShdrs[i]; @@ -269,7 +251,7 @@ bool ELFContainer::LoadELF_64() { ProgramHeaders[i]._64 = &RawPhdrs[i]; if (ProgramHeaders[i]._64->p_type == PT_INTERP) { InterpreterHeader = ProgramHeaders[i]; - DynamicLinker = reinterpret_cast(&RawFile.at(InterpreterHeader._64->p_offset)); + DynamicLinker = reinterpret_cast(&RawFile.at(InterpreterHeader._64->p_offset)); } } @@ -284,9 +266,9 @@ bool ELFContainer::LoadELF_64() { void ELFContainer::WriteLoadableSections(MemoryWriter Writer, uint64_t Offset) { if (Mode == MODE_32BIT) { for (uint32_t i = 0; i < ProgramHeaders.size(); ++i) { - Elf32_Phdr const *hdr = ProgramHeaders.at(i)._32; + const Elf32_Phdr* hdr = ProgramHeaders.at(i)._32; if (hdr->p_type == PT_LOAD) { - //LogMan::Msg::DFmt("PT_LOAD: Base: {} Offset: [0x{:x}, 0x{:x})", Offset, hdr->p_paddr, hdr->p_filesz); + // LogMan::Msg::DFmt("PT_LOAD: Base: {} Offset: [0x{:x}, 0x{:x})", Offset, hdr->p_paddr, hdr->p_filesz); Writer(&RawFile.at(hdr->p_offset), Offset + hdr->p_paddr, hdr->p_filesz); } @@ -294,10 +276,9 @@ void ELFContainer::WriteLoadableSections(MemoryWriter Writer, uint64_t Offset) { Writer(&RawFile.at(hdr->p_offset), Offset + hdr->p_paddr, hdr->p_filesz); } } - } - else { + } else { for (uint32_t i = 0; i < ProgramHeaders.size(); ++i) { - Elf64_Phdr const *hdr = ProgramHeaders.at(i)._64; + const Elf64_Phdr* hdr = ProgramHeaders.at(i)._64; if (hdr->p_type == PT_LOAD) { Writer(&RawFile.at(hdr->p_offset), Offset + hdr->p_paddr, hdr->p_filesz); } @@ -309,27 +290,32 @@ void ELFContainer::WriteLoadableSections(MemoryWriter Writer, uint64_t Offset) { } } -ELFSymbol const *ELFContainer::GetSymbol(char const *Name) { +const ELFSymbol* ELFContainer::GetSymbol(const char* Name) { auto Sym = SymbolMap.find(Name); - if (Sym == SymbolMap.end()) + if (Sym == SymbolMap.end()) { return nullptr; + } return Sym->second; } -ELFSymbol const 
*ELFContainer::GetSymbol(uint64_t Address) { +const ELFSymbol* ELFContainer::GetSymbol(uint64_t Address) { auto Sym = SymbolMapByAddress.find(Address); - if (Sym == SymbolMapByAddress.end()) + if (Sym == SymbolMapByAddress.end()) { return nullptr; + } return Sym->second; } -ELFSymbol const *ELFContainer::GetSymbolInRange(RangeType Address) { +const ELFSymbol* ELFContainer::GetSymbolInRange(RangeType Address) { auto Sym = SymbolMapByAddress.upper_bound(Address.first); - if (Sym != SymbolMapByAddress.begin()) + if (Sym != SymbolMapByAddress.begin()) { --Sym; - if (Sym == SymbolMapByAddress.end()) + } + if (Sym == SymbolMapByAddress.end()) { return nullptr; + } - if ((Sym->second->Address + Sym->second->Size) < Address.first) + if ((Sym->second->Address + Sym->second->Size) < Address.first) { return nullptr; + } return Sym->second; } @@ -341,7 +327,7 @@ void ELFContainer::CalculateMemoryLayouts() { if (Mode == MODE_32BIT) { for (uint32_t i = 0; i < ProgramHeaders.size(); ++i) { - Elf32_Phdr *hdr = ProgramHeaders.at(i)._32; + Elf32_Phdr* hdr = ProgramHeaders.at(i)._32; if (hdr->p_memsz > 0) { MinPhysAddr = std::min(MinPhysAddr, static_cast(hdr->p_paddr)); MaxPhysAddr = std::max(MaxPhysAddr, static_cast(hdr->p_paddr) + hdr->p_memsz); @@ -350,10 +336,9 @@ void ELFContainer::CalculateMemoryLayouts() { TLSHeader._32 = hdr; } } - } - else { + } else { for (uint32_t i = 0; i < ProgramHeaders.size(); ++i) { - Elf64_Phdr *hdr = ProgramHeaders.at(i)._64; + Elf64_Phdr* hdr = ProgramHeaders.at(i)._64; // Many elfs have program region labeled .GNU_STACK which is empty and has a null address. // It's used to mark the memory protection flags of the stack. 
@@ -384,16 +369,16 @@ void ELFContainer::CalculateMemoryLayouts() { void ELFContainer::CalculateSymbols() { // Find the symbol table if (Mode == MODE_32BIT) { - Elf32_Shdr const *SymTabHeader{nullptr}; - Elf32_Shdr const *StringTableHeader{nullptr}; - char const *StrTab{nullptr}; + const Elf32_Shdr* SymTabHeader {nullptr}; + const Elf32_Shdr* StringTableHeader {nullptr}; + const char* StrTab {nullptr}; - Elf32_Shdr const *DynSymTabHeader{nullptr}; - Elf32_Shdr const *DynStringTableHeader{nullptr}; - char const *DynStrTab{nullptr}; + const Elf32_Shdr* DynSymTabHeader {nullptr}; + const Elf32_Shdr* DynStringTableHeader {nullptr}; + const char* DynStrTab {nullptr}; for (uint32_t i = 0; i < SectionHeaders.size(); ++i) { - Elf32_Shdr const *hdr = SectionHeaders.at(i)._32; + const Elf32_Shdr* hdr = SectionHeaders.at(i)._32; if (hdr->sh_type == SHT_SYMTAB) { SymTabHeader = hdr; break; @@ -401,7 +386,7 @@ void ELFContainer::CalculateSymbols() { } for (uint32_t i = 0; i < SectionHeaders.size(); ++i) { - Elf32_Shdr const *hdr = SectionHeaders.at(i)._32; + const Elf32_Shdr* hdr = SectionHeaders.at(i)._32; if (hdr->sh_type == SHT_DYNSYM) { DynSymTabHeader = hdr; break; @@ -416,10 +401,8 @@ void ELFContainer::CalculateSymbols() { uint64_t NumSymTabSymbols = 0; uint64_t NumDynSymSymbols = 0; if (SymTabHeader) { - LOGMAN_THROW_A_FMT(SymTabHeader->sh_link < SectionHeaders.size(), - "Symbol table string table section is wrong"); - LOGMAN_THROW_AA_FMT(SymTabHeader->sh_entsize == sizeof(Elf32_Sym), - "Entry size doesn't match symbol entry"); + LOGMAN_THROW_A_FMT(SymTabHeader->sh_link < SectionHeaders.size(), "Symbol table string table section is wrong"); + LOGMAN_THROW_AA_FMT(SymTabHeader->sh_entsize == sizeof(Elf32_Sym), "Entry size doesn't match symbol entry"); StringTableHeader = SectionHeaders.at(SymTabHeader->sh_link)._32; StrTab = &RawFile.at(StringTableHeader->sh_offset); @@ -427,10 +410,8 @@ void ELFContainer::CalculateSymbols() { } if (DynSymTabHeader) { - 
LOGMAN_THROW_A_FMT(DynSymTabHeader->sh_link < SectionHeaders.size(), - "Symbol table string table section is wrong"); - LOGMAN_THROW_AA_FMT(DynSymTabHeader->sh_entsize == sizeof(Elf32_Sym), - "Entry size doesn't match symbol entry"); + LOGMAN_THROW_A_FMT(DynSymTabHeader->sh_link < SectionHeaders.size(), "Symbol table string table section is wrong"); + LOGMAN_THROW_AA_FMT(DynSymTabHeader->sh_entsize == sizeof(Elf32_Sym), "Entry size doesn't match symbol entry"); DynStringTableHeader = SectionHeaders.at(DynSymTabHeader->sh_link)._32; DynStrTab = &RawFile.at(DynStringTableHeader->sh_offset); @@ -442,13 +423,11 @@ void ELFContainer::CalculateSymbols() { Symbols.resize(NumSymbols); for (uint64_t i = 0; i < NumSymTabSymbols; ++i) { uint64_t offset = SymTabHeader->sh_offset + i * SymTabHeader->sh_entsize; - Elf32_Sym const *Symbol = - reinterpret_cast(&RawFile.at(offset)); - if (ELF32_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && - Symbol->st_value != 0) { - char const * Name = &StrTab[Symbol->st_name]; + const Elf32_Sym* Symbol = reinterpret_cast< const Elf32_Sym*>(&RawFile.at(offset)); + if (ELF32_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && Symbol->st_value != 0) { + const char* Name = &StrTab[Symbol->st_name]; if (Name[0] != '\0') { - ELFSymbol *DefinedSymbol = &Symbols.at(i); + ELFSymbol* DefinedSymbol = &Symbols.at(i); DefinedSymbol->FileOffset = offset; DefinedSymbol->Address = Symbol->st_value; DefinedSymbol->Size = Symbol->st_size; @@ -465,13 +444,11 @@ void ELFContainer::CalculateSymbols() { for (uint64_t i = 0; i < NumDynSymSymbols; ++i) { uint64_t offset = DynSymTabHeader->sh_offset + i * DynSymTabHeader->sh_entsize; - Elf32_Sym const *Symbol = - reinterpret_cast(&RawFile.at(offset)); - if (ELF32_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && - Symbol->st_value != 0) { - char const * Name = &DynStrTab[Symbol->st_name]; + const Elf32_Sym* Symbol = reinterpret_cast< const Elf32_Sym*>(&RawFile.at(offset)); + if (ELF32_ST_VISIBILITY(Symbol->st_other) != 
STV_HIDDEN && Symbol->st_value != 0) { + const char* Name = &DynStrTab[Symbol->st_name]; if (Name[0] != '\0') { - ELFSymbol *DefinedSymbol = &Symbols.at(NumSymTabSymbols + i); + ELFSymbol* DefinedSymbol = &Symbols.at(NumSymTabSymbols + i); DefinedSymbol->FileOffset = offset; DefinedSymbol->Address = Symbol->st_value; DefinedSymbol->Size = Symbol->st_size; @@ -486,10 +463,10 @@ void ELFContainer::CalculateSymbols() { } } - Elf32_Shdr const *StrHeader = SectionHeaders.at(Header._32.e_shstrndx)._32; - char const *SHStrings = &RawFile.at(StrHeader->sh_offset); + const Elf32_Shdr* StrHeader = SectionHeaders.at(Header._32.e_shstrndx)._32; + const char* SHStrings = &RawFile.at(StrHeader->sh_offset); for (uint32_t i = 0; i < SectionHeaders.size(); ++i) { - Elf32_Shdr const *hdr = SectionHeaders.at(i)._32; + const Elf32_Shdr* hdr = SectionHeaders.at(i)._32; if (strcmp(&SHStrings[hdr->sh_name], ".eh_frame_hdr") == 0) { auto eh_frame_hdr = &RawFile.at(hdr->sh_offset); // we only handle this specific unwind table encoding @@ -506,7 +483,7 @@ void ELFContainer::CalculateSymbols() { int32_t fde; }; - entry *Table = (entry*)(eh_frame_hdr+12); + entry* Table = (entry*)(eh_frame_hdr + 12); for (int f = 0; f < fde_count; f++) { uintptr_t Entry = (uintptr_t)(Table[f].pc + hdr->sh_offset); UnwindEntries.push_back(Entry); @@ -515,18 +492,17 @@ void ELFContainer::CalculateSymbols() { break; } } - } - else { - Elf64_Shdr const *SymTabHeader{nullptr}; - Elf64_Shdr const *StringTableHeader{nullptr}; - char const *StrTab{nullptr}; + } else { + const Elf64_Shdr* SymTabHeader {nullptr}; + const Elf64_Shdr* StringTableHeader {nullptr}; + const char* StrTab {nullptr}; - Elf64_Shdr const *DynSymTabHeader{nullptr}; - Elf64_Shdr const *DynStringTableHeader{nullptr}; - char const *DynStrTab{nullptr}; + const Elf64_Shdr* DynSymTabHeader {nullptr}; + const Elf64_Shdr* DynStringTableHeader {nullptr}; + const char* DynStrTab {nullptr}; for (uint32_t i = 0; i < SectionHeaders.size(); ++i) { - Elf64_Shdr 
const *hdr = SectionHeaders.at(i)._64; + const Elf64_Shdr* hdr = SectionHeaders.at(i)._64; if (hdr->sh_type == SHT_SYMTAB) { SymTabHeader = hdr; break; @@ -534,7 +510,7 @@ void ELFContainer::CalculateSymbols() { } for (uint32_t i = 0; i < SectionHeaders.size(); ++i) { - Elf64_Shdr const *hdr = SectionHeaders.at(i)._64; + const Elf64_Shdr* hdr = SectionHeaders.at(i)._64; if (hdr->sh_type == SHT_DYNSYM) { DynSymTabHeader = hdr; break; @@ -549,10 +525,8 @@ void ELFContainer::CalculateSymbols() { uint64_t NumSymTabSymbols = 0; uint64_t NumDynSymSymbols = 0; if (SymTabHeader) { - LOGMAN_THROW_A_FMT(SymTabHeader->sh_link < SectionHeaders.size(), - "Symbol table string table section is wrong"); - LOGMAN_THROW_AA_FMT(SymTabHeader->sh_entsize == sizeof(Elf64_Sym), - "Entry size doesn't match symbol entry"); + LOGMAN_THROW_A_FMT(SymTabHeader->sh_link < SectionHeaders.size(), "Symbol table string table section is wrong"); + LOGMAN_THROW_AA_FMT(SymTabHeader->sh_entsize == sizeof(Elf64_Sym), "Entry size doesn't match symbol entry"); StringTableHeader = SectionHeaders.at(SymTabHeader->sh_link)._64; StrTab = &RawFile.at(StringTableHeader->sh_offset); @@ -560,10 +534,8 @@ void ELFContainer::CalculateSymbols() { } if (DynSymTabHeader) { - LOGMAN_THROW_A_FMT(DynSymTabHeader->sh_link < SectionHeaders.size(), - "Symbol table string table section is wrong"); - LOGMAN_THROW_AA_FMT(DynSymTabHeader->sh_entsize == sizeof(Elf64_Sym), - "Entry size doesn't match symbol entry"); + LOGMAN_THROW_A_FMT(DynSymTabHeader->sh_link < SectionHeaders.size(), "Symbol table string table section is wrong"); + LOGMAN_THROW_AA_FMT(DynSymTabHeader->sh_entsize == sizeof(Elf64_Sym), "Entry size doesn't match symbol entry"); DynStringTableHeader = SectionHeaders.at(DynSymTabHeader->sh_link)._64; DynStrTab = &RawFile.at(DynStringTableHeader->sh_offset); @@ -575,13 +547,11 @@ void ELFContainer::CalculateSymbols() { Symbols.resize(NumSymbols); for (uint64_t i = 0; i < NumSymTabSymbols; ++i) { uint64_t offset = 
SymTabHeader->sh_offset + i * SymTabHeader->sh_entsize; - Elf64_Sym const *Symbol = - reinterpret_cast(&RawFile.at(offset)); - if (ELF64_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && - Symbol->st_value != 0) { - char const * Name = &StrTab[Symbol->st_name]; + const Elf64_Sym* Symbol = reinterpret_cast< const Elf64_Sym*>(&RawFile.at(offset)); + if (ELF64_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && Symbol->st_value != 0) { + const char* Name = &StrTab[Symbol->st_name]; if (Name[0] != '\0') { - ELFSymbol *DefinedSymbol = &Symbols.at(i); + ELFSymbol* DefinedSymbol = &Symbols.at(i); DefinedSymbol->FileOffset = offset; DefinedSymbol->Address = Symbol->st_value; DefinedSymbol->Size = Symbol->st_size; @@ -598,13 +568,11 @@ void ELFContainer::CalculateSymbols() { for (uint64_t i = 0; i < NumDynSymSymbols; ++i) { uint64_t offset = DynSymTabHeader->sh_offset + i * DynSymTabHeader->sh_entsize; - Elf64_Sym const *Symbol = - reinterpret_cast(&RawFile.at(offset)); - if (ELF64_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && - Symbol->st_value != 0) { - char const * Name = &DynStrTab[Symbol->st_name]; + const Elf64_Sym* Symbol = reinterpret_cast< const Elf64_Sym*>(&RawFile.at(offset)); + if (ELF64_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && Symbol->st_value != 0) { + const char* Name = &DynStrTab[Symbol->st_name]; if (Name[0] != '\0') { - ELFSymbol *DefinedSymbol = &Symbols.at(NumSymTabSymbols + i); + ELFSymbol* DefinedSymbol = &Symbols.at(NumSymTabSymbols + i); DefinedSymbol->FileOffset = offset; DefinedSymbol->Address = Symbol->st_value; DefinedSymbol->Size = Symbol->st_size; @@ -619,10 +587,10 @@ void ELFContainer::CalculateSymbols() { } } - Elf64_Shdr const *StrHeader = SectionHeaders.at(Header._64.e_shstrndx)._64; - char const *SHStrings = &RawFile.at(StrHeader->sh_offset); + const Elf64_Shdr* StrHeader = SectionHeaders.at(Header._64.e_shstrndx)._64; + const char* SHStrings = &RawFile.at(StrHeader->sh_offset); for (uint32_t i = 0; i < SectionHeaders.size(); ++i) 
{ - Elf64_Shdr const *hdr = SectionHeaders.at(i)._64; + const Elf64_Shdr* hdr = SectionHeaders.at(i)._64; if (strcmp(&SHStrings[hdr->sh_name], ".eh_frame_hdr") == 0) { auto eh_frame_hdr = &RawFile.at(hdr->sh_offset); // we only handle this specific unwind table encoding @@ -639,7 +607,7 @@ void ELFContainer::CalculateSymbols() { int32_t fde; }; - entry *Table = (entry*)(eh_frame_hdr+12); + entry* Table = (entry*)(eh_frame_hdr + 12); for (int f = 0; f < fde_count; f++) { uintptr_t Entry = (uintptr_t)(Table[f].pc + hdr->sh_offset); UnwindEntries.push_back(Entry); @@ -654,33 +622,36 @@ void ELFContainer::CalculateSymbols() { void ELFContainer::GetDynamicLibs() { if (Mode == MODE_32BIT) { for (uint32_t i = 0; i < SectionHeaders.size(); ++i) { - Elf32_Shdr const *hdr = SectionHeaders.at(i)._32; + const Elf32_Shdr* hdr = SectionHeaders.at(i)._32; if (hdr->sh_type == SHT_DYNAMIC) { - Elf32_Shdr const *StrHeader = SectionHeaders.at(hdr->sh_link)._32; - char const *SHStrings = &RawFile.at(StrHeader->sh_offset); + const Elf32_Shdr* StrHeader = SectionHeaders.at(hdr->sh_link)._32; + const char* SHStrings = &RawFile.at(StrHeader->sh_offset); size_t Entries = hdr->sh_size / hdr->sh_entsize; for (size_t j = 0; i < Entries; ++j) { - Elf32_Dyn const *Dynamic = reinterpret_cast(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize)); - if (Dynamic->d_tag == DT_NULL) break; + const Elf32_Dyn* Dynamic = reinterpret_cast< const Elf32_Dyn*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize)); + if (Dynamic->d_tag == DT_NULL) { + break; + } if (Dynamic->d_tag == DT_NEEDED) { NecessaryLibs.emplace_back(&SHStrings[Dynamic->d_un.d_val]); } } } } - } - else { + } else { for (uint32_t i = 0; i < SectionHeaders.size(); ++i) { - Elf64_Shdr const *hdr = SectionHeaders.at(i)._64; + const Elf64_Shdr* hdr = SectionHeaders.at(i)._64; if (hdr->sh_type == SHT_DYNAMIC) { - Elf64_Shdr const *StrHeader = SectionHeaders.at(hdr->sh_link)._64; - char const *SHStrings = &RawFile.at(StrHeader->sh_offset); + const 
Elf64_Shdr* StrHeader = SectionHeaders.at(hdr->sh_link)._64; + const char* SHStrings = &RawFile.at(StrHeader->sh_offset); size_t Entries = hdr->sh_size / hdr->sh_entsize; for (size_t j = 0; i < Entries; ++j) { - Elf64_Dyn const *Dynamic = reinterpret_cast(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize)); - if (Dynamic->d_tag == DT_NULL) break; + const Elf64_Dyn* Dynamic = reinterpret_cast< const Elf64_Dyn*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize)); + if (Dynamic->d_tag == DT_NULL) { + break; + } if (Dynamic->d_tag == DT_NEEDED) { NecessaryLibs.emplace_back(&SHStrings[Dynamic->d_un.d_val]); } @@ -691,7 +662,7 @@ void ELFContainer::GetDynamicLibs() { } void ELFContainer::AddSymbols(SymbolAdder Adder) { - for (auto &Sym : Symbols) { + for (auto& Sym : Symbols) { if (Sym.FileOffset) { Adder(&Sym); } @@ -718,8 +689,7 @@ void ELFContainer::PrintHeader() const { LogMan::Msg::IFmt("PH Entry Size: {}", Header._32.e_phentsize); LogMan::Msg::IFmt("SH Entry Size: {}", Header._32.e_shentsize); LogMan::Msg::IFmt("SH Str Index: {}", Header._32.e_shstrndx); - } - else { + } else { LogMan::Msg::IFmt("Type: {}", Header._64.e_type); LogMan::Msg::IFmt("Machine: {}", Header._64.e_machine); LogMan::Msg::IFmt("Version: {}", Header._64.e_version); @@ -738,12 +708,11 @@ void ELFContainer::PrintHeader() const { void ELFContainer::PrintSectionHeaders() const { if (Mode == MODE_32BIT) { - LOGMAN_THROW_A_FMT(Header._32.e_shstrndx < SectionHeaders.size(), - "String index section is wrong index!"); - Elf32_Shdr const *StrHeader = SectionHeaders.at(Header._32.e_shstrndx)._32; - char const *SHStrings = &RawFile.at(StrHeader->sh_offset); + LOGMAN_THROW_A_FMT(Header._32.e_shstrndx < SectionHeaders.size(), "String index section is wrong index!"); + const Elf32_Shdr* StrHeader = SectionHeaders.at(Header._32.e_shstrndx)._32; + const char* SHStrings = &RawFile.at(StrHeader->sh_offset); for (size_t i = 0; i < SectionHeaders.size(); ++i) { - Elf32_Shdr const *hdr = SectionHeaders[i]._32; + 
const Elf32_Shdr* hdr = SectionHeaders[i]._32; LogMan::Msg::IFmt("Index: {}", i); LogMan::Msg::IFmt("Name: {}", &SHStrings[hdr->sh_name]); LogMan::Msg::IFmt("Type: {}", hdr->sh_type); @@ -756,14 +725,12 @@ void ELFContainer::PrintSectionHeaders() const { LogMan::Msg::IFmt("AddrAlign: {}", hdr->sh_addralign); LogMan::Msg::IFmt("Entry Size: {}", hdr->sh_entsize); } - } - else { - LOGMAN_THROW_A_FMT(Header._64.e_shstrndx < SectionHeaders.size(), - "String index section is wrong index!"); - Elf64_Shdr const *StrHeader = SectionHeaders.at(Header._64.e_shstrndx)._64; - char const *SHStrings = &RawFile.at(StrHeader->sh_offset); + } else { + LOGMAN_THROW_A_FMT(Header._64.e_shstrndx < SectionHeaders.size(), "String index section is wrong index!"); + const Elf64_Shdr* StrHeader = SectionHeaders.at(Header._64.e_shstrndx)._64; + const char* SHStrings = &RawFile.at(StrHeader->sh_offset); for (size_t i = 0; i < SectionHeaders.size(); ++i) { - Elf64_Shdr const *hdr = SectionHeaders[i]._64; + const Elf64_Shdr* hdr = SectionHeaders[i]._64; LogMan::Msg::IFmt("Index: {}", i); LogMan::Msg::IFmt("Name: {}", &SHStrings[hdr->sh_name]); LogMan::Msg::IFmt("Type: {}", hdr->sh_type); @@ -781,10 +748,9 @@ void ELFContainer::PrintSectionHeaders() const { void ELFContainer::PrintProgramHeaders() const { if (Mode == MODE_32BIT) { - LOGMAN_THROW_A_FMT(Header._32.e_shstrndx < SectionHeaders.size(), - "String index section is wrong index!"); + LOGMAN_THROW_A_FMT(Header._32.e_shstrndx < SectionHeaders.size(), "String index section is wrong index!"); for (size_t i = 0; i < ProgramHeaders.size(); ++i) { - Elf32_Phdr const *hdr = ProgramHeaders[i]._32; + const Elf32_Phdr* hdr = ProgramHeaders[i]._32; LogMan::Msg::IFmt("Type: {}", hdr->p_type); LogMan::Msg::IFmt("Flags: {}", hdr->p_flags); LogMan::Msg::IFmt("Offset: {}", hdr->p_offset); @@ -794,12 +760,10 @@ void ELFContainer::PrintProgramHeaders() const { LogMan::Msg::IFmt("MemSize: {}", hdr->p_memsz); LogMan::Msg::IFmt("Align: {}", hdr->p_align); } - 
} - else { - LOGMAN_THROW_A_FMT(Header._64.e_shstrndx < SectionHeaders.size(), - "String index section is wrong index!"); + } else { + LOGMAN_THROW_A_FMT(Header._64.e_shstrndx < SectionHeaders.size(), "String index section is wrong index!"); for (size_t i = 0; i < ProgramHeaders.size(); ++i) { - Elf64_Phdr const *hdr = ProgramHeaders[i]._64; + const Elf64_Phdr* hdr = ProgramHeaders[i]._64; LogMan::Msg::IFmt("Type: {}", hdr->p_type); LogMan::Msg::IFmt("Flags: {}", hdr->p_flags); LogMan::Msg::IFmt("Offset: {}", hdr->p_offset); @@ -815,11 +779,11 @@ void ELFContainer::PrintProgramHeaders() const { void ELFContainer::PrintSymbolTable() const { if (Mode == MODE_32BIT) { // Find the symbol table - Elf32_Shdr const *SymTabHeader{nullptr}; - Elf32_Shdr const *StringTableHeader{nullptr}; - char const *StrTab{nullptr}; + const Elf32_Shdr* SymTabHeader {nullptr}; + const Elf32_Shdr* StringTableHeader {nullptr}; + const char* StrTab {nullptr}; for (uint32_t i = 0; i < SectionHeaders.size(); ++i) { - Elf32_Shdr const *hdr = SectionHeaders.at(i)._32; + const Elf32_Shdr* hdr = SectionHeaders.at(i)._32; if (hdr->sh_type == SHT_SYMTAB) { SymTabHeader = hdr; break; @@ -830,10 +794,8 @@ void ELFContainer::PrintSymbolTable() const { return; } - LOGMAN_THROW_A_FMT(SymTabHeader->sh_link < SectionHeaders.size(), - "Symbol table string table section is wrong"); - LOGMAN_THROW_AA_FMT(SymTabHeader->sh_entsize == sizeof(Elf32_Sym), - "Entry size doesn't match symbol entry"); + LOGMAN_THROW_A_FMT(SymTabHeader->sh_link < SectionHeaders.size(), "Symbol table string table section is wrong"); + LOGMAN_THROW_AA_FMT(SymTabHeader->sh_entsize == sizeof(Elf32_Sym), "Entry size doesn't match symbol entry"); StringTableHeader = SectionHeaders.at(SymTabHeader->sh_link)._32; StrTab = &RawFile.at(StringTableHeader->sh_offset); @@ -841,20 +803,18 @@ void ELFContainer::PrintSymbolTable() const { const uint64_t NumSymbols = SymTabHeader->sh_size / SymTabHeader->sh_entsize; for (uint64_t i = 0; i < NumSymbols; 
++i) { const uint64_t offset = SymTabHeader->sh_offset + i * SymTabHeader->sh_entsize; - const auto *Symbol = reinterpret_cast(&RawFile.at(offset)); + const auto* Symbol = reinterpret_cast(&RawFile.at(offset)); - LogMan::Msg::IFmt("{} : {:x} {} {} {} {} {}", i, Symbol->st_value, Symbol->st_size, - uint32_t(Symbol->st_info), uint32_t(Symbol->st_other), - Symbol->st_shndx, &StrTab[Symbol->st_name]); + LogMan::Msg::IFmt("{} : {:x} {} {} {} {} {}", i, Symbol->st_value, Symbol->st_size, uint32_t(Symbol->st_info), + uint32_t(Symbol->st_other), Symbol->st_shndx, &StrTab[Symbol->st_name]); } - } - else { + } else { // Find the symbol table - Elf64_Shdr const *SymTabHeader{nullptr}; - Elf64_Shdr const *StringTableHeader{nullptr}; - char const *StrTab{nullptr}; + const Elf64_Shdr* SymTabHeader {nullptr}; + const Elf64_Shdr* StringTableHeader {nullptr}; + const char* StrTab {nullptr}; for (uint32_t i = 0; i < SectionHeaders.size(); ++i) { - Elf64_Shdr const *hdr = SectionHeaders.at(i)._64; + const Elf64_Shdr* hdr = SectionHeaders.at(i)._64; if (hdr->sh_type == SHT_SYMTAB) { SymTabHeader = hdr; break; @@ -865,10 +825,8 @@ void ELFContainer::PrintSymbolTable() const { return; } - LOGMAN_THROW_A_FMT(SymTabHeader->sh_link < SectionHeaders.size(), - "Symbol table string table section is wrong"); - LOGMAN_THROW_AA_FMT(SymTabHeader->sh_entsize == sizeof(Elf64_Sym), - "Entry size doesn't match symbol entry"); + LOGMAN_THROW_A_FMT(SymTabHeader->sh_link < SectionHeaders.size(), "Symbol table string table section is wrong"); + LOGMAN_THROW_AA_FMT(SymTabHeader->sh_entsize == sizeof(Elf64_Sym), "Entry size doesn't match symbol entry"); StringTableHeader = SectionHeaders.at(SymTabHeader->sh_link)._64; StrTab = &RawFile.at(StringTableHeader->sh_offset); @@ -876,34 +834,31 @@ void ELFContainer::PrintSymbolTable() const { const uint64_t NumSymbols = SymTabHeader->sh_size / SymTabHeader->sh_entsize; for (uint64_t i = 0; i < NumSymbols; ++i) { const uint64_t offset = SymTabHeader->sh_offset + i 
* SymTabHeader->sh_entsize; - const auto *Symbol = reinterpret_cast(&RawFile.at(offset)); + const auto* Symbol = reinterpret_cast(&RawFile.at(offset)); - LogMan::Msg::IFmt("{} : {:x} {} {} {} {} {}", i, Symbol->st_value, Symbol->st_size, - uint32_t(Symbol->st_info), uint32_t(Symbol->st_other), - Symbol->st_shndx, &StrTab[Symbol->st_name]); + LogMan::Msg::IFmt("{} : {:x} {} {} {} {} {}", i, Symbol->st_value, Symbol->st_size, uint32_t(Symbol->st_info), + uint32_t(Symbol->st_other), Symbol->st_shndx, &StrTab[Symbol->st_name]); } } } void ELFContainer::PrintRelocationTable() const { if (Mode == MODE_32BIT) { - } - else { - Elf64_Shdr const *RelaHeader{nullptr}; - Elf64_Shdr const *DynSymHeader {nullptr}; + } else { + const Elf64_Shdr* RelaHeader {nullptr}; + const Elf64_Shdr* DynSymHeader {nullptr}; - Elf64_Shdr const *StrHeader = SectionHeaders.at(Header._64.e_shstrndx)._64; - char const *SHStrings = &RawFile.at(StrHeader->sh_offset); + const Elf64_Shdr* StrHeader = SectionHeaders.at(Header._64.e_shstrndx)._64; + const char* SHStrings = &RawFile.at(StrHeader->sh_offset); - Elf64_Shdr const *StringTableHeader{nullptr}; - char const *StrTab{nullptr}; + const Elf64_Shdr* StringTableHeader {nullptr}; + const char* StrTab {nullptr}; for (uint32_t i = 0; i < SectionHeaders.size(); ++i) { - Elf64_Shdr const *hdr = SectionHeaders.at(i)._64; + const Elf64_Shdr* hdr = SectionHeaders.at(i)._64; if (hdr->sh_type == SHT_REL) { LogMan::Msg::DFmt("Unhandled REL section"); - } - else if (hdr->sh_type == SHT_RELA) { + } else if (hdr->sh_type == SHT_RELA) { RelaHeader = hdr; LogMan::Msg::DFmt("Relocation Section: '{}'", &SHStrings[RelaHeader->sh_name]); @@ -920,10 +875,10 @@ void ELFContainer::PrintRelocationTable() const { } const size_t EntryCount = RelaHeader->sh_size / RelaHeader->sh_entsize; - const auto *Entries = reinterpret_cast(&RawFile.at(RelaHeader->sh_offset)); + const auto* Entries = reinterpret_cast(&RawFile.at(RelaHeader->sh_offset)); for (size_t j = 0; j < EntryCount; 
++j) { - const auto *Entry = &Entries[j]; + const auto* Entry = &Entries[j]; const uint32_t Sym = Entry->r_info >> 32; const uint32_t Type = Entry->r_info & ~0U; LogMan::Msg::DFmt("RELA Entry {}", j); @@ -933,7 +888,7 @@ void ELFContainer::PrintRelocationTable() const { LOGMAN_THROW_AA_FMT(DynSymHeader->sh_entsize == sizeof(Elf64_Sym), "Oops, entry size doesn't match"); const uint64_t offset = DynSymHeader->sh_offset + Sym * DynSymHeader->sh_entsize; - const auto *Symbol = reinterpret_cast(&RawFile.at(offset)); + const auto* Symbol = reinterpret_cast(&RawFile.at(offset)); LogMan::Msg::DFmt("\tSym Name: '{}'", &StrTab[Symbol->st_name]); } @@ -941,29 +896,21 @@ void ELFContainer::PrintRelocationTable() const { LogMan::Msg::DFmt("\tadded: 0x{:x}", Entry->r_addend); if (Type == R_X86_64_IRELATIVE) { // 37/0x25 LogMan::Msg::DFmt("\tR_x86_64_IRELATIVE"); - } - else if (Type == R_X86_64_64) { + } else if (Type == R_X86_64_64) { LogMan::Msg::DFmt("\tR_X86_64_64"); - } - else if (Type == R_X86_64_RELATIVE) { + } else if (Type == R_X86_64_RELATIVE) { LogMan::Msg::DFmt("\tR_X86_64_RELATIVE"); - } - else if (Type == R_X86_64_GLOB_DAT) { + } else if (Type == R_X86_64_GLOB_DAT) { LogMan::Msg::DFmt("\tR_X86_64_GLOB_DAT"); - } - else if (Type == R_X86_64_JUMP_SLOT) { + } else if (Type == R_X86_64_JUMP_SLOT) { LogMan::Msg::DFmt("\tR_X86_64_JUMP_SLOT"); - } - else if (Type == R_X86_64_DTPMOD64) { + } else if (Type == R_X86_64_DTPMOD64) { LogMan::Msg::DFmt("\tR_X86_64_DTPMOD64"); - } - else if (Type == R_X86_64_DTPOFF64) { + } else if (Type == R_X86_64_DTPOFF64) { LogMan::Msg::DFmt("\tR_X86_64_DTPOFF64"); - } - else if (Type == R_X86_64_TPOFF64) { + } else if (Type == R_X86_64_TPOFF64) { LogMan::Msg::DFmt("\tR_X86_64_TPOFF64"); - } - else { + } else { LogMan::Msg::DFmt("Unknown relocation type: {}(0x{:x})", Type, Type); } } @@ -972,22 +919,20 @@ void ELFContainer::PrintRelocationTable() const { } } -void ELFContainer::FixupRelocations(void *ELFBase, uint64_t GuestELFBase, 
SymbolGetter Getter) { +void ELFContainer::FixupRelocations(void* ELFBase, uint64_t GuestELFBase, SymbolGetter Getter) { if (Mode == MODE_32BIT) { - } - else { - Elf64_Shdr const *RelaHeader{nullptr}; - Elf64_Shdr const *DynSymHeader {nullptr}; + } else { + const Elf64_Shdr* RelaHeader {nullptr}; + const Elf64_Shdr* DynSymHeader {nullptr}; - Elf64_Shdr const *StringTableHeader{nullptr}; - char const *StrTab{nullptr}; + const Elf64_Shdr* StringTableHeader {nullptr}; + const char* StrTab {nullptr}; for (size_t i = 0; i < SectionHeaders.size(); ++i) { - const auto *hdr = SectionHeaders[i]._64; + const auto* hdr = SectionHeaders[i]._64; if (hdr->sh_type == SHT_REL) { LogMan::Msg::DFmt("Unhandled REL section"); - } - else if (hdr->sh_type == SHT_RELA) { + } else if (hdr->sh_type == SHT_RELA) { RelaHeader = hdr; if (RelaHeader->sh_info != 0) { @@ -1003,52 +948,47 @@ void ELFContainer::FixupRelocations(void *ELFBase, uint64_t GuestELFBase, Symbol } const size_t EntryCount = RelaHeader->sh_size / RelaHeader->sh_entsize; - const auto *Entries = reinterpret_cast(&RawFile.at(RelaHeader->sh_offset)); + const auto* Entries = reinterpret_cast(&RawFile.at(RelaHeader->sh_offset)); for (size_t j = 0; j < EntryCount; ++j) { - const auto *Entry = &Entries[j]; + const auto* Entry = &Entries[j]; const uint32_t Sym = Entry->r_info >> 32; const uint32_t Type = Entry->r_info & ~0U; - const Elf64_Sym *EntrySymbol{nullptr}; - const char *EntrySymbolName{nullptr}; + const Elf64_Sym* EntrySymbol {nullptr}; + const char* EntrySymbolName {nullptr}; if (DynSymHeader && Sym != 0) { LOGMAN_THROW_AA_FMT(DynSymHeader->sh_entsize == sizeof(Elf64_Sym), "Oops, entry size doesn't match"); const uint64_t offset = DynSymHeader->sh_offset + Sym * DynSymHeader->sh_entsize; - EntrySymbol = reinterpret_cast(&RawFile.at(offset)); + EntrySymbol = reinterpret_cast(&RawFile.at(offset)); EntrySymbolName = &StrTab[EntrySymbol->st_name]; } if (Type == R_X86_64_IRELATIVE) { // 37/0x25 // Indirect (B + A) - uint64_t 
*Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); + uint64_t* Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); *Location = GuestELFBase + Entry->r_addend; - } - else if (Type == R_X86_64_64) { + } else if (Type == R_X86_64_64) { // S + A - uint64_t *Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); + uint64_t* Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); if (EntrySymbol != nullptr) { auto ELFSym = Getter(EntrySymbolName, 0); if (ELFSym != nullptr) { *Location = ELFSym->Address + Entry->r_addend; - } - else { + } else { *Location = 0xDEADBEEFBAD0DAD2ULL; } - } - else { + } else { *Location = 0xDEADBEEFBAD0DAD2ULL; } - } - else if (Type == R_X86_64_RELATIVE) { + } else if (Type == R_X86_64_RELATIVE) { // B + A - uint64_t *Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); + uint64_t* Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); *Location = GuestELFBase + Entry->r_addend; - } - else if (Type == R_X86_64_GLOB_DAT) { + } else if (Type == R_X86_64_GLOB_DAT) { // XXX: This is way wrong // S - uint64_t *Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); + uint64_t* Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); if (EntrySymbol != nullptr) { auto ELFSym = Getter(EntrySymbolName, 2); // Leave out Symbols from the main executable and only grab non-weak @@ -1061,20 +1001,17 @@ void ELFContainer::FixupRelocations(void *ELFBase, uint64_t GuestELFBase, Symbol if (ELFSym != nullptr) { *Location = ELFSym->Address; - } - else { + } else { // XXX: This seems to be a loader edge case that if the symbol doesn't exist // and it is a weakly defined GLOB_DAT type then it is allowed to continue? 
// If we set Location to a value then apps crash } - } - else { + } else { *Location = 0xDEADBEEFBAD0DAD1ULL; } - } - else if (Type == R_X86_64_JUMP_SLOT) { + } else if (Type == R_X86_64_JUMP_SLOT) { // S - uint64_t *Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); + uint64_t* Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); if (EntrySymbol != nullptr) { auto ELFSym = Getter(EntrySymbolName, 0); if (!ELFSym) { // XXX: Try again @@ -1083,53 +1020,43 @@ void ELFContainer::FixupRelocations(void *ELFBase, uint64_t GuestELFBase, Symbol if (ELFSym != nullptr) { *Location = ELFSym->Address; - } - else { + } else { // XXX: This seems to be a loader edge case that if the symbol doesn't exist // and it is a weakly defined GLOB_DAT type then it is allowed to continue? *Location = 0xDEADBEEFBAD0DAD5ULL; } - } - else { + } else { *Location = 0xDEADBEEFBAD0DAD4ULL; } - } - else if (Type == R_X86_64_DTPMOD64) { + } else if (Type == R_X86_64_DTPMOD64) { // XXX: This is supposed to be the ID of the module that the symbol comes from for TLS purposes? 
- uint64_t *Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); + uint64_t* Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); *Location = 0; - } - else if (Type == R_X86_64_DTPOFF64) { - uint64_t *Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); + } else if (Type == R_X86_64_DTPOFF64) { + uint64_t* Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); if (EntrySymbol != nullptr) { *Location = EntrySymbol->st_value + Entry->r_addend; - } - else { + } else { *Location = 0xDEADBEEFBAD0DAD6ULL; } - } - else if (Type == R_X86_64_TPOFF64) { - uint64_t *Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); + } else if (Type == R_X86_64_TPOFF64) { + uint64_t* Location = reinterpret_cast(reinterpret_cast(ELFBase) + Entry->r_offset); if (EntrySymbol != nullptr) { // XXX: This is supposed to be a symbol with a TLS offset? *Location = EntrySymbol->st_value + Entry->r_addend; - } - else { + } else { // If we set Location to a value then apps crash // *Location = 0xDEADBEEFBAD0DAD3ULL; LogMan::Msg::DFmt("TPOFF without Entry? 
{:x} + {:x} + {:x}", GuestELFBase, TLSHeader._64->p_paddr, Entry->r_addend); if (1) { *Location = TLSHeader._64->p_paddr + Entry->r_addend; - } - else if (Entry->r_offset == 0x1e3dc8) { + } else if (Entry->r_offset == 0x1e3dc8) { *Location = 0xDEADBEEFBAD0DAD8ULL; - } - else { + } else { *Location = Entry->r_addend - 0xb00'0; } } - } - else { + } else { LogMan::Msg::DFmt("Unknown relocation type: {}(0x{:x})", Type, Type); } } @@ -1141,24 +1068,23 @@ void ELFContainer::FixupRelocations(void *ELFBase, uint64_t GuestELFBase, Symbol void ELFContainer::PrintInitArray() const { if (Mode == MODE_32BIT) { for (size_t i = 0; i < SectionHeaders.size(); ++i) { - const auto *hdr = SectionHeaders[i]._32; + const auto* hdr = SectionHeaders[i]._32; if (hdr->sh_type == SHT_INIT_ARRAY) { const size_t Entries = hdr->sh_size / hdr->sh_entsize; for (size_t j = 0; j < Entries; ++j) { LogMan::Msg::DFmt("init_array[{}]", j); - LogMan::Msg::DFmt("\t{}", *reinterpret_cast(&RawFile.at(hdr->sh_offset+ j * hdr->sh_entsize))); + LogMan::Msg::DFmt("\t{}", *reinterpret_cast< const uint64_t*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize))); } } } - } - else { + } else { for (size_t i = 0; i < SectionHeaders.size(); ++i) { - const auto *hdr = SectionHeaders[i]._64; + const auto* hdr = SectionHeaders[i]._64; if (hdr->sh_type == SHT_INIT_ARRAY) { const size_t Entries = hdr->sh_size / hdr->sh_entsize; for (size_t j = 0; j < Entries; ++j) { LogMan::Msg::DFmt("init_array[{}]", j); - LogMan::Msg::DFmt("\t{}", *reinterpret_cast(&RawFile.at(hdr->sh_offset+ j * hdr->sh_entsize))); + LogMan::Msg::DFmt("\t{}", *reinterpret_cast< const uint64_t*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize))); } } } @@ -1168,22 +1094,20 @@ void ELFContainer::PrintInitArray() const { void ELFContainer::PrintDynamicTable() const { if (Mode == MODE_32BIT) { for (size_t i = 0; i < SectionHeaders.size(); ++i) { - const auto *hdr = SectionHeaders[i]._32; + const auto* hdr = SectionHeaders[i]._32; if (hdr->sh_type == 
SHT_DYNAMIC) { - const auto *StrHeader = SectionHeaders.at(hdr->sh_link)._32; - const char *SHStrings = &RawFile.at(StrHeader->sh_offset); + const auto* StrHeader = SectionHeaders.at(hdr->sh_link)._32; + const char* SHStrings = &RawFile.at(StrHeader->sh_offset); const size_t Entries = hdr->sh_size / hdr->sh_entsize; for (size_t j = 0; i < Entries; ++j) { - const auto *Dynamic = reinterpret_cast(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize)); -#define PRINT(x, y, z) x (Dynamic->d_tag == DT_##y ) LogMan::Msg::DFmt("Dyn {}: (" #y ") 0x{:x}", j, Dynamic->d_un.z); + const auto* Dynamic = reinterpret_cast(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize)); +#define PRINT(x, y, z) x(Dynamic->d_tag == DT_##y) LogMan::Msg::DFmt("Dyn {}: (" #y ") 0x{:x}", j, Dynamic->d_un.z); if (Dynamic->d_tag == DT_NULL) { break; - } - else if (Dynamic->d_tag == DT_NEEDED) { + } else if (Dynamic->d_tag == DT_NEEDED) { LogMan::Msg::DFmt("Dyn {}: (NEEDED) '{}'", j, &SHStrings[Dynamic->d_un.d_val]); - } - else if (Dynamic->d_tag == DT_SONAME) { + } else if (Dynamic->d_tag == DT_SONAME) { LogMan::Msg::DFmt("Dyn {}: (SONAME) '{}'", j, &SHStrings[Dynamic->d_un.d_val]); } PRINT(else if, HASH, d_val) @@ -1220,31 +1144,27 @@ void ELFContainer::PrintDynamicTable() const { PRINT(else if, VERDEF, d_val) PRINT(else if, VERDEFNUM, d_val) PRINT(else if, FLAGS, d_val) - else - LogMan::Msg::DFmt("Unknown dynamic section: {}(0x{:x})", Dynamic->d_tag, Dynamic->d_tag); + else LogMan::Msg::DFmt("Unknown dynamic section: {}(0x{:x})", Dynamic->d_tag, Dynamic->d_tag); #undef PRINT } } } - } - else { + } else { for (size_t i = 0; i < SectionHeaders.size(); ++i) { - const auto *hdr = SectionHeaders[i]._64; + const auto* hdr = SectionHeaders[i]._64; if (hdr->sh_type == SHT_DYNAMIC) { - const auto *StrHeader = SectionHeaders.at(hdr->sh_link)._64; - const char *SHStrings = &RawFile.at(StrHeader->sh_offset); + const auto* StrHeader = SectionHeaders.at(hdr->sh_link)._64; + const char* SHStrings = 
&RawFile.at(StrHeader->sh_offset); const size_t Entries = hdr->sh_size / hdr->sh_entsize; for (size_t j = 0; i < Entries; ++j) { - const auto *Dynamic = reinterpret_cast(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize)); -#define PRINT(x, y, z) x (Dynamic->d_tag == DT_##y ) LogMan::Msg::DFmt("Dyn {}: (" #y ") 0x{:x}", j, Dynamic->d_un.z); + const auto* Dynamic = reinterpret_cast(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize)); +#define PRINT(x, y, z) x(Dynamic->d_tag == DT_##y) LogMan::Msg::DFmt("Dyn {}: (" #y ") 0x{:x}", j, Dynamic->d_un.z); if (Dynamic->d_tag == DT_NULL) { break; - } - else if (Dynamic->d_tag == DT_NEEDED) { + } else if (Dynamic->d_tag == DT_NEEDED) { LogMan::Msg::DFmt("Dyn {}: (NEEDED) '{}'", j, &SHStrings[Dynamic->d_un.d_val]); - } - else if (Dynamic->d_tag == DT_SONAME) { + } else if (Dynamic->d_tag == DT_SONAME) { LogMan::Msg::DFmt("Dyn {}: (SONAME) '{}'", j, &SHStrings[Dynamic->d_un.d_val]); } PRINT(else if, HASH, d_val) @@ -1281,8 +1201,7 @@ void ELFContainer::PrintDynamicTable() const { PRINT(else if, VERDEF, d_val) PRINT(else if, VERDEFNUM, d_val) PRINT(else if, FLAGS, d_val) - else - LogMan::Msg::DFmt("Unknown dynamic section: {}(0x{:x})", Dynamic->d_tag, Dynamic->d_tag); + else LogMan::Msg::DFmt("Unknown dynamic section: {}(0x{:x})", Dynamic->d_tag, Dynamic->d_tag); #undef PRINT } } @@ -1290,16 +1209,18 @@ void ELFContainer::PrintDynamicTable() const { } } -void ELFContainer::GetInitLocations(uint64_t GuestELFBase, fextl::vector *Locations) { +void ELFContainer::GetInitLocations(uint64_t GuestELFBase, fextl::vector* Locations) { if (Mode == MODE_32BIT) { // If INIT exists then add that first for (uint32_t i = 0; i < SectionHeaders.size(); ++i) { - Elf32_Shdr const *hdr = SectionHeaders.at(i)._32; + const Elf32_Shdr* hdr = SectionHeaders.at(i)._32; if (hdr->sh_type == SHT_DYNAMIC) { size_t Entries = hdr->sh_size / hdr->sh_entsize; for (size_t j = 0; i < Entries; ++j) { - Elf32_Dyn const *Dynamic = 
reinterpret_cast(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize)); - if (Dynamic->d_tag == DT_NULL) break; + const Elf32_Dyn* Dynamic = reinterpret_cast< const Elf32_Dyn*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize)); + if (Dynamic->d_tag == DT_NULL) { + break; + } if (Dynamic->d_tag == DT_INIT) { Locations->emplace_back(GuestELFBase + Dynamic->d_un.d_val); } @@ -1309,24 +1230,25 @@ void ELFContainer::GetInitLocations(uint64_t GuestELFBase, fextl::vectorsh_type == SHT_INIT_ARRAY) { size_t Entries = hdr->sh_size / hdr->sh_entsize; for (size_t j = 0; j < Entries; ++j) { - Locations->emplace_back(GuestELFBase + *reinterpret_cast(&RawFile.at(hdr->sh_offset+ j * hdr->sh_entsize))); + Locations->emplace_back(GuestELFBase + *reinterpret_cast< const uint64_t*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize))); } } } - } - else { + } else { // If INIT exists then add that first for (uint32_t i = 0; i < SectionHeaders.size(); ++i) { - Elf64_Shdr const *hdr = SectionHeaders.at(i)._64; + const Elf64_Shdr* hdr = SectionHeaders.at(i)._64; if (hdr->sh_type == SHT_DYNAMIC) { size_t Entries = hdr->sh_size / hdr->sh_entsize; for (size_t j = 0; i < Entries; ++j) { - Elf64_Dyn const *Dynamic = reinterpret_cast(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize)); - if (Dynamic->d_tag == DT_NULL) break; + const Elf64_Dyn* Dynamic = reinterpret_cast< const Elf64_Dyn*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize)); + if (Dynamic->d_tag == DT_NULL) { + break; + } if (Dynamic->d_tag == DT_INIT) { Locations->emplace_back(GuestELFBase + Dynamic->d_un.d_val); } @@ -1336,11 +1258,11 @@ void ELFContainer::GetInitLocations(uint64_t GuestELFBase, fextl::vectorsh_type == SHT_INIT_ARRAY) { size_t Entries = hdr->sh_size / hdr->sh_entsize; for (size_t j = 0; j < Entries; ++j) { - Locations->emplace_back(GuestELFBase + *reinterpret_cast(&RawFile.at(hdr->sh_offset+ j * hdr->sh_entsize))); + Locations->emplace_back(GuestELFBase + *reinterpret_cast< const 
uint64_t*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize))); } } } diff --git a/Source/Tools/CommonTools/Linux/Utils/ELFSymbolDatabase.cpp b/Source/Tools/CommonTools/Linux/Utils/ELFSymbolDatabase.cpp index 48c19b0dc8..5d8d8dcdb0 100644 --- a/Source/Tools/CommonTools/Linux/Utils/ELFSymbolDatabase.cpp +++ b/Source/Tools/CommonTools/Linux/Utils/ELFSymbolDatabase.cpp @@ -2,8 +2,8 @@ /* $info$ tags: glue|elf-parsing -desc: Part of our now defunct ld-linux replacement, keeps tracks of all symbols, loads elfs, handles relocations. Small parts of this are used. -$end_info$ +desc: Part of our now defunct ld-linux replacement, keeps tracks of all symbols, loads elfs, handles relocations. Small parts of this are +used. $end_info$ */ #include "Linux/Utils/ELFSymbolDatabase.h" @@ -33,8 +33,7 @@ void ELFSymbolDatabase::FillLibrarySearchPaths() { LibrarySearchPaths.emplace_back("/usr/local/lib/x86_64-linux-gnu"); LibrarySearchPaths.emplace_back("/lib/x86_64-linux-gnu"); LibrarySearchPaths.emplace_back("/usr/lib/x86_64-linux-gnu"); - } - else { + } else { LibrarySearchPaths.emplace_back("/usr/local/lib/i386-linux-gnu"); LibrarySearchPaths.emplace_back("/lib/i386-linux-gnu"); LibrarySearchPaths.emplace_back("/usr/lib/i386-linux-gnu"); @@ -52,8 +51,8 @@ void ELFSymbolDatabase::FillLibrarySearchPaths() { } } -bool ELFSymbolDatabase::FindLibraryFile(fextl::string *Result, const char *Library) { - for (auto &Path : LibrarySearchPaths) { +bool ELFSymbolDatabase::FindLibraryFile(fextl::string* Result, const char* Library) { + for (auto& Path : LibrarySearchPaths) { const fextl::string TmpPath = fextl::fmt::format("{}/{}", Path, Library); if (FHU::Filesystem::Exists(TmpPath)) { *Result = TmpPath; @@ -63,7 +62,7 @@ bool ELFSymbolDatabase::FindLibraryFile(fextl::string *Result, const char *Libra return false; } -ELFSymbolDatabase::ELFSymbolDatabase(::ELFLoader::ELFContainer *file) +ELFSymbolDatabase::ELFSymbolDatabase(::ELFLoader::ELFContainer* file) : File {file} { 
FillLibrarySearchPaths(); @@ -75,8 +74,8 @@ ELFSymbolDatabase::ELFSymbolDatabase(::ELFLoader::ELFContainer *file) fextl::vector UnfilledDependencies; fextl::vector NewLibraries; - auto FillDependencies = [&UnfilledDependencies, this](ELFInfo *ELF) { - for (auto &Lib : *ELF->Container->GetNecessaryLibs()) { + auto FillDependencies = [&UnfilledDependencies, this](ELFInfo* ELF) { + for (auto& Lib : *ELF->Container->GetNecessaryLibs()) { if (NameToELF.find(Lib) == NameToELF.end()) { UnfilledDependencies.emplace_back(Lib); } @@ -84,15 +83,15 @@ ELFSymbolDatabase::ELFSymbolDatabase(::ELFLoader::ELFContainer *file) }; auto LoadDependencies = [&UnfilledDependencies, &NewLibraries, this]() { - for (auto &Lib : UnfilledDependencies) { + for (auto& Lib : UnfilledDependencies) { if (NameToELF.find(Lib) == NameToELF.end()) { fextl::string LibraryPath; #if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED bool Found = #endif - FindLibraryFile(&LibraryPath, Lib.c_str()); + FindLibraryFile(&LibraryPath, Lib.c_str()); LOGMAN_THROW_A_FMT(Found, "Couldn't find library '{}'", Lib); - auto Info = DynamicELFInfo.emplace_back(new ELFInfo{}); + auto Info = DynamicELFInfo.emplace_back(new ELFInfo {}); Info->Name = Lib; Info->Container = new ::ELFLoader::ELFContainer(LibraryPath, {}, true); NewLibraries.emplace_back(Info); @@ -141,8 +140,7 @@ void ELFSymbolDatabase::FillMemoryLayouts(uint64_t DefinedBase) { if (!DefinedBase) { if (File->GetMode() == ELFContainer::MODE_64BIT) { ELFBases = 0x1'0000'0000; - } - else { + } else { // 32bit we will just load at the lowest memory address we can // Which on Linux is at 0x1'0000 ELFBases = 0x1'0000; @@ -166,8 +164,7 @@ void ELFSymbolDatabase::FillMemoryLayouts(uint64_t DefinedBase) { ELFBases += CurrentELFAlignedSize; ELFMemorySize += CurrentELFAlignedSize; - } - else { + } else { LocalInfo.CustomLayout = File->GetLayout(); uint64_t CurrentELFBase = std::get<0>(LocalInfo.CustomLayout); uint64_t CurrentELFAlignedSize = 
FEXCore::AlignUp(std::get<2>(LocalInfo.CustomLayout), 4096); @@ -207,16 +204,18 @@ void ELFSymbolDatabase::FillInitializationOrder() { std::set AlreadyInList; while (true) { - if (InitializationOrder.size() == DynamicELFInfo.size()) + if (InitializationOrder.size() == DynamicELFInfo.size()) { break; + } - for (auto &ELF : DynamicELFInfo) { + for (auto& ELF : DynamicELFInfo) { // If this ELF is already in the list then skip it - if (AlreadyInList.find(ELF->Name) != AlreadyInList.end()) + if (AlreadyInList.find(ELF->Name) != AlreadyInList.end()) { continue; + } bool AllLibsLoaded = true; - for (auto &Lib : *ELF->Container->GetNecessaryLibs()) { + for (auto& Lib : *ELF->Container->GetNecessaryLibs()) { if (AlreadyInList.find(Lib) == AlreadyInList.end()) { AllLibsLoaded = false; break; @@ -232,7 +231,7 @@ void ELFSymbolDatabase::FillInitializationOrder() { } void ELFSymbolDatabase::FillSymbols() { - auto LocalSymbolFiller = [this](ELFLoader::ELFSymbol *Symbol) { + auto LocalSymbolFiller = [this](ELFLoader::ELFSymbol* Symbol) { Symbols.emplace_back(Symbol); Symbol->Address += LocalInfo.GuestBase; SymbolMap[Symbol->Name] = Symbol; @@ -249,7 +248,7 @@ void ELFSymbolDatabase::FillSymbols() { // Let us fill symbols based on initialization order for (auto ELF : InitializationOrder) { - auto SymbolFiller = [this, &ELF](ELFLoader::ELFSymbol *Symbol) { + auto SymbolFiller = [this, &ELF](ELFLoader::ELFSymbol* Symbol) { Symbols.emplace_back(Symbol); // Offset the address by the guest base Symbol->Address += ELF->GuestBase; @@ -267,7 +266,6 @@ void ELFSymbolDatabase::FillSymbols() { ELF->Container->AddSymbols(SymbolFiller); } - } void ELFSymbolDatabase::MapMemoryRegions(std::function Mapper) { @@ -279,7 +277,7 @@ void ELFSymbolDatabase::MapMemoryRegions(std::function ELFLoader::ELFSymbol* { - SymbolTableType &TablePtr = SymbolMap; - if (Table == 0) + auto SymbolGetter = [this](const char* SymbolName, uint8_t Table) -> ELFLoader::ELFSymbol* { + SymbolTableType& TablePtr = 
SymbolMap; + if (Table == 0) { TablePtr = SymbolMap; - else if (Table == 1) // Global + } else if (Table == 1) { // Global TablePtr = SymbolMapGlobalOnly; - else if (Table == 2) // NoWeak + } else if (Table == 2) { // NoWeak TablePtr = SymbolMapNoWeak; - else if (Table == 3) // No Main + } else if (Table == 3) { // No Main TablePtr = SymbolMapNoMain; - else if (Table == 4) // No Main No Weak + } else if (Table == 4) { // No Main No Weak TablePtr = SymbolMapNoMainNoWeak; + } auto Sym = TablePtr.find(SymbolName); - if (Sym == TablePtr.end()) + if (Sym == TablePtr.end()) { return nullptr; + } return Sym->second; }; @@ -331,27 +331,31 @@ uint64_t ELFSymbolDatabase::DefaultRIP() const { return File->GetEntryPoint() + LocalInfo.GuestBase; } -ELFSymbol const *ELFSymbolDatabase::GetSymbolInRange(RangeType Address) { +const ELFSymbol* ELFSymbolDatabase::GetSymbolInRange(RangeType Address) { auto Sym = SymbolMapByAddress.upper_bound(Address.first); - if (Sym != SymbolMapByAddress.begin()) + if (Sym != SymbolMapByAddress.begin()) { --Sym; - if (Sym == SymbolMapByAddress.end()) + } + if (Sym == SymbolMapByAddress.end()) { return nullptr; + } - if ((Sym->second->Address + Sym->second->Size) < Address.first) + if ((Sym->second->Address + Sym->second->Size) < Address.first) { return nullptr; + } - if (Sym->second->Address > Address.first) + if (Sym->second->Address > Address.first) { return nullptr; + } return Sym->second; } -void ELFSymbolDatabase::GetInitLocations(fextl::vector *Locations) { +void ELFSymbolDatabase::GetInitLocations(fextl::vector* Locations) { // Walk the initialization order and fill the locations for initializations for (auto ELF : InitializationOrder) { ELF->Container->GetInitLocations(ELF->GuestBase, Locations); } } -} +} // namespace ELFLoader diff --git a/Source/Tools/FEXBash/FEXBash.cpp b/Source/Tools/FEXBash/FEXBash.cpp index a8bee59600..f52d8ff02b 100644 --- a/Source/Tools/FEXBash/FEXBash.cpp +++ b/Source/Tools/FEXBash/FEXBash.cpp @@ -23,7 +23,7 @@ 
desc: Launches bash under FEX and passes arguments via -c to it #include #include -int main(int argc, char **argv, char **const envp) { +int main(int argc, char** argv, char** const envp) { FEXCore::Config::Initialize(); FEXCore::Config::AddLayer(FEX::Config::CreateGlobalMainLayer()); FEXCore::Config::AddLayer(FEX::Config::CreateMainLayer()); @@ -53,7 +53,7 @@ int main(int argc, char **argv, char **const envp) { if (!std::filesystem::exists(FEXInterpreterPath)) { FEXInterpreterPath = FEXCore::Config::FindContainerPrefix() + FEXINTERPRETER_PATH; } - const char *FEXArgs[] = { + const char* FEXArgs[] = { FEXInterpreterPath.c_str(), Args.empty() ? BinBashPath.c_str() : BinShPath.c_str(), "-c", @@ -88,15 +88,15 @@ int main(int argc, char **argv, char **const envp) { // But in case the user has set the PS1 environment variable then still prepend // // To get the shell variables as an environment variable then you can do `PS1=$PS1 FEXBash` - std::vector Envp{}; - char *PS1Env{}; + std::vector Envp {}; + char* PS1Env {}; for (unsigned i = 0;; ++i) { - if (envp[i] == nullptr) + if (envp[i] == nullptr) { break; + } if (strstr(envp[i], "PS1=") == envp[i]) { PS1Env = envp[i]; - } - else { + } else { Envp.emplace_back(envp[i]); } } @@ -108,5 +108,5 @@ int main(int argc, char **argv, char **const envp) { Envp.emplace_back(PS1.c_str()); Envp.emplace_back(nullptr); - return execve(Argv[0], const_cast(&Argv.at(0)), const_cast(&Envp[0])); + return execve(Argv[0], const_cast(&Argv.at(0)), const_cast(&Envp[0])); } diff --git a/Source/Tools/FEXConfig/Main.cpp b/Source/Tools/FEXConfig/Main.cpp index c9798fd733..122dc2bc6c 100644 --- a/Source/Tools/FEXConfig/Main.cpp +++ b/Source/Tools/FEXConfig/Main.cpp @@ -19,855 +19,816 @@ #include namespace fextl { - // Helper to convert a std::filesystem::path to a fextl::string. 
- inline fextl::string string_from_path(std::filesystem::path const &Path) { - return Path.string().c_str(); - } +// Helper to convert a std::filesystem::path to a fextl::string. +inline fextl::string string_from_path(const std::filesystem::path& Path) { + return Path.string().c_str(); } +} // namespace fextl namespace { - static std::chrono::time_point GlobalTime{}; - - static bool ConfigOpen{}; - static bool ConfigChanged{}; - static int EnvironmentVariableSelected{}; - static int HostEnvironmentVariableSelected{}; - static int NamedRootFSSelected{-1}; - - static fextl::string ConfigFilename{}; - static fextl::unique_ptr LoadedConfig{}; - - static const char EnvironmentPopupName[] = "#New Environment Variable"; - static const char HostEnvironmentPopupName[] = "#New Host Environment Variable"; - static const char SavedPopupAppName[] = "#SavedApp"; - static const char OpenedPopupAppName[] = "#OpenedApp"; - - static bool OpenMsgPopup{}; - static bool SaveMsgIsOpen{}; - static std::string MsgMessage{}; - static const char MsgPopupName[] = "#Msg"; - static std::chrono::high_resolution_clock::time_point MsgTimerStart{}; - - static bool SelectedOpenFile{}; - static bool SelectedSaveFileAs{}; - static ImGuiFs::Dialog DialogSaveAs{}; - static ImGuiFs::Dialog DialogOpen{}; - - // Named rootfs - static std::vector NamedRootFS{}; - static std::mutex NamedRootFSUpdator{}; - - static std::atomic INotifyFD{-1}; - static int INotifyFolderFD{}; - static std::thread INotifyThreadHandle{}; - static std::atomic_bool INotifyShutdown{}; - - void OpenMsgMessagePopup(fextl::string Message) { - OpenMsgPopup = true; - MsgMessage = Message; - MsgTimerStart = std::chrono::high_resolution_clock::now(); - FEX::GUI::HadUpdate(); - } +static std::chrono::time_point GlobalTime {}; + +static bool ConfigOpen {}; +static bool ConfigChanged {}; +static int EnvironmentVariableSelected {}; +static int HostEnvironmentVariableSelected {}; +static int NamedRootFSSelected {-1}; + +static fextl::string 
ConfigFilename {}; +static fextl::unique_ptr LoadedConfig {}; + +static const char EnvironmentPopupName[] = "#New Environment Variable"; +static const char HostEnvironmentPopupName[] = "#New Host Environment Variable"; +static const char SavedPopupAppName[] = "#SavedApp"; +static const char OpenedPopupAppName[] = "#OpenedApp"; + +static bool OpenMsgPopup {}; +static bool SaveMsgIsOpen {}; +static std::string MsgMessage {}; +static const char MsgPopupName[] = "#Msg"; +static std::chrono::high_resolution_clock::time_point MsgTimerStart {}; + +static bool SelectedOpenFile {}; +static bool SelectedSaveFileAs {}; +static ImGuiFs::Dialog DialogSaveAs {}; +static ImGuiFs::Dialog DialogOpen {}; + +// Named rootfs +static std::vector NamedRootFS {}; +static std::mutex NamedRootFSUpdator {}; + +static std::atomic INotifyFD {-1}; +static int INotifyFolderFD {}; +static std::thread INotifyThreadHandle {}; +static std::atomic_bool INotifyShutdown {}; + +void OpenMsgMessagePopup(fextl::string Message) { + OpenMsgPopup = true; + MsgMessage = Message; + MsgTimerStart = std::chrono::high_resolution_clock::now(); + FEX::GUI::HadUpdate(); +} - void LoadDefaultSettings() { - ConfigOpen = true; - ConfigFilename = {}; - LoadedConfig = fextl::make_unique(); -#define OPT_BASE(type, group, enum, json, default) \ - LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_##enum, std::to_string(default)); -#define OPT_STR(group, enum, json, default) \ - LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_##enum, default); -#define OPT_STRARRAY(group, enum, json, default) // Do nothing +void LoadDefaultSettings() { + ConfigOpen = true; + ConfigFilename = {}; + LoadedConfig = fextl::make_unique(); +#define OPT_BASE(type, group, enum, json, default) LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_##enum, std::to_string(default)); +#define OPT_STR(group, enum, json, default) LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_##enum, default); +#define OPT_STRARRAY(group, enum, 
json, default) // Do nothing #define OPT_STRENUM(group, enum, json, default) \ - LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_##enum, std::to_string(FEXCore::ToUnderlying(default))); + LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_##enum, std::to_string(FEXCore::ToUnderlying(default))); #include - // Erase unnamed options which shouldn't be set - LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_IS_INTERPRETER); - LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_INTERPRETER_INSTALLED); - LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_APP_FILENAME); - LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_APP_CONFIG_NAME); - LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_IS64BIT_MODE); - } - - bool OpenFile(fextl::string Filename, bool LoadDefault = false) { - std::error_code ec{}; - if (!std::filesystem::exists(Filename, ec)) { - if (LoadDefault) { - LoadDefaultSettings(); - ConfigFilename = Filename; - OpenMsgMessagePopup("Opened with default options: " + Filename); - return true; - } - OpenMsgMessagePopup("Couldn't open: " + Filename); - return false; + // Erase unnamed options which shouldn't be set + LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_IS_INTERPRETER); + LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_INTERPRETER_INSTALLED); + LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_APP_FILENAME); + LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_APP_CONFIG_NAME); + LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_IS64BIT_MODE); +} + +bool OpenFile(fextl::string Filename, bool LoadDefault = false) { + std::error_code ec {}; + if (!std::filesystem::exists(Filename, ec)) { + if (LoadDefault) { + LoadDefaultSettings(); + ConfigFilename = Filename; + OpenMsgMessagePopup("Opened with default options: " + Filename); + return true; } - ConfigOpen = true; - ConfigFilename = Filename; - LoadedConfig = FEX::Config::CreateMainLayer(&Filename); - 
LoadedConfig->Load(); + OpenMsgMessagePopup("Couldn't open: " + Filename); + return false; + } + ConfigOpen = true; + ConfigFilename = Filename; + LoadedConfig = FEX::Config::CreateMainLayer(&Filename); + LoadedConfig->Load(); - // Load default options and only overwrite only if the option didn't exist + // Load default options and only overwrite only if the option didn't exist #define OPT_BASE(type, group, enum, json, default) \ - if (!LoadedConfig->OptionExists(FEXCore::Config::ConfigOption::CONFIG_##enum)) { LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_##enum, std::to_string(default)); } + if (!LoadedConfig->OptionExists(FEXCore::Config::ConfigOption::CONFIG_##enum)) { \ + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_##enum, std::to_string(default)); \ + } #define OPT_STR(group, enum, json, default) \ - if (!LoadedConfig->OptionExists(FEXCore::Config::ConfigOption::CONFIG_##enum)) { LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_##enum, default); } -#define OPT_STRARRAY(group, enum, json, default) // Do nothing + if (!LoadedConfig->OptionExists(FEXCore::Config::ConfigOption::CONFIG_##enum)) { \ + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_##enum, default); \ + } +#define OPT_STRARRAY(group, enum, json, default) // Do nothing #define OPT_STRENUM(group, enum, json, default) \ - if (!LoadedConfig->OptionExists(FEXCore::Config::ConfigOption::CONFIG_##enum)) { LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_##enum, std::to_string(FEXCore::ToUnderlying(default))); } + if (!LoadedConfig->OptionExists(FEXCore::Config::ConfigOption::CONFIG_##enum)) { \ + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_##enum, std::to_string(FEXCore::ToUnderlying(default))); \ + } #include - // Erase unnamed options which shouldn't be set - LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_IS_INTERPRETER); - LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_INTERPRETER_INSTALLED); - 
LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_APP_FILENAME); - LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_APP_CONFIG_NAME); - LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_IS64BIT_MODE); + // Erase unnamed options which shouldn't be set + LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_IS_INTERPRETER); + LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_INTERPRETER_INSTALLED); + LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_APP_FILENAME); + LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_APP_CONFIG_NAME); + LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_IS64BIT_MODE); - return true; - } + return true; +} - void LoadNamedRootFSFolder() { - std::scoped_lock lk{NamedRootFSUpdator}; - NamedRootFS.clear(); - fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/"; - std::error_code ec{}; - if (!std::filesystem::exists(RootFS, ec)) { - // Doesn't exist, create the the folder as a user convenience - if (!std::filesystem::create_directories(RootFS, ec)) { - // Well I guess we failed - return; - } +void LoadNamedRootFSFolder() { + std::scoped_lock lk {NamedRootFSUpdator}; + NamedRootFS.clear(); + fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/"; + std::error_code ec {}; + if (!std::filesystem::exists(RootFS, ec)) { + // Doesn't exist, create the the folder as a user convenience + if (!std::filesystem::create_directories(RootFS, ec)) { + // Well I guess we failed + return; } - for (auto &it : std::filesystem::directory_iterator(RootFS)) { - if (it.is_directory()) { + } + for (auto& it : std::filesystem::directory_iterator(RootFS)) { + if (it.is_directory()) { + NamedRootFS.emplace_back(it.path().filename()); + } else if (it.is_regular_file()) { + // If it is a regular file then we need to check if it is a valid archive + if (it.path().extension() == ".sqsh" && FEX::FormatCheck::IsSquashFS(fextl::string_from_path(it.path()))) { + 
NamedRootFS.emplace_back(it.path().filename()); + } else if (it.path().extension() == ".ero" && FEX::FormatCheck::IsEroFS(fextl::string_from_path(it.path()))) { NamedRootFS.emplace_back(it.path().filename()); } - else if (it.is_regular_file()) { - // If it is a regular file then we need to check if it is a valid archive - if (it.path().extension() == ".sqsh" && - FEX::FormatCheck::IsSquashFS(fextl::string_from_path(it.path()))) { - NamedRootFS.emplace_back(it.path().filename()); - } - else if (it.path().extension() == ".ero" && - FEX::FormatCheck::IsEroFS(fextl::string_from_path(it.path()))) { - NamedRootFS.emplace_back(it.path().filename()); - } - } - } - std::sort(NamedRootFS.begin(), NamedRootFS.end()); - } - - void INotifyThread() { - while (!INotifyShutdown) { - constexpr size_t DATA_SIZE = (16 * (sizeof(struct inotify_event) + NAME_MAX + 1)); - char buf[DATA_SIZE]; - int Ret{}; - do { - fd_set Set{}; - FD_ZERO(&Set); - FD_SET(INotifyFD, &Set); - struct timeval tv{}; - // 50 ms - tv.tv_usec = 50000; - Ret = select(INotifyFD + 1, &Set, nullptr, nullptr, &tv); - } while (Ret == 0 && INotifyFD != -1); - - if (Ret == -1 || INotifyFD == -1) { - // Just return on error - INotifyShutdown = true; - return; - } - - // Spin through the events, we don't actually care what they are - while (read(INotifyFD, buf, DATA_SIZE) > 0); - - // Now update the named vector - LoadNamedRootFSFolder(); - - FEX::GUI::HadUpdate(); } } + std::sort(NamedRootFS.begin(), NamedRootFS.end()); +} - void SetupINotify() { - if (INotifyFD != -1) { - // Already setup +void INotifyThread() { + while (!INotifyShutdown) { + constexpr size_t DATA_SIZE = (16 * (sizeof(struct inotify_event) + NAME_MAX + 1)); + char buf[DATA_SIZE]; + int Ret {}; + do { + fd_set Set {}; + FD_ZERO(&Set); + FD_SET(INotifyFD, &Set); + struct timeval tv {}; + // 50 ms + tv.tv_usec = 50000; + Ret = select(INotifyFD + 1, &Set, nullptr, nullptr, &tv); + } while (Ret == 0 && INotifyFD != -1); + + if (Ret == -1 || INotifyFD == -1) 
{ + // Just return on error + INotifyShutdown = true; return; } - INotifyFD = inotify_init1(IN_NONBLOCK | IN_CLOEXEC); - INotifyShutdown = false; + // Spin through the events, we don't actually care what they are + while (read(INotifyFD, buf, DATA_SIZE) > 0) + ; - fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/"; - INotifyFolderFD = inotify_add_watch(INotifyFD, RootFS.c_str(), IN_CREATE | IN_DELETE); - if (INotifyFolderFD != -1) { - INotifyThreadHandle = std::thread(INotifyThread); - } + // Now update the named vector + LoadNamedRootFSFolder(); + + FEX::GUI::HadUpdate(); } +} - void ShutdownINotify() { - close(INotifyFD); - INotifyFD = -1; - if (INotifyThreadHandle.joinable()) { - INotifyThreadHandle.join(); - } +void SetupINotify() { + if (INotifyFD != -1) { + // Already setup + return; } - void SaveFile(fextl::string Filename) { - if (SaveMsgIsOpen) { - // Don't try saving a file while the message is already open. - // Stops us from spam saving the file to the filesystem. 
- return; - } + INotifyFD = inotify_init1(IN_NONBLOCK | IN_CLOEXEC); + INotifyShutdown = false; - if (!ConfigOpen) { - OpenMsgMessagePopup("Can't save file when config isn't open"); - return; - } + fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/"; + INotifyFolderFD = inotify_add_watch(INotifyFD, RootFS.c_str(), IN_CREATE | IN_DELETE); + if (INotifyFolderFD != -1) { + INotifyThreadHandle = std::thread(INotifyThread); + } +} - FEX::Config::SaveLayerToJSON(Filename, LoadedConfig.get()); - ConfigChanged = false; - ConfigFilename = Filename; - OpenMsgMessagePopup("Config Saved to: '" + Filename + "'"); - SaveMsgIsOpen = true; +void ShutdownINotify() { + close(INotifyFD); + INotifyFD = -1; + if (INotifyThreadHandle.joinable()) { + INotifyThreadHandle.join(); + } +} - // Output in terminal as well - printf("Config Saved to: '%s'\n", ConfigFilename.c_str()); +void SaveFile(fextl::string Filename) { + if (SaveMsgIsOpen) { + // Don't try saving a file while the message is already open. + // Stops us from spam saving the file to the filesystem. 
+ return; } - void CloseConfig() { - ConfigOpen = false; - ConfigFilename = {}; - ConfigChanged = false; - LoadedConfig.reset(); + if (!ConfigOpen) { + OpenMsgMessagePopup("Can't save file when config isn't open"); + return; } - void FillCPUConfig() { - char BlockSize[32]{}; + FEX::Config::SaveLayerToJSON(Filename, LoadedConfig.get()); + ConfigChanged = false; + ConfigFilename = Filename; + OpenMsgMessagePopup("Config Saved to: '" + Filename + "'"); + SaveMsgIsOpen = true; - if (ImGui::BeginTabItem("CPU")) { - std::optional Value{}; - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_MAXINST); - if (Value.has_value() && !(*Value)->empty()) { - strncpy(BlockSize, &(*Value)->at(0), 32); - } - if (ImGui::InputText("Block Size:", BlockSize, 32, ImGuiInputTextFlags_EnterReturnsTrue)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_MAXINST, BlockSize); - ConfigChanged = true; - } + // Output in terminal as well + printf("Config Saved to: '%s'\n", ConfigFilename.c_str()); +} - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_MULTIBLOCK); - bool Multiblock = Value.has_value() && **Value == "1"; - if (ImGui::Checkbox("Multiblock", &Multiblock)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_MULTIBLOCK, Multiblock ? 
"1" : "0"); - ConfigChanged = true; - } +void CloseConfig() { + ConfigOpen = false; + ConfigFilename = {}; + ConfigChanged = false; + LoadedConfig.reset(); +} - ImGui::EndTabItem(); +void FillCPUConfig() { + char BlockSize[32] {}; + + if (ImGui::BeginTabItem("CPU")) { + std::optional Value {}; + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_MAXINST); + if (Value.has_value() && !(*Value)->empty()) { + strncpy(BlockSize, &(*Value)->at(0), 32); + } + if (ImGui::InputText("Block Size:", BlockSize, 32, ImGuiInputTextFlags_EnterReturnsTrue)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_MAXINST, BlockSize); + ConfigChanged = true; } - } - template - bool EnvironmentVariableFiller(void *data, int idx, const char** out_text) { - static char TmpString[256]; - auto Value = LoadedConfig->All(Option); - if (Value.has_value()) { - auto List = (*Value); - auto it = List->begin(); + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_MULTIBLOCK); + bool Multiblock = Value.has_value() && **Value == "1"; + if (ImGui::Checkbox("Multiblock", &Multiblock)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_MULTIBLOCK, Multiblock ? 
"1" : "0"); + ConfigChanged = true; + } - // Since this is a list, we don't have a linear allocator that we can just jump to an element - // Just do a quick spin - for (int i = 0; i < idx; ++i) - ++it; + ImGui::EndTabItem(); + } +} - snprintf(TmpString, 256, "%s", it->c_str()); - *out_text = TmpString; +template +bool EnvironmentVariableFiller(void* data, int idx, const char** out_text) { + static char TmpString[256]; + auto Value = LoadedConfig->All(Option); + if (Value.has_value()) { + auto List = (*Value); + auto it = List->begin(); - return true; + // Since this is a list, we don't have a linear allocator that we can just jump to an element + // Just do a quick spin + for (int i = 0; i < idx; ++i) { + ++it; } - return false; + snprintf(TmpString, 256, "%s", it->c_str()); + *out_text = TmpString; + + return true; } - bool NamedRootFSVariableFiller(void *data, int idx, const char** out_text) { - std::scoped_lock lk{NamedRootFSUpdator}; - static char TmpString[256]; - if (idx >= 0 && idx < NamedRootFS.size()) { - // Since this is a list, we don't have a linear allocator that we can just jump to an element - // Just do a quick spin - snprintf(TmpString, 256, "%s", NamedRootFS.at(idx).c_str()); - *out_text = TmpString; + return false; +} - return true; - } +bool NamedRootFSVariableFiller(void* data, int idx, const char** out_text) { + std::scoped_lock lk {NamedRootFSUpdator}; + static char TmpString[256]; + if (idx >= 0 && idx < NamedRootFS.size()) { + // Since this is a list, we don't have a linear allocator that we can just jump to an element + // Just do a quick spin + snprintf(TmpString, 256, "%s", NamedRootFS.at(idx).c_str()); + *out_text = TmpString; - return false; + return true; } + return false; +} - template - void DeleteEnvironmentVariable(int idx) { - auto Value = LoadedConfig->All(Option); - auto List = (*Value); - auto it = List->begin(); - // Since this is a list, we don't have a linear allocator that we can just jump to an element - // Just do a 
quick spin - for (int i = 0; i < idx; ++i) - ++it; +template +void DeleteEnvironmentVariable(int idx) { + auto Value = LoadedConfig->All(Option); + auto List = (*Value); + auto it = List->begin(); - List->erase(it); - ConfigChanged = true; + // Since this is a list, we don't have a linear allocator that we can just jump to an element + // Just do a quick spin + for (int i = 0; i < idx; ++i) { + ++it; } - void AddNewEnvironmentVariable() { - char Environment[256]{}; - char HostEnvironment[256]{}; + List->erase(it); + ConfigChanged = true; +} - if (ImGui::BeginPopup(EnvironmentPopupName)) { - if (ImGui::InputText("New Environment", Environment, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { - LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_ENV, Environment); - ImGui::CloseCurrentPopup(); - ConfigChanged = true; - } +void AddNewEnvironmentVariable() { + char Environment[256] {}; + char HostEnvironment[256] {}; - ImGui::EndPopup(); + if (ImGui::BeginPopup(EnvironmentPopupName)) { + if (ImGui::InputText("New Environment", Environment, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { + LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_ENV, Environment); + ImGui::CloseCurrentPopup(); + ConfigChanged = true; } - ImGui::PushID(1); - if (ImGui::BeginPopup(HostEnvironmentPopupName)) { - if (ImGui::InputText("New Environment", HostEnvironment, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { - LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_HOSTENV, HostEnvironment); - ImGui::CloseCurrentPopup(); - ConfigChanged = true; - } + ImGui::EndPopup(); + } - ImGui::EndPopup(); + ImGui::PushID(1); + if (ImGui::BeginPopup(HostEnvironmentPopupName)) { + if (ImGui::InputText("New Environment", HostEnvironment, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { + LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_HOSTENV, HostEnvironment); + ImGui::CloseCurrentPopup(); + ConfigChanged = true; } - ImGui::PopID(); + + ImGui::EndPopup(); } + ImGui::PopID(); +} - void 
FillEmulationConfig() { - char RootFS[256]{}; - char ThunkHostPath[256]{}; - char ThunkGuestPath[256]{}; - char ThunkConfigPath[256]{}; +void FillEmulationConfig() { + char RootFS[256] {}; + char ThunkHostPath[256] {}; + char ThunkGuestPath[256] {}; + char ThunkConfigPath[256] {}; - int NumEnvironmentVariables{}; - int NumHostEnvironmentVariables{}; - int NumRootFSPaths = NamedRootFS.size(); + int NumEnvironmentVariables {}; + int NumHostEnvironmentVariables {}; + int NumRootFSPaths = NamedRootFS.size(); - if (ImGui::BeginTabItem("Emulation")) { - auto Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_ROOTFS); - if (Value.has_value() && !(*Value)->empty()) { - strncpy(RootFS, &(*Value)->at(0), 256); - } - ImGui::Text("Available named RootFS folders: %d", NumRootFSPaths); + if (ImGui::BeginTabItem("Emulation")) { + auto Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_ROOTFS); + if (Value.has_value() && !(*Value)->empty()) { + strncpy(RootFS, &(*Value)->at(0), 256); + } + ImGui::Text("Available named RootFS folders: %d", NumRootFSPaths); - if (ImGui::ListBox("Named RootFS folders", &NamedRootFSSelected, NamedRootFSVariableFiller, nullptr, NumRootFSPaths)) { - strncpy(RootFS, NamedRootFS.at(NamedRootFSSelected).c_str(), 256); - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_ROOTFS, RootFS); - ConfigChanged = true; - } + if (ImGui::ListBox("Named RootFS folders", &NamedRootFSSelected, NamedRootFSVariableFiller, nullptr, NumRootFSPaths)) { + strncpy(RootFS, NamedRootFS.at(NamedRootFSSelected).c_str(), 256); + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_ROOTFS, RootFS); + ConfigChanged = true; + } - if (ImGui::InputText("RootFS:", RootFS, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { - NamedRootFSSelected = -1; - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_ROOTFS, RootFS); - ConfigChanged = true; - } + if (ImGui::InputText("RootFS:", RootFS, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { + 
NamedRootFSSelected = -1; + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_ROOTFS, RootFS); + ConfigChanged = true; + } - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_THUNKHOSTLIBS); - if (Value.has_value() && !(*Value)->empty()) { - strncpy(ThunkHostPath, &(*Value)->at(0), 256); - } - if (ImGui::InputText("Thunk Host library folder:", ThunkHostPath, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_THUNKHOSTLIBS, ThunkHostPath); - ConfigChanged = true; - } + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_THUNKHOSTLIBS); + if (Value.has_value() && !(*Value)->empty()) { + strncpy(ThunkHostPath, &(*Value)->at(0), 256); + } + if (ImGui::InputText("Thunk Host library folder:", ThunkHostPath, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_THUNKHOSTLIBS, ThunkHostPath); + ConfigChanged = true; + } - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_THUNKGUESTLIBS); - if (Value.has_value() && !(*Value)->empty()) { - strncpy(ThunkGuestPath, &(*Value)->at(0), 256); - } - if (ImGui::InputText("Thunk Guest library folder:", ThunkGuestPath, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_THUNKGUESTLIBS, ThunkGuestPath); - ConfigChanged = true; - } + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_THUNKGUESTLIBS); + if (Value.has_value() && !(*Value)->empty()) { + strncpy(ThunkGuestPath, &(*Value)->at(0), 256); + } + if (ImGui::InputText("Thunk Guest library folder:", ThunkGuestPath, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_THUNKGUESTLIBS, ThunkGuestPath); + ConfigChanged = true; + } - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_THUNKCONFIG); - if (Value.has_value() && !(*Value)->empty()) { - strncpy(ThunkConfigPath, &(*Value)->at(0), 
256); - } - if (ImGui::InputText("Thunk Config file:", ThunkConfigPath, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_THUNKCONFIG, ThunkConfigPath); - ConfigChanged = true; - } + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_THUNKCONFIG); + if (Value.has_value() && !(*Value)->empty()) { + strncpy(ThunkConfigPath, &(*Value)->at(0), 256); + } + if (ImGui::InputText("Thunk Config file:", ThunkConfigPath, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_THUNKCONFIG, ThunkConfigPath); + ConfigChanged = true; + } - auto ValueList = LoadedConfig->All(FEXCore::Config::ConfigOption::CONFIG_ENV); - auto ValueHostList = LoadedConfig->All(FEXCore::Config::ConfigOption::CONFIG_HOSTENV); - if (ValueList.has_value()) { - NumEnvironmentVariables = (*ValueList)->size(); - } + auto ValueList = LoadedConfig->All(FEXCore::Config::ConfigOption::CONFIG_ENV); + auto ValueHostList = LoadedConfig->All(FEXCore::Config::ConfigOption::CONFIG_HOSTENV); + if (ValueList.has_value()) { + NumEnvironmentVariables = (*ValueList)->size(); + } - if (ValueHostList.has_value()) { - NumHostEnvironmentVariables = (*ValueHostList)->size(); - } + if (ValueHostList.has_value()) { + NumHostEnvironmentVariables = (*ValueHostList)->size(); + } - ImGui::Text("Number of environment variables: %d", NumEnvironmentVariables); + ImGui::Text("Number of environment variables: %d", NumEnvironmentVariables); - ImGui::ListBox("Environment variables", &EnvironmentVariableSelected, EnvironmentVariableFiller, nullptr, NumEnvironmentVariables); + ImGui::ListBox("Environment variables", &EnvironmentVariableSelected, + EnvironmentVariableFiller, nullptr, NumEnvironmentVariables); - if (ImGui::SmallButton("+")) { - ImGui::OpenPopup(EnvironmentPopupName); - } + if (ImGui::SmallButton("+")) { + ImGui::OpenPopup(EnvironmentPopupName); + } - if (NumEnvironmentVariables) { - ImGui::SameLine(); - 
if (ImGui::SmallButton("-")) { - DeleteEnvironmentVariable(EnvironmentVariableSelected); - EnvironmentVariableSelected = std::max(0, EnvironmentVariableSelected - 1); - } + if (NumEnvironmentVariables) { + ImGui::SameLine(); + if (ImGui::SmallButton("-")) { + DeleteEnvironmentVariable(EnvironmentVariableSelected); + EnvironmentVariableSelected = std::max(0, EnvironmentVariableSelected - 1); } + } - ImGui::PushID(1); - ImGui::Text("Number of Host environment variables: %d", NumHostEnvironmentVariables); + ImGui::PushID(1); + ImGui::Text("Number of Host environment variables: %d", NumHostEnvironmentVariables); - ImGui::ListBox("Host Env variables", &HostEnvironmentVariableSelected, EnvironmentVariableFiller, nullptr, NumHostEnvironmentVariables); + ImGui::ListBox("Host Env variables", &HostEnvironmentVariableSelected, + EnvironmentVariableFiller, nullptr, NumHostEnvironmentVariables); - if (ImGui::SmallButton("+")) { - ImGui::OpenPopup(HostEnvironmentPopupName); - } + if (ImGui::SmallButton("+")) { + ImGui::OpenPopup(HostEnvironmentPopupName); + } - if (NumHostEnvironmentVariables) { - ImGui::SameLine(); - if (ImGui::SmallButton("-")) { - DeleteEnvironmentVariable(HostEnvironmentVariableSelected); - HostEnvironmentVariableSelected = std::max(0, HostEnvironmentVariableSelected - 1); - } + if (NumHostEnvironmentVariables) { + ImGui::SameLine(); + if (ImGui::SmallButton("-")) { + DeleteEnvironmentVariable(HostEnvironmentVariableSelected); + HostEnvironmentVariableSelected = std::max(0, HostEnvironmentVariableSelected - 1); } - ImGui::PopID(); + } + ImGui::PopID(); - // Only draws if popup is open - AddNewEnvironmentVariable(); + // Only draws if popup is open + AddNewEnvironmentVariable(); - ImGui::Text("Debugging:"); - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_O0); - bool DisablePasses = Value.has_value() && **Value == "1"; - if (ImGui::Checkbox("Disable Optimization Passes", &DisablePasses)) { - 
LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_O0, DisablePasses ? "1" : "0"); - ConfigChanged = true; - } + ImGui::Text("Debugging:"); + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_O0); + bool DisablePasses = Value.has_value() && **Value == "1"; + if (ImGui::Checkbox("Disable Optimization Passes", &DisablePasses)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_O0, DisablePasses ? "1" : "0"); + ConfigChanged = true; + } - ImGui::Text("Ahead Of Time JIT Options:"); - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_AOTIRGENERATE); - bool AOTGenerate = Value.has_value() && **Value == "1"; - if (ImGui::Checkbox("Generate", &AOTGenerate)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_AOTIRGENERATE, AOTGenerate ? "1" : "0"); - ConfigChanged = true; - } + ImGui::Text("Ahead Of Time JIT Options:"); + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_AOTIRGENERATE); + bool AOTGenerate = Value.has_value() && **Value == "1"; + if (ImGui::Checkbox("Generate", &AOTGenerate)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_AOTIRGENERATE, AOTGenerate ? "1" : "0"); + ConfigChanged = true; + } - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_AOTIRCAPTURE); - bool AOTCapture = Value.has_value() && **Value == "1"; - if (ImGui::Checkbox("Capture", &AOTCapture)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_AOTIRCAPTURE, AOTCapture ? "1" : "0"); - ConfigChanged = true; - } + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_AOTIRCAPTURE); + bool AOTCapture = Value.has_value() && **Value == "1"; + if (ImGui::Checkbox("Capture", &AOTCapture)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_AOTIRCAPTURE, AOTCapture ? 
"1" : "0"); + ConfigChanged = true; + } - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_AOTIRLOAD); - bool AOTLoad = Value.has_value() && **Value == "1"; - if (ImGui::Checkbox("Load", &AOTLoad)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_AOTIRLOAD, AOTLoad ? "1" : "0"); - ConfigChanged = true; - } + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_AOTIRLOAD); + bool AOTLoad = Value.has_value() && **Value == "1"; + if (ImGui::Checkbox("Load", &AOTLoad)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_AOTIRLOAD, AOTLoad ? "1" : "0"); + ConfigChanged = true; + } - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_CACHEOBJECTCODECOMPILATION); + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_CACHEOBJECTCODECOMPILATION); - ImGui::Text("Cache JIT object code:"); - int CacheJITObjectCode = 0; + ImGui::Text("Cache JIT object code:"); + int CacheJITObjectCode = 0; - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_CACHEOBJECTCODECOMPILATION); - if (Value.has_value()) { - if (**Value == "0") { - CacheJITObjectCode = FEXCore::Config::ConfigObjectCodeHandler::CONFIG_NONE; - } else if (**Value == "1") { - CacheJITObjectCode = FEXCore::Config::ConfigObjectCodeHandler::CONFIG_READ; - } else if (**Value == "2") { - CacheJITObjectCode = FEXCore::Config::ConfigObjectCodeHandler::CONFIG_READWRITE; - } + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_CACHEOBJECTCODECOMPILATION); + if (Value.has_value()) { + if (**Value == "0") { + CacheJITObjectCode = FEXCore::Config::ConfigObjectCodeHandler::CONFIG_NONE; + } else if (**Value == "1") { + CacheJITObjectCode = FEXCore::Config::ConfigObjectCodeHandler::CONFIG_READ; + } else if (**Value == "2") { + CacheJITObjectCode = FEXCore::Config::ConfigObjectCodeHandler::CONFIG_READWRITE; } + } - bool CacheChanged = false; - CacheChanged |= ImGui::RadioButton("Off", &CacheJITObjectCode, 
FEXCore::Config::ConfigObjectCodeHandler::CONFIG_NONE); ImGui::SameLine(); - CacheChanged |= ImGui::RadioButton("Read-only", &CacheJITObjectCode, FEXCore::Config::ConfigObjectCodeHandler::CONFIG_READ); ImGui::SameLine(); - CacheChanged |= ImGui::RadioButton("Read/Write", &CacheJITObjectCode, FEXCore::Config::ConfigObjectCodeHandler::CONFIG_READWRITE); - - if (CacheChanged) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_CACHEOBJECTCODECOMPILATION, std::to_string(CacheJITObjectCode)); - ConfigChanged = true; - } + bool CacheChanged = false; + CacheChanged |= ImGui::RadioButton("Off", &CacheJITObjectCode, FEXCore::Config::ConfigObjectCodeHandler::CONFIG_NONE); + ImGui::SameLine(); + CacheChanged |= ImGui::RadioButton("Read-only", &CacheJITObjectCode, FEXCore::Config::ConfigObjectCodeHandler::CONFIG_READ); + ImGui::SameLine(); + CacheChanged |= ImGui::RadioButton("Read/Write", &CacheJITObjectCode, FEXCore::Config::ConfigObjectCodeHandler::CONFIG_READWRITE); - ImGui::EndTabItem(); + if (CacheChanged) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_CACHEOBJECTCODECOMPILATION, std::to_string(CacheJITObjectCode)); + ConfigChanged = true; } - } - void FillLoggingConfig() { - char LogFile[256]{}; - char IRDump[256]{}; + ImGui::EndTabItem(); + } +} - if (ImGui::BeginTabItem("Logging")) { - auto Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_SILENTLOG); - bool SilentLog = Value.has_value() && **Value == "1"; - if (ImGui::Checkbox("Silent Logging", &SilentLog)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_SILENTLOG, SilentLog ? 
"1" : "0"); - ConfigChanged = true; - } +void FillLoggingConfig() { + char LogFile[256] {}; + char IRDump[256] {}; - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_OUTPUTLOG); - if (Value.has_value() && !(*Value)->empty()) { - strncpy(LogFile, &(*Value)->at(0), 256); - } - if (ImGui::InputText("Output log file:", LogFile, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_OUTPUTLOG, LogFile); - ConfigChanged = true; - } + if (ImGui::BeginTabItem("Logging")) { + auto Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_SILENTLOG); + bool SilentLog = Value.has_value() && **Value == "1"; + if (ImGui::Checkbox("Silent Logging", &SilentLog)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_SILENTLOG, SilentLog ? "1" : "0"); + ConfigChanged = true; + } - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_DUMPIR); - if (Value.has_value() && !(*Value)->empty()) { - strncpy(IRDump, &(*Value)->at(0), 256); - } - if (ImGui::InputText("IR Dump location:", IRDump, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_DUMPIR, IRDump); - ConfigChanged = true; - } + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_OUTPUTLOG); + if (Value.has_value() && !(*Value)->empty()) { + strncpy(LogFile, &(*Value)->at(0), 256); + } + if (ImGui::InputText("Output log file:", LogFile, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_OUTPUTLOG, LogFile); + ConfigChanged = true; + } - ImGui::EndTabItem(); + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_DUMPIR); + if (Value.has_value() && !(*Value)->empty()) { + strncpy(IRDump, &(*Value)->at(0), 256); + } + if (ImGui::InputText("IR Dump location:", IRDump, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_DUMPIR, IRDump); + 
ConfigChanged = true; } - } - void FillHackConfig() { - if (ImGui::BeginTabItem("Hacks")) { - auto Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED); - bool TSOEnabled = Value.has_value() && **Value == "1"; - if (ImGui::Checkbox("TSO Enabled", &TSOEnabled)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED, TSOEnabled ? "1" : "0"); - ConfigChanged = true; - } + ImGui::EndTabItem(); + } +} - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_PARANOIDTSO); - bool ParanoidTSOEnabled = Value.has_value() && **Value == "1"; - if (ImGui::Checkbox("Paranoid TSO Enabled", &ParanoidTSOEnabled)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_PARANOIDTSO, ParanoidTSOEnabled ? "1" : "0"); - ConfigChanged = true; - } +void FillHackConfig() { + if (ImGui::BeginTabItem("Hacks")) { + auto Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED); + bool TSOEnabled = Value.has_value() && **Value == "1"; + if (ImGui::Checkbox("TSO Enabled", &TSOEnabled)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED, TSOEnabled ? "1" : "0"); + ConfigChanged = true; + } - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_X87REDUCEDPRECISION); - bool X87ReducedPrecision = Value.has_value() && **Value == "1"; - if (ImGui::Checkbox("X87 Reduced Precision", &X87ReducedPrecision)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_X87REDUCEDPRECISION, X87ReducedPrecision ? "1" : "0"); - ConfigChanged = true; - } + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_PARANOIDTSO); + bool ParanoidTSOEnabled = Value.has_value() && **Value == "1"; + if (ImGui::Checkbox("Paranoid TSO Enabled", &ParanoidTSOEnabled)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_PARANOIDTSO, ParanoidTSOEnabled ? 
"1" : "0"); + ConfigChanged = true; + } - ImGui::Text("SMC Checks: "); - int SMCChecks = FEXCore::Config::CONFIG_SMC_MMAN; - - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_SMCCHECKS); - if (Value.has_value()) { - if (**Value == "0") { - SMCChecks = FEXCore::Config::CONFIG_SMC_NONE; - } else if (**Value == "1") { - SMCChecks = FEXCore::Config::CONFIG_SMC_MTRACK; - } else if (**Value == "2") { - SMCChecks = FEXCore::Config::CONFIG_SMC_FULL; - } else if (**Value == "3") { - SMCChecks = FEXCore::Config::CONFIG_SMC_MMAN; - } - } + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_X87REDUCEDPRECISION); + bool X87ReducedPrecision = Value.has_value() && **Value == "1"; + if (ImGui::Checkbox("X87 Reduced Precision", &X87ReducedPrecision)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_X87REDUCEDPRECISION, X87ReducedPrecision ? "1" : "0"); + ConfigChanged = true; + } - bool SMCChanged = false; - SMCChanged |= ImGui::RadioButton("None", &SMCChecks, FEXCore::Config::CONFIG_SMC_NONE); ImGui::SameLine(); - SMCChanged |= ImGui::RadioButton("MTrack (Default)", &SMCChecks, FEXCore::Config::CONFIG_SMC_MTRACK); ImGui::SameLine(); - SMCChanged |= ImGui::RadioButton("Full", &SMCChecks, FEXCore::Config::CONFIG_SMC_FULL); - SMCChanged |= ImGui::RadioButton("MMan (Deprecated)", &SMCChecks, FEXCore::Config::CONFIG_SMC_MMAN); ImGui::SameLine(); + ImGui::Text("SMC Checks: "); + int SMCChecks = FEXCore::Config::CONFIG_SMC_MMAN; - if (SMCChanged) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_SMCCHECKS, std::to_string(SMCChecks)); + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_SMCCHECKS); + if (Value.has_value()) { + if (**Value == "0") { + SMCChecks = FEXCore::Config::CONFIG_SMC_NONE; + } else if (**Value == "1") { + SMCChecks = FEXCore::Config::CONFIG_SMC_MTRACK; + } else if (**Value == "2") { + SMCChecks = FEXCore::Config::CONFIG_SMC_FULL; + } else if (**Value == "3") { + SMCChecks = 
FEXCore::Config::CONFIG_SMC_MMAN; } + } - Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_ABILOCALFLAGS); - bool UnsafeLocalFlags = Value.has_value() && **Value == "1"; - if (ImGui::Checkbox("Unsafe local flags optimization", &UnsafeLocalFlags)) { - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_ABILOCALFLAGS, UnsafeLocalFlags ? "1" : "0"); - ConfigChanged = true; - } + bool SMCChanged = false; + SMCChanged |= ImGui::RadioButton("None", &SMCChecks, FEXCore::Config::CONFIG_SMC_NONE); + ImGui::SameLine(); + SMCChanged |= ImGui::RadioButton("MTrack (Default)", &SMCChecks, FEXCore::Config::CONFIG_SMC_MTRACK); + ImGui::SameLine(); + SMCChanged |= ImGui::RadioButton("Full", &SMCChecks, FEXCore::Config::CONFIG_SMC_FULL); + SMCChanged |= ImGui::RadioButton("MMan (Deprecated)", &SMCChecks, FEXCore::Config::CONFIG_SMC_MMAN); + ImGui::SameLine(); + + if (SMCChanged) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_SMCCHECKS, std::to_string(SMCChecks)); + } - ImGui::EndTabItem(); + Value = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_ABILOCALFLAGS); + bool UnsafeLocalFlags = Value.has_value() && **Value == "1"; + if (ImGui::Checkbox("Unsafe local flags optimization", &UnsafeLocalFlags)) { + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_ABILOCALFLAGS, UnsafeLocalFlags ? 
"1" : "0"); + ConfigChanged = true; } + + ImGui::EndTabItem(); } +} - static const std::map ConfigToNameLookup = {{ +static const std::map ConfigToNameLookup = {{ #define OPT_BASE(type, group, enum, json, default) {FEXCore::Config::ConfigOption::CONFIG_##enum, #json}, #include - }}; - - struct TmpString { - char Str[256]; - std::string Name; - }; - static std::vector> AdvancedOptions{}; - - void UpdateAdvancedOptionsVector() { - AdvancedOptions.clear(); - // Push everything in to our vector table that we can modify instead of the map - auto Options = LoadedConfig->GetOptionMap(); - AdvancedOptions.resize(Options.size()); - - size_t i = 0; - for (auto &Option : Options) { - auto ConfigName = ConfigToNameLookup.find(Option.first); - auto &AdvancedOption = AdvancedOptions.at(i); - AdvancedOption.resize(Option.second.size()); - size_t j = 0; - for (auto &OptionList : Option.second) { - strcpy(AdvancedOption[j].Str, OptionList.c_str()); - AdvancedOption[j].Name = ConfigName->second; - if (Option.second.size() > 1) { - AdvancedOption[j].Name += " " + std::to_string(j); - } - ++j; - } - ++i; - } +}}; + +struct TmpString { + char Str[256]; + std::string Name; +}; +static std::vector> AdvancedOptions {}; + +void UpdateAdvancedOptionsVector() { + AdvancedOptions.clear(); + // Push everything in to our vector table that we can modify instead of the map + auto Options = LoadedConfig->GetOptionMap(); + AdvancedOptions.resize(Options.size()); + + size_t i = 0; + for (auto& Option : Options) { + auto ConfigName = ConfigToNameLookup.find(Option.first); + auto& AdvancedOption = AdvancedOptions.at(i); + AdvancedOption.resize(Option.second.size()); + size_t j = 0; + for (auto& OptionList : Option.second) { + strcpy(AdvancedOption[j].Str, OptionList.c_str()); + AdvancedOption[j].Name = ConfigName->second; + if (Option.second.size() > 1) { + AdvancedOption[j].Name += " " + std::to_string(j); + } + ++j; + } + ++i; } +} - void FillAdvancedConfig() { - if (ImGui::BeginTabItem("Advanced")) 
{ - auto Options = LoadedConfig->GetOptionMap(); - - if (ImGui::SmallButton("Refresh") || AdvancedOptions.size() != Options.size()) { - UpdateAdvancedOptionsVector(); - } - - if (Options.size()) { - ImGui::Columns(2); - size_t i = 0; - for (auto &Option : Options) { - auto ConfigName = ConfigToNameLookup.find(Option.first); - ImGui::Text("%s", ConfigName->second.c_str()); - ImGui::NextColumn(); - - auto &AdvancedOption = AdvancedOptions.at(i); - size_t j = 0; - - bool Stop = false; - for (auto &OptionList : AdvancedOption) { - //ImGui::Text("%s", OptionList.Str); - if (ImGui::InputText(OptionList.Name.c_str(), OptionList.Str, sizeof(TmpString), ImGuiInputTextFlags_EnterReturnsTrue)) { - if (Option.second.size() == 1) { - LoadedConfig->EraseSet(Option.first, OptionList.Str); - } - else { - auto &All = Option.second; - auto Iter = All.begin(); - std::advance(Iter, j); - *Iter = OptionList.Str; - - LoadedConfig->Erase(Option.first); - for (auto &Value : All) { - LoadedConfig->Set(Option.first, Value); - } +void FillAdvancedConfig() { + if (ImGui::BeginTabItem("Advanced")) { + auto Options = LoadedConfig->GetOptionMap(); + if (ImGui::SmallButton("Refresh") || AdvancedOptions.size() != Options.size()) { + UpdateAdvancedOptionsVector(); + } + + if (Options.size()) { + ImGui::Columns(2); + size_t i = 0; + for (auto& Option : Options) { + auto ConfigName = ConfigToNameLookup.find(Option.first); + ImGui::Text("%s", ConfigName->second.c_str()); + ImGui::NextColumn(); + + auto& AdvancedOption = AdvancedOptions.at(i); + size_t j = 0; + + bool Stop = false; + for (auto& OptionList : AdvancedOption) { + // ImGui::Text("%s", OptionList.Str); + if (ImGui::InputText(OptionList.Name.c_str(), OptionList.Str, sizeof(TmpString), ImGuiInputTextFlags_EnterReturnsTrue)) { + if (Option.second.size() == 1) { + LoadedConfig->EraseSet(Option.first, OptionList.Str); + } else { + auto& All = Option.second; + auto Iter = All.begin(); + std::advance(Iter, j); + *Iter = OptionList.Str; + + 
LoadedConfig->Erase(Option.first); + for (auto& Value : All) { + LoadedConfig->Set(Option.first, Value); } - ConfigChanged = true; - UpdateAdvancedOptionsVector(); } + ConfigChanged = true; + UpdateAdvancedOptionsVector(); + } - ImGui::SameLine(); - - ImGui::PushID(OptionList.Name.c_str()); - if (ImGui::SmallButton("-")) { - if (Option.second.size() == 1) { - LoadedConfig->Erase(Option.first); - } - else { - auto &All = Option.second; - auto Iter = All.begin(); - std::advance(Iter, j); - All.erase(Iter); - - LoadedConfig->Erase(Option.first); - for (auto &Value : All) { - LoadedConfig->Set(Option.first, Value); - } + ImGui::SameLine(); + + ImGui::PushID(OptionList.Name.c_str()); + if (ImGui::SmallButton("-")) { + if (Option.second.size() == 1) { + LoadedConfig->Erase(Option.first); + } else { + auto& All = Option.second; + auto Iter = All.begin(); + std::advance(Iter, j); + All.erase(Iter); + + LoadedConfig->Erase(Option.first); + for (auto& Value : All) { + LoadedConfig->Set(Option.first, Value); } - Stop = true; - ConfigChanged = true; - UpdateAdvancedOptionsVector(); } - ImGui::PopID(); - - ++j; + Stop = true; + ConfigChanged = true; + UpdateAdvancedOptionsVector(); } + ImGui::PopID(); - ImGui::NextColumn(); - ++i; - if (Stop) { - break; - } + ++j; } - } - ImGui::EndTabItem(); - } - } - - void FillConfigWindow() { - ImGui::BeginTabBar("Config"); - FillCPUConfig(); - FillEmulationConfig(); - FillLoggingConfig(); - FillHackConfig(); - FillAdvancedConfig(); - ImGui::EndTabBar(); - } - - bool DrawUI() { - ImGuiIO& io = ImGui::GetIO(); - auto current_time = std::chrono::high_resolution_clock::now(); - auto Diff = std::chrono::duration_cast(current_time - GlobalTime); - io.DeltaTime = Diff.count() > 0 ? Diff.count() : 1.0f/60.0f; - GlobalTime = current_time; - - ImGui::NewFrame(); - - // We are using the ImGuiWindowFlags_NoDocking flag to make the parent window not dockable into, - // because it would be confusing to have two docking targets within each others. 
- ImGuiWindowFlags window_flags = ImGuiWindowFlags_MenuBar | ImGuiWindowFlags_NoDocking; - ImGuiViewport* viewport = ImGui::GetMainViewport(); - ImGui::SetNextWindowPos(viewport->Pos); - ImGui::SetNextWindowSize(viewport->Size); - ImGui::SetNextWindowViewport(viewport->ID); - ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.0f); - ImGui::PushStyleVar(ImGuiStyleVar_WindowBorderSize, 0.0f); - window_flags |= ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove; - window_flags |= ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoNavFocus; - - ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0.0f, 0.0f)); - ImGui::Begin("DockSpace", nullptr, window_flags); - ImGui::PopStyleVar(3); - - struct { - bool Open{}; - bool OpenDefault{}; - bool OpenAppProfile{}; - bool Save{}; - bool SaveAs{}; - bool SaveDefault{}; - bool Close{}; - bool Quit{}; - } Selected; - - char AppName[256]{}; - - if (ImGui::BeginMenuBar()) { - if (ImGui::BeginMenu("File")) { - ImGui::MenuItem("Open", "CTRL+O", &Selected.Open, true); - ImGui::MenuItem("Open from default location", "CTRL+SHIFT+O", &Selected.OpenDefault, true); - ImGui::MenuItem("Open App profile", "CTRL+I", &Selected.OpenAppProfile, true); - - ImGui::MenuItem("Save", "CTRL+S", &Selected.Save, true); - ImGui::MenuItem("Save As", "CTRL+SHIFT+S", &Selected.SaveAs, true); - ImGui::MenuItem("Save As App profile", "CTRL+E", nullptr, true); - ImGui::MenuItem("Save Default", "CTRL+SHIFT+P", &Selected.SaveDefault, true); - - ImGui::MenuItem("Close", "CTRL+W", &Selected.Close, true); - ImGui::MenuItem("Quit", "CTRL+Q", &Selected.Quit, true); - - ImGui::EndMenu(); - } - ImGui::SameLine(ImGui::GetWindowWidth() - ImGui::GetFrameHeight()); - - if (ConfigOpen) { - if (ConfigChanged) { - ImGui::PushStyleColor(ImGuiCol_FrameBg, ImVec4(1.0, 1.0, 0.0, 1.0)); - ImGui::PushStyleColor(ImGuiCol_CheckMark, ImVec4(1.0, 1.0, 0.0, 1.0)); - } - else { - 
ImGui::PushStyleColor(ImGuiCol_FrameBg, ImVec4(0.0, 1.0, 0.0, 1.0)); - ImGui::PushStyleColor(ImGuiCol_CheckMark, ImVec4(0.0, 1.0, 0.0, 1.0)); + ImGui::NextColumn(); + ++i; + if (Stop) { + break; } } - else { - ImGui::PushStyleColor(ImGuiCol_FrameBg, ImVec4(1.0, 0.0, 0.0, 1.0)); - ImGui::PushStyleColor(ImGuiCol_CheckMark, ImVec4(1.0, 0.0, 0.0, 1.0)); - } - - ImGui::RadioButton("", true); - ImGui::PopStyleColor(2); - - ImGui::EndMenuBar(); } + ImGui::EndTabItem(); + } +} - if (ConfigOpen) { - if (ImGui::BeginChild("#Config")) { - FillConfigWindow(); - } - - if (ImGui::IsKeyPressed(SDL_SCANCODE_E) && io.KeyCtrl && !io.KeyShift) { - ImGui::OpenPopup(SavedPopupAppName); - } +void FillConfigWindow() { + ImGui::BeginTabBar("Config"); + FillCPUConfig(); + FillEmulationConfig(); + FillLoggingConfig(); + FillHackConfig(); + FillAdvancedConfig(); + ImGui::EndTabBar(); +} - ImGui::SetNextWindowPos(ImVec2(viewport->Pos.x + viewport->Size.x / 2, viewport->Pos.y + viewport->Size.y / 2), ImGuiCond_Appearing, ImVec2(0.5f, 0.5f)); - if (ImGui::BeginPopupModal(SavedPopupAppName)) { - ImGui::SetKeyboardFocusHere(); - if (ImGui::InputText("App name", AppName, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { - fextl::string AppNameString = AppName; - fextl::string Filename = FEXCore::Config::GetApplicationConfig(AppNameString, false); - SaveFile(Filename); - ImGui::CloseCurrentPopup(); - } +bool DrawUI() { + ImGuiIO& io = ImGui::GetIO(); + auto current_time = std::chrono::high_resolution_clock::now(); + auto Diff = std::chrono::duration_cast(current_time - GlobalTime); + io.DeltaTime = Diff.count() > 0 ? Diff.count() : 1.0f / 60.0f; + GlobalTime = current_time; + + ImGui::NewFrame(); + + // We are using the ImGuiWindowFlags_NoDocking flag to make the parent window not dockable into, + // because it would be confusing to have two docking targets within each others. 
+ ImGuiWindowFlags window_flags = ImGuiWindowFlags_MenuBar | ImGuiWindowFlags_NoDocking; + ImGuiViewport* viewport = ImGui::GetMainViewport(); + ImGui::SetNextWindowPos(viewport->Pos); + ImGui::SetNextWindowSize(viewport->Size); + ImGui::SetNextWindowViewport(viewport->ID); + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 0.0f); + ImGui::PushStyleVar(ImGuiStyleVar_WindowBorderSize, 0.0f); + window_flags |= ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove; + window_flags |= ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoNavFocus; + + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0.0f, 0.0f)); + ImGui::Begin("DockSpace", nullptr, window_flags); + ImGui::PopStyleVar(3); + + struct { + bool Open {}; + bool OpenDefault {}; + bool OpenAppProfile {}; + bool Save {}; + bool SaveAs {}; + bool SaveDefault {}; + bool Close {}; + bool Quit {}; + } Selected; + + char AppName[256] {}; + + if (ImGui::BeginMenuBar()) { + if (ImGui::BeginMenu("File")) { + ImGui::MenuItem("Open", "CTRL+O", &Selected.Open, true); + ImGui::MenuItem("Open from default location", "CTRL+SHIFT+O", &Selected.OpenDefault, true); + ImGui::MenuItem("Open App profile", "CTRL+I", &Selected.OpenAppProfile, true); + + ImGui::MenuItem("Save", "CTRL+S", &Selected.Save, true); + ImGui::MenuItem("Save As", "CTRL+SHIFT+S", &Selected.SaveAs, true); + ImGui::MenuItem("Save As App profile", "CTRL+E", nullptr, true); + ImGui::MenuItem("Save Default", "CTRL+SHIFT+P", &Selected.SaveDefault, true); + + ImGui::MenuItem("Close", "CTRL+W", &Selected.Close, true); + ImGui::MenuItem("Quit", "CTRL+Q", &Selected.Quit, true); + + ImGui::EndMenu(); + } + + ImGui::SameLine(ImGui::GetWindowWidth() - ImGui::GetFrameHeight()); - if (ImGui::IsKeyPressed(SDL_SCANCODE_ESCAPE)) { - ImGui::CloseCurrentPopup(); - } - ImGui::EndPopup(); + if (ConfigOpen) { + if (ConfigChanged) { + ImGui::PushStyleColor(ImGuiCol_FrameBg, ImVec4(1.0, 1.0, 0.0, 1.0)); + 
ImGui::PushStyleColor(ImGuiCol_CheckMark, ImVec4(1.0, 1.0, 0.0, 1.0)); + } else { + ImGui::PushStyleColor(ImGuiCol_FrameBg, ImVec4(0.0, 1.0, 0.0, 1.0)); + ImGui::PushStyleColor(ImGuiCol_CheckMark, ImVec4(0.0, 1.0, 0.0, 1.0)); } - - ImGui::EndChild(); - } - - // Need this frame delay loop since ImGui doesn't allow us to enable a popup near the end of the frame - if (OpenMsgPopup) { - ImGui::OpenPopup(MsgPopupName); - OpenMsgPopup = false; + } else { + ImGui::PushStyleColor(ImGuiCol_FrameBg, ImVec4(1.0, 0.0, 0.0, 1.0)); + ImGui::PushStyleColor(ImGuiCol_CheckMark, ImVec4(1.0, 0.0, 0.0, 1.0)); } - if (Selected.OpenAppProfile || - (ImGui::IsKeyPressed(SDL_SCANCODE_I) && io.KeyCtrl && !io.KeyShift)) { - ImGui::OpenPopup(OpenedPopupAppName); - } + ImGui::RadioButton("", true); + ImGui::PopStyleColor(2); - // Center the saved popup in the center of the window - ImGui::SetNextWindowPos(ImVec2(viewport->Pos.x + viewport->Size.x / 2, viewport->Pos.y + viewport->Size.y / 2), ImGuiCond_Appearing, ImVec2(0.5f, 0.5f)); + ImGui::EndMenuBar(); + } - if (ImGui::BeginPopup(MsgPopupName)) { - ImGui::Text("%s", MsgMessage.c_str()); - if ((std::chrono::high_resolution_clock::now() - MsgTimerStart) >= std::chrono::seconds(2)) { - ImGui::CloseCurrentPopup(); - } - ImGui::EndPopup(); - } - else if (SaveMsgIsOpen) { - SaveMsgIsOpen = false; + if (ConfigOpen) { + if (ImGui::BeginChild("#Config")) { + FillConfigWindow(); } - ImGui::SetNextWindowPos(ImVec2(viewport->Pos.x + viewport->Size.x / 2, viewport->Pos.y + viewport->Size.y / 2), ImGuiCond_Appearing, ImVec2(0.5f, 0.5f)); - if (ImGui::BeginPopupModal(OpenedPopupAppName)) { + if (ImGui::IsKeyPressed(SDL_SCANCODE_E) && io.KeyCtrl && !io.KeyShift) { + ImGui::OpenPopup(SavedPopupAppName); + } + ImGui::SetNextWindowPos(ImVec2(viewport->Pos.x + viewport->Size.x / 2, viewport->Pos.y + viewport->Size.y / 2), ImGuiCond_Appearing, + ImVec2(0.5f, 0.5f)); + if (ImGui::BeginPopupModal(SavedPopupAppName)) { ImGui::SetKeyboardFocusHere(); if 
(ImGui::InputText("App name", AppName, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { fextl::string AppNameString = AppName; fextl::string Filename = FEXCore::Config::GetApplicationConfig(AppNameString, false); - OpenFile(Filename, false); + SaveFile(Filename); ImGui::CloseCurrentPopup(); } @@ -877,70 +838,108 @@ namespace { ImGui::EndPopup(); } - if (Selected.Open || - (ImGui::IsKeyPressed(SDL_SCANCODE_O) && io.KeyCtrl && !io.KeyShift)) { - SelectedOpenFile = true; - } - if (Selected.OpenDefault || - (ImGui::IsKeyPressed(SDL_SCANCODE_O) && io.KeyCtrl && io.KeyShift)) { - if (OpenFile(FEXCore::Config::GetConfigFileLocation(), true)) { - LoadNamedRootFSFolder(); - SetupINotify(); - } - } + ImGui::EndChild(); + } - if (Selected.Save || - (ImGui::IsKeyPressed(SDL_SCANCODE_S) && io.KeyCtrl && !io.KeyShift)) { - SaveFile(ConfigFilename); - } - if (Selected.SaveAs || - (ImGui::IsKeyPressed(SDL_SCANCODE_S) && io.KeyCtrl && io.KeyShift)) { - SelectedSaveFileAs = true; + // Need this frame delay loop since ImGui doesn't allow us to enable a popup near the end of the frame + if (OpenMsgPopup) { + ImGui::OpenPopup(MsgPopupName); + OpenMsgPopup = false; + } + + if (Selected.OpenAppProfile || (ImGui::IsKeyPressed(SDL_SCANCODE_I) && io.KeyCtrl && !io.KeyShift)) { + ImGui::OpenPopup(OpenedPopupAppName); + } + + // Center the saved popup in the center of the window + ImGui::SetNextWindowPos(ImVec2(viewport->Pos.x + viewport->Size.x / 2, viewport->Pos.y + viewport->Size.y / 2), ImGuiCond_Appearing, + ImVec2(0.5f, 0.5f)); + + if (ImGui::BeginPopup(MsgPopupName)) { + ImGui::Text("%s", MsgMessage.c_str()); + if ((std::chrono::high_resolution_clock::now() - MsgTimerStart) >= std::chrono::seconds(2)) { + ImGui::CloseCurrentPopup(); } + ImGui::EndPopup(); + } else if (SaveMsgIsOpen) { + SaveMsgIsOpen = false; + } - if (Selected.SaveDefault || - (ImGui::IsKeyPressed(SDL_SCANCODE_P) && io.KeyCtrl && io.KeyShift)) { - SaveFile(FEXCore::Config::GetConfigFileLocation()); + 
ImGui::SetNextWindowPos(ImVec2(viewport->Pos.x + viewport->Size.x / 2, viewport->Pos.y + viewport->Size.y / 2), ImGuiCond_Appearing, + ImVec2(0.5f, 0.5f)); + if (ImGui::BeginPopupModal(OpenedPopupAppName)) { + + ImGui::SetKeyboardFocusHere(); + if (ImGui::InputText("App name", AppName, 256, ImGuiInputTextFlags_EnterReturnsTrue)) { + fextl::string AppNameString = AppName; + fextl::string Filename = FEXCore::Config::GetApplicationConfig(AppNameString, false); + OpenFile(Filename, false); + ImGui::CloseCurrentPopup(); } - if (Selected.Close || - (ImGui::IsKeyPressed(SDL_SCANCODE_W) && io.KeyCtrl && !io.KeyShift)) { - CloseConfig(); - ShutdownINotify(); + + if (ImGui::IsKeyPressed(SDL_SCANCODE_ESCAPE)) { + ImGui::CloseCurrentPopup(); } + ImGui::EndPopup(); + } - if (Selected.Quit || - (ImGui::IsKeyPressed(SDL_SCANCODE_Q) && io.KeyCtrl && !io.KeyShift)) { - Selected.Quit = true; + if (Selected.Open || (ImGui::IsKeyPressed(SDL_SCANCODE_O) && io.KeyCtrl && !io.KeyShift)) { + SelectedOpenFile = true; + } + if (Selected.OpenDefault || (ImGui::IsKeyPressed(SDL_SCANCODE_O) && io.KeyCtrl && io.KeyShift)) { + if (OpenFile(FEXCore::Config::GetConfigFileLocation(), true)) { + LoadNamedRootFSFolder(); + SetupINotify(); } + } - ImGui::End(); // End dockspace + if (Selected.Save || (ImGui::IsKeyPressed(SDL_SCANCODE_S) && io.KeyCtrl && !io.KeyShift)) { + SaveFile(ConfigFilename); + } + if (Selected.SaveAs || (ImGui::IsKeyPressed(SDL_SCANCODE_S) && io.KeyCtrl && io.KeyShift)) { + SelectedSaveFileAs = true; + } - char const *InitialPath; - char const *File; + if (Selected.SaveDefault || (ImGui::IsKeyPressed(SDL_SCANCODE_P) && io.KeyCtrl && io.KeyShift)) { + SaveFile(FEXCore::Config::GetConfigFileLocation()); + } + if (Selected.Close || (ImGui::IsKeyPressed(SDL_SCANCODE_W) && io.KeyCtrl && !io.KeyShift)) { + CloseConfig(); + ShutdownINotify(); + } - InitialPath = DialogOpen.chooseFileDialog(SelectedOpenFile, "./", ".json", "#Chose a config to load"); - File = DialogOpen.getChosenPath(); 
- if (strlen(InitialPath) > 0 && strlen(File) > 0) { - OpenFile(File); - } + if (Selected.Quit || (ImGui::IsKeyPressed(SDL_SCANCODE_Q) && io.KeyCtrl && !io.KeyShift)) { + Selected.Quit = true; + } - InitialPath = DialogSaveAs.saveFileDialog(SelectedSaveFileAs, "./", "Config.json", ".json", "#Choose where to save a config"); - File = DialogSaveAs.getChosenPath(); - if (strlen(InitialPath) > 0 && strlen(File) > 0) { - SaveFile(File); - } + ImGui::End(); // End dockspace - SelectedOpenFile = false; - SelectedSaveFileAs = false; + const char* InitialPath; + const char* File; - ImGui::Render(); + InitialPath = DialogOpen.chooseFileDialog(SelectedOpenFile, "./", ".json", "#Chose a config to load"); + File = DialogOpen.getChosenPath(); + if (strlen(InitialPath) > 0 && strlen(File) > 0) { + OpenFile(File); + } - // Return true to keep rendering - return !Selected.Quit; + InitialPath = DialogSaveAs.saveFileDialog(SelectedSaveFileAs, "./", "Config.json", ".json", "#Choose where to save a config"); + File = DialogSaveAs.getChosenPath(); + if (strlen(InitialPath) > 0 && strlen(File) > 0) { + SaveFile(File); } + + SelectedOpenFile = false; + SelectedSaveFileAs = false; + + ImGui::Render(); + + // Return true to keep rendering + return !Selected.Quit; } +} // namespace -int main(int argc, char **argv) { +int main(int argc, char** argv) { FEX::Config::InitializeConfigs(); fextl::string ImGUIConfig = FEXCore::Config::GetConfigDirectory(false) + "FEXConfig_imgui.ini"; @@ -954,8 +953,7 @@ int main(int argc, char **argv) { LoadNamedRootFSFolder(); SetupINotify(); } - } - else { + } else { if (OpenFile(FEXCore::Config::GetConfigFileLocation(), true)) { LoadNamedRootFSFolder(); SetupINotify(); diff --git a/Source/Tools/FEXGDBReader/FEXGDBReader.cpp b/Source/Tools/FEXGDBReader/FEXGDBReader.cpp index 36e909bd05..c1aa9812f0 100644 --- a/Source/Tools/FEXGDBReader/FEXGDBReader.cpp +++ b/Source/Tools/FEXGDBReader/FEXGDBReader.cpp @@ -12,19 +12,19 @@ GDB_DECLARE_GPL_COMPATIBLE_READER; #define 
debugf(...) extern "C" { -static enum gdb_status read_debug_info(struct gdb_reader_funcs *self, struct gdb_symbol_callbacks *cbs, void *memory, long memory_sz) { - - info_t *info = (info_t *)memory; - blocks_t *blocks = (blocks_t *)(info->blocks_ofs + (long)memory); - gdb_line_mapping *lines = (gdb_line_mapping *)(info->lines_ofs + (long)memory); +static enum gdb_status read_debug_info(struct gdb_reader_funcs* self, struct gdb_symbol_callbacks* cbs, void* memory, long memory_sz) { + + info_t* info = (info_t*)memory; + blocks_t* blocks = (blocks_t*)(info->blocks_ofs + (long)memory); + gdb_line_mapping* lines = (gdb_line_mapping*)(info->lines_ofs + (long)memory); debugf("info: %p\n", info); debugf("info: s %p\n", info->filename); debugf("info: s %s\n", info->filename); debugf("info: l %d\n", info->nlines); debugf("info: b %d\n", info->nblocks); - struct gdb_object *object = cbs->object_open(cbs); - struct gdb_symtab *symtab = cbs->symtab_open(cbs, object, info->filename); + struct gdb_object* object = cbs->object_open(cbs); + struct gdb_symtab* symtab = cbs->symtab_open(cbs, object, info->filename); for (int i = 0; i < info->nblocks; i++) { debugf("info: %d\n", i); @@ -50,16 +50,18 @@ static enum gdb_status read_debug_info(struct gdb_reader_funcs *self, struct gdb return GDB_SUCCESS; } -enum gdb_status unwind_frame(struct gdb_reader_funcs *self, struct gdb_unwind_callbacks *cbs) { return GDB_SUCCESS; } +enum gdb_status unwind_frame(struct gdb_reader_funcs* self, struct gdb_unwind_callbacks* cbs) { + return GDB_SUCCESS; +} -struct gdb_frame_id get_frame_id(struct gdb_reader_funcs *self, struct gdb_unwind_callbacks *cbs) { +struct gdb_frame_id get_frame_id(struct gdb_reader_funcs* self, struct gdb_unwind_callbacks* cbs) { struct gdb_frame_id frame = {0x1234000, 0}; return frame; } -void destroy_reader(struct gdb_reader_funcs *self) {} +void destroy_reader(struct gdb_reader_funcs* self) {} -extern struct gdb_reader_funcs *gdb_init_reader(void) { +extern struct 
gdb_reader_funcs* gdb_init_reader(void) { static struct gdb_reader_funcs funcs = {GDB_READER_INTERFACE_VERSION, NULL, read_debug_info, unwind_frame, get_frame_id, destroy_reader}; return &funcs; } diff --git a/Source/Tools/FEXGetConfig/Main.cpp b/Source/Tools/FEXGetConfig/Main.cpp index fe6ebc427a..e22da24a76 100644 --- a/Source/Tools/FEXGetConfig/Main.cpp +++ b/Source/Tools/FEXGetConfig/Main.cpp @@ -13,7 +13,7 @@ #include #include -int main(int argc, char **argv, char **envp) { +int main(int argc, char** argv, char** envp) { FEXCore::Config::Initialize(); FEXCore::Config::AddLayer(FEX::Config::CreateGlobalMainLayer()); FEXCore::Config::AddLayer(FEX::Config::CreateMainLayer()); @@ -22,23 +22,15 @@ int main(int argc, char **argv, char **envp) { FEXCore::Config::Load(); // Load the arguments - optparse::OptionParser Parser = optparse::OptionParser() - .description("Simple application to get a couple of FEX options"); + optparse::OptionParser Parser = optparse::OptionParser().description("Simple application to get a couple of FEX options"); - Parser.add_option("--install-prefix") - .action("store_true") - .help("Print the FEX install prefix"); + Parser.add_option("--install-prefix").action("store_true").help("Print the FEX install prefix"); - Parser.add_option("--app") - .help("Load an application profile for this application if it exists"); + Parser.add_option("--app").help("Load an application profile for this application if it exists"); - Parser.add_option("--current-rootfs") - .action("store_true") - .help("Print the directory that contains the FEX rootfs. Mounted in the case of squashfs"); + Parser.add_option("--current-rootfs").action("store_true").help("Print the directory that contains the FEX rootfs. 
Mounted in the case of squashfs"); - Parser.add_option("--version") - .action("store_true") - .help("Print the installed FEX-Emu version"); + Parser.add_option("--version").action("store_true").help("Print the installed FEX-Emu version"); optparse::Values Options = Parser.parse_args(argc, argv); diff --git a/Source/Tools/FEXLoader/AOT/AOTGenerator.cpp b/Source/Tools/FEXLoader/AOT/AOTGenerator.cpp index 88c4d44ef4..aeddae3cf9 100644 --- a/Source/Tools/FEXLoader/AOT/AOTGenerator.cpp +++ b/Source/Tools/FEXLoader/AOT/AOTGenerator.cpp @@ -16,21 +16,22 @@ #include namespace FEX::AOT { -void AOTGenSection(FEXCore::Context::Context *CTX, ELFCodeLoader::LoadedSection &Section) { +void AOTGenSection(FEXCore::Context::Context* CTX, ELFCodeLoader::LoadedSection& Section) { // Make sure this section is executable and big enough - if (!Section.Executable || Section.Size < 16) + if (!Section.Executable || Section.Size < 16) { return; + } fextl::set InitialBranchTargets; // Load the ELF again with symbol parsing this time - ELFLoader::ELFContainer container{Section.Filename, "", true}; + ELFLoader::ELFContainer container {Section.Filename, "", true}; // Add symbols to the branch targets list container.AddSymbols([&](ELFLoader::ELFSymbol* sym) { auto Destination = sym->Address + Section.ElfBase; - if (! (Destination >= Section.Base && Destination <= (Section.Base + Section.Size)) ) { + if (!(Destination >= Section.Base && Destination <= (Section.Base + Section.Size))) { return; // outside of current section, unlikely to be real code } @@ -43,7 +44,7 @@ void AOTGenSection(FEXCore::Context::Context *CTX, ELFCodeLoader::LoadedSection container.AddUnwindEntries([&](uintptr_t Entry) { auto Destination = Entry + Section.ElfBase; - if (! 
(Destination >= Section.Base && Destination <= (Section.Base + Section.Size)) ) { + if (!(Destination >= Section.Base && Destination <= (Section.Base + Section.Size))) { return; // outside of current section, unlikely to be real code } @@ -54,7 +55,7 @@ void AOTGenSection(FEXCore::Context::Context *CTX, ELFCodeLoader::LoadedSection // Scan the executable section and try to find function entries for (size_t Offset = 0; Offset < (Section.Size - 16); Offset++) { - uint8_t *pCode = (uint8_t *)(Section.Base + Offset); + uint8_t* pCode = (uint8_t*)(Section.Base + Offset); // Possible CALL if (*pCode == 0xE8) { @@ -63,11 +64,13 @@ void AOTGenSection(FEXCore::Context::Context *CTX, ELFCodeLoader::LoadedSection auto DestinationPtr = (uint8_t*)Destination; - if (! (Destination >= Section.Base && Destination <= (Section.Base + Section.Size)) ) + if (!(Destination >= Section.Base && Destination <= (Section.Base + Section.Size))) { continue; // outside of current section, unlikely to be real code + } - if (DestinationPtr[0] == 0 && DestinationPtr[1] == 0) + if (DestinationPtr[0] == 0 && DestinationPtr[1] == 0) { continue; // add al, [rax], unlikely to be real code + } InitialBranchTargets.insert(Destination); } @@ -88,7 +91,7 @@ void AOTGenSection(FEXCore::Context::Context *CTX, ELFCodeLoader::LoadedSection // Setup BranchTargets, Compiled sets from InitiaBranchTargets Compiled.insert(InitialBranchTargets.begin(), InitialBranchTargets.end()); - for (auto BranchTarget: InitialBranchTargets) { + for (auto BranchTarget : InitialBranchTargets) { BranchTargets.push(BranchTarget); } @@ -132,12 +135,14 @@ void AOTGenSection(FEXCore::Context::Context *CTX, ELFCodeLoader::LoadedSection if (ExternalBranchesLocal.size() > 0) { // Add them to the "to process" list QueueMutex.lock(); - for(auto Destination: ExternalBranchesLocal) { - if (! 
(Destination >= Section.Base && Destination <= (Section.Base + Section.Size)) ) - continue; - if (Compiled.contains(Destination)) - continue; - Compiled.insert(Destination); + for (auto Destination : ExternalBranchesLocal) { + if (!(Destination >= Section.Base && Destination <= (Section.Base + Section.Size))) { + continue; + } + if (Compiled.contains(Destination)) { + continue; + } + Compiled.insert(Destination); BranchTargets.push(Destination); } QueueMutex.unlock(); @@ -156,7 +161,7 @@ void AOTGenSection(FEXCore::Context::Context *CTX, ELFCodeLoader::LoadedSection } // Make sure all threads are finished - for (auto & Thread: ThreadPool) { + for (auto& Thread : ThreadPool) { Thread.join(); } @@ -164,4 +169,4 @@ void AOTGenSection(FEXCore::Context::Context *CTX, ELFCodeLoader::LoadedSection LogMan::Msg::IFmt("\nAll Done: {}", counter.load()); } -} +} // namespace FEX::AOT diff --git a/Source/Tools/FEXLoader/FEXLoader.cpp b/Source/Tools/FEXLoader/FEXLoader.cpp index 50802af350..27b08e7926 100644 --- a/Source/Tools/FEXLoader/FEXLoader.cpp +++ b/Source/Tools/FEXLoader/FEXLoader.cpp @@ -65,7 +65,7 @@ static bool SilentLog; static int OutputFD {STDERR_FILENO}; static bool ExecutedWithFD {false}; -void MsgHandler(LogMan::DebugLevels Level, char const *Message) { +void MsgHandler(LogMan::DebugLevels Level, const char* Message) { if (SilentLog) { return; } @@ -75,7 +75,7 @@ void MsgHandler(LogMan::DebugLevels Level, char const *Message) { fsync(OutputFD); } -void AssertHandler(char const *Message) { +void AssertHandler(const char* Message) { if (SilentLog) { return; } @@ -88,53 +88,53 @@ void AssertHandler(char const *Message) { } // Anonymous namespace namespace FEXServerLogging { - int FEXServerFD{}; - void MsgHandler(LogMan::DebugLevels Level, char const *Message) { - FEXServerClient::MsgHandler(FEXServerFD, Level, Message); - } +int FEXServerFD {}; +void MsgHandler(LogMan::DebugLevels Level, const char* Message) { + FEXServerClient::MsgHandler(FEXServerFD, Level, 
Message); +} - void AssertHandler(char const *Message) { - FEXServerClient::AssertHandler(FEXServerFD, Message); - } +void AssertHandler(const char* Message) { + FEXServerClient::AssertHandler(FEXServerFD, Message); } +} // namespace FEXServerLogging namespace AOTIR { - class AOTIRWriterFD final : public FEXCore::Context::AOTIRWriter { - public: - AOTIRWriterFD(const fextl::string &Path) { - // Create and truncate if exists. - constexpr int USER_PERMS = S_IRWXU | S_IRWXG | S_IRWXO; - FD = open(Path.c_str(), O_CREAT | O_WRONLY | O_TRUNC | O_CLOEXEC, USER_PERMS); - } +class AOTIRWriterFD final : public FEXCore::Context::AOTIRWriter { +public: + AOTIRWriterFD(const fextl::string& Path) { + // Create and truncate if exists. + constexpr int USER_PERMS = S_IRWXU | S_IRWXG | S_IRWXO; + FD = open(Path.c_str(), O_CREAT | O_WRONLY | O_TRUNC | O_CLOEXEC, USER_PERMS); + } - operator bool() const { - return FD != -1; - } + operator bool() const { + return FD != -1; + } - void Write(const void* Data, size_t Size) override { - write(FD, Data, Size); - } + void Write(const void* Data, size_t Size) override { + write(FD, Data, Size); + } - size_t Offset() override { - return lseek(FD, 0, SEEK_CUR); - } + size_t Offset() override { + return lseek(FD, 0, SEEK_CUR); + } - void Close() override { - if (FD != -1) { - close(FD); - FD = -1; - } - } + void Close() override { + if (FD != -1) { + close(FD); + FD = -1; + } + } - virtual ~AOTIRWriterFD() { - Close(); - } - private: - int FD{-1}; - }; -} + virtual ~AOTIRWriterFD() { + Close(); + } +private: + int FD {-1}; +}; +} // namespace AOTIR -void InterpreterHandler(fextl::string *Filename, fextl::string const &RootFS, fextl::vector *args) { +void InterpreterHandler(fextl::string* Filename, const fextl::string& RootFS, fextl::vector* args) { // Open the Filename to determine if it is a shebang file. 
int FD = open(Filename->c_str(), O_RDONLY | O_CLOEXEC); if (FD == -1) { @@ -154,13 +154,10 @@ void InterpreterHandler(fextl::string *Filename, fextl::string const &RootFS, fe } // Handle shebang files - if (Data[0] == '#' && - Data[1] == '!') { - fextl::string InterpreterLine { - Data.begin() + 2, // strip off "#!" prefix - std::find(Data.begin(), Data.end(), '\n') - }; - fextl::vector ShebangArguments{}; + if (Data[0] == '#' && Data[1] == '!') { + fextl::string InterpreterLine {Data.begin() + 2, // strip off "#!" prefix + std::find(Data.begin(), Data.end(), '\n')}; + fextl::vector ShebangArguments {}; // Shebang line can have a single argument fextl::istringstream InterpreterSS(InterpreterLine); @@ -173,7 +170,7 @@ void InterpreterHandler(fextl::string *Filename, fextl::string const &RootFS, fe } // Executable argument - fextl::string &ShebangProgram = ShebangArguments[0]; + fextl::string& ShebangProgram = ShebangArguments[0]; // If the filename is absolute then prepend the rootfs // If it is relative then don't append the rootfs @@ -188,7 +185,7 @@ void InterpreterHandler(fextl::string *Filename, fextl::string const &RootFS, fe close(FD); } -void RootFSRedirect(fextl::string *Filename, fextl::string const &RootFS) { +void RootFSRedirect(fextl::string* Filename, const fextl::string& RootFS) { auto RootFSLink = ELFCodeLoader::ResolveRootfsFile(*Filename, RootFS); if (FHU::Filesystem::Exists(RootFSLink)) { @@ -196,7 +193,7 @@ void RootFSRedirect(fextl::string *Filename, fextl::string const &RootFS) { } } -bool RanAsInterpreter(const char *Program) { +bool RanAsInterpreter(const char* Program) { return ExecutedWithFD || FEXLOADER_AS_INTERPRETER; } @@ -204,14 +201,12 @@ bool IsInterpreterInstalled() { // The interpreter is installed if both the binfmt_misc handlers are available // Or if we were originally executed with FD. 
Which means the interpreter is installed - return ExecutedWithFD || - (access("/proc/sys/fs/binfmt_misc/FEX-x86", F_OK) == 0 && - access("/proc/sys/fs/binfmt_misc/FEX-x86_64", F_OK) == 0); + return ExecutedWithFD || (access("/proc/sys/fs/binfmt_misc/FEX-x86", F_OK) == 0 && access("/proc/sys/fs/binfmt_misc/FEX-x86_64", F_OK) == 0); } namespace FEX::TSO { - void SetupTSOEmulation(FEXCore::Context::Context *CTX) { - // We need to check if these are defined or not. This is a very fresh feature. +void SetupTSOEmulation(FEXCore::Context::Context* CTX) { + // We need to check if these are defined or not. This is a very fresh feature. #ifndef PR_GET_MEM_MODEL #define PR_GET_MEM_MODEL 0x6d4d444c #endif @@ -224,50 +219,45 @@ namespace FEX::TSO { #ifndef PR_SET_MEM_MODEL_TSO #define PR_SET_MEM_MODEL_TSO 1 #endif - // Check to see if this is supported. - auto Result = prctl(PR_GET_MEM_MODEL, 0, 0, 0, 0); - if (Result == -1) { - // Unsupported, early exit. - return; - } + // Check to see if this is supported. + auto Result = prctl(PR_GET_MEM_MODEL, 0, 0, 0, 0); + if (Result == -1) { + // Unsupported, early exit. + return; + } - FEX_CONFIG_OPT(TSOEnabled, TSOENABLED); + FEX_CONFIG_OPT(TSOEnabled, TSOENABLED); - if (!TSOEnabled()) { - // TSO emulation isn't even enabled, early exit. - return; - } + if (!TSOEnabled()) { + // TSO emulation isn't even enabled, early exit. + return; + } - if (Result == PR_SET_MEM_MODEL_DEFAULT) { - // Try to set the TSO mode if we are currently default. - Result = prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_TSO, 0, 0, 0); - if (Result == 0) { - // TSO mode successfully enabled. Tell the context to disable TSO emulation through atomics. - // This flag gets inherited on thread creation, so FEX only needs to set it at the start. - CTX->SetHardwareTSOSupport(true); - } + if (Result == PR_SET_MEM_MODEL_DEFAULT) { + // Try to set the TSO mode if we are currently default. 
+ Result = prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_TSO, 0, 0, 0); + if (Result == 0) { + // TSO mode successfully enabled. Tell the context to disable TSO emulation through atomics. + // This flag gets inherited on thread creation, so FEX only needs to set it at the start. + CTX->SetHardwareTSOSupport(true); } } } +} // namespace FEX::TSO -int main(int argc, char **argv, char **const envp) { +int main(int argc, char** argv, char** const envp) { auto SBRKPointer = FEXCore::Allocator::DisableSBRKAllocations(); FEXCore::Allocator::GLIBCScopedFault GLIBFaultScope; const bool IsInterpreter = RanAsInterpreter(argv[0]); ExecutedWithFD = getauxval(AT_EXECFD) != 0; const char* FEXFD = getenv("FEX_EXECVEFD"); - const std::string_view FEXFDView = FEXFD ? std::string_view{FEXFD} : std::string_view{}; + const std::string_view FEXFDView = FEXFD ? std::string_view {FEXFD} : std::string_view {}; LogMan::Throw::InstallHandler(AssertHandler); LogMan::Msg::InstallHandler(MsgHandler); - auto Program = FEX::Config::LoadConfig( - IsInterpreter, - true, - argc, argv, envp, - ExecutedWithFD, - FEXFDView); + auto Program = FEX::Config::LoadConfig(IsInterpreter, true, argc, argv, envp, ExecutedWithFD, FEXFDView); if (Program.ProgramPath.empty() && !FEXFD) { // Early exit if we weren't passed an argument @@ -315,8 +305,7 @@ int main(int argc, char **argv, char **const envp) { if (::SilentLog) { LogMan::Throw::UnInstallHandlers(); LogMan::Msg::UnInstallHandlers(); - } - else { + } else { auto LogFile = OutputLog(); // If stderr or stdout then we need to dup the FD // In some cases some applications will close stderr and stdout @@ -327,11 +316,9 @@ int main(int argc, char **argv, char **const envp) { // can run in to problems of writing to some file if (LogFile == "stderr") { OutputFD = dup(STDERR_FILENO); - } - else if (LogFile == "stdout") { + } else if (LogFile == "stdout") { OutputFD = dup(STDOUT_FILENO); - } - else if (LogFile == "server") { + } else if (LogFile == "server") { 
LogMan::Throw::UnInstallHandlers(); LogMan::Msg::UnInstallHandlers(); @@ -340,8 +327,7 @@ int main(int argc, char **argv, char **const envp) { LogMan::Throw::InstallHandler(FEXServerLogging::AssertHandler); LogMan::Msg::InstallHandler(FEXServerLogging::MsgHandler); } - } - else if (!LogFile.empty()) { + } else if (!LogFile.empty()) { OutputFD = open(LogFile.c_str(), O_CREAT | O_CLOEXEC | O_WRONLY); } } @@ -366,13 +352,13 @@ int main(int argc, char **argv, char **const envp) { } // Before we go any further, set all of our host environment variables that the config has provided - for (auto &HostEnv : HostEnvironment.All()) { + for (auto& HostEnv : HostEnvironment.All()) { // We are going to keep these alive in memory. // No need to split the string with setenv putenv(HostEnv.data()); } - ELFCodeLoader Loader{Program.ProgramPath, FEXFDView, LDPath(), Args, ParsedArgs, envp, &Environment}; + ELFCodeLoader Loader {Program.ProgramPath, FEXFDView, LDPath(), Args, ParsedArgs, envp, &Environment}; if (!Loader.ELFWasLoaded()) { // Loader couldn't load this program for some reason @@ -380,8 +366,7 @@ int main(int argc, char **argv, char **const envp) { #ifdef _M_ARM_64 fextl::fmt::print(stderr, "This is likely due to a misconfigured x86-64 RootFS\n"); fextl::fmt::print(stderr, "Current RootFS path set to '{}'\n", LDPath()); - if (LDPath().empty() || - FHU::Filesystem::Exists(LDPath()) == false) { + if (LDPath().empty() || FHU::Filesystem::Exists(LDPath()) == false) { fextl::fmt::print(stderr, "RootFS path doesn't exist. This is required on AArch64 hosts\n"); fextl::fmt::print(stderr, "Use FEXRootFSFetcher to download a RootFS\n"); } @@ -393,16 +378,14 @@ int main(int argc, char **argv, char **const envp) { // Don't need to canonicalize Program.ProgramPath, Config loader will have resolved this already. 
FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_APP_FILENAME, Program.ProgramPath); FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_APP_CONFIG_NAME, Program.ProgramName); - } - else if (FEXFD) { + } else if (FEXFD) { // Anonymous program. FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_APP_FILENAME, ""); FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_APP_CONFIG_NAME, ""); - } - else { + } else { { char ExistsTempPath[PATH_MAX]; - char *RealPath = realpath(Program.ProgramPath.c_str(), ExistsTempPath); + char* RealPath = realpath(Program.ProgramPath.c_str(), ExistsTempPath); if (RealPath) { FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_APP_FILENAME, fextl::string(RealPath)); } @@ -447,8 +430,8 @@ int main(int argc, char **argv, char **const envp) { auto SignalDelegation = FEX::HLE::CreateSignalDelegator(CTX.get(), Program.ProgramName); - auto SyscallHandler = Loader.Is64BitMode() ? FEX::HLE::x64::CreateHandler(CTX.get(), SignalDelegation.get()) - : FEX::HLE::x32::CreateHandler(CTX.get(), SignalDelegation.get(), std::move(Allocator)); + auto SyscallHandler = Loader.Is64BitMode() ? FEX::HLE::x64::CreateHandler(CTX.get(), SignalDelegation.get()) : + FEX::HLE::x32::CreateHandler(CTX.get(), SignalDelegation.get(), std::move(Allocator)); { // Load VDSO in to memory prior to mapping our ELFs. @@ -492,7 +475,7 @@ int main(int argc, char **argv, char **const envp) { FEXCore::Context::ExitReason ShutdownReason = FEXCore::Context::ExitReason::EXIT_SHUTDOWN; // There might already be an exit handler, leave it installed - if(!CTX->GetExitHandler()) { + if (!CTX->GetExitHandler()) { CTX->SetExitHandler([&](uint64_t thread, FEXCore::Context::ExitReason reason) { if (reason != FEXCore::Context::ExitReason::EXIT_DEBUG) { ShutdownReason = reason; @@ -506,7 +489,7 @@ int main(int argc, char **argv, char **const envp) { LogMan::Msg::IFmt("Warning: AOTIR is experimental, and might lead to crashes. 
" "Capture doesn't work with programs that fork."); - CTX->SetAOTIRLoader([](const fextl::string &fileid) -> int { + CTX->SetAOTIRLoader([](const fextl::string& fileid) -> int { const auto filepath = fextl::fmt::format("{}/aotir/{}.aotir", FEXCore::Config::GetDataDirectory(), fileid); return open(filepath.c_str(), O_RDONLY); }); @@ -534,7 +517,7 @@ int main(int argc, char **argv, char **const envp) { } if (AOTIRGenerate()) { - for(auto &Section: Loader.Sections) { + for (auto& Section : Loader.Sections) { FEX::AOT::AOTGenSection(CTX.get(), Section); } } else { @@ -586,8 +569,7 @@ int main(int argc, char **argv, char **const envp) { if (ShutdownReason == FEXCore::Context::ExitReason::EXIT_SHUTDOWN) { return ProgramStatus; - } - else { + } else { return -64 | ShutdownReason; } } diff --git a/Source/Tools/FEXRootFSFetcher/Main.cpp b/Source/Tools/FEXRootFSFetcher/Main.cpp index 5edc97bcf1..77864144a3 100644 --- a/Source/Tools/FEXRootFSFetcher/Main.cpp +++ b/Source/Tools/FEXRootFSFetcher/Main.cpp @@ -22,1150 +22,1076 @@ #include namespace ArgOptions { - bool AssumeYes = false; - enum class CompressedImageOption { - OPTION_ASK, - OPTION_EXTRACT, - OPTION_ASIS, - }; - - CompressedImageOption CompressedUsageOption {CompressedImageOption::OPTION_ASK}; +bool AssumeYes = false; +enum class CompressedImageOption { + OPTION_ASK, + OPTION_EXTRACT, + OPTION_ASIS, +}; - enum class ListQueryOption { - OPTION_ASK, - OPTION_FIRST, - }; +CompressedImageOption CompressedUsageOption {CompressedImageOption::OPTION_ASK}; - ListQueryOption DistroListOption {ListQueryOption::OPTION_ASK}; +enum class ListQueryOption { + OPTION_ASK, + OPTION_FIRST, +}; - fextl::vector RemainingArgs; +ListQueryOption DistroListOption {ListQueryOption::OPTION_ASK}; - std::string DistroName{}; - std::string DistroVersion{}; +fextl::vector RemainingArgs; - void ParseArguments(int argc, char **argv) { - optparse::OptionParser Parser = optparse::OptionParser() - .description("Tool for fetching RootFS from 
FEXServers") - .add_help_option(true); +std::string DistroName {}; +std::string DistroVersion {}; - Parser.add_option("-y", "--assume-yes") - .action("store_true") - .help("Assume yes to prompts"); +void ParseArguments(int argc, char** argv) { + optparse::OptionParser Parser = optparse::OptionParser().description("Tool for fetching RootFS from FEXServers").add_help_option(true); - Parser.add_option("-x", "--extract") - .action("store_true") - .help("Extract compressed image"); + Parser.add_option("-y", "--assume-yes").action("store_true").help("Assume yes to prompts"); - Parser.add_option("-a", "--as-is") - .action("store_true") - .help("Use compressed image as-is"); + Parser.add_option("-x", "--extract").action("store_true").help("Extract compressed image"); - Parser.add_option("--distro-name") - .help("Which distro name to select"); + Parser.add_option("-a", "--as-is").action("store_true").help("Use compressed image as-is"); - Parser.add_option("--distro-version") - .help("Which distro version to select"); + Parser.add_option("--distro-name").help("Which distro name to select"); - Parser.add_option("--distro-list-first") - .action("store_true") - .help("When presented the distro-list option, automatically select the first distro if there isn't an exact match."); + Parser.add_option("--distro-version").help("Which distro version to select"); - optparse::Values Options = Parser.parse_args(argc, argv); + Parser.add_option("--distro-list-first").action("store_true").help("When presented the distro-list option, automatically select the first distro if there isn't an exact match."); - if (Options.is_set_by_user("assume_yes")) { - AssumeYes = Options.get("assume_yes"); - } + optparse::Values Options = Parser.parse_args(argc, argv); - if (Options.is_set_by_user("extract")) { - CompressedUsageOption = CompressedImageOption::OPTION_EXTRACT; - } + if (Options.is_set_by_user("assume_yes")) { + AssumeYes = Options.get("assume_yes"); + } - if (Options.is_set_by_user("as_is")) 
{ - CompressedUsageOption = CompressedImageOption::OPTION_ASIS; - } + if (Options.is_set_by_user("extract")) { + CompressedUsageOption = CompressedImageOption::OPTION_EXTRACT; + } - if (Options.is_set_by_user("distro_list_first")) { - DistroListOption = ListQueryOption::OPTION_FIRST; - } + if (Options.is_set_by_user("as_is")) { + CompressedUsageOption = CompressedImageOption::OPTION_ASIS; + } - if (Options.is_set_by_user("distro_name")) { - DistroName = Options["distro_name"]; - } + if (Options.is_set_by_user("distro_list_first")) { + DistroListOption = ListQueryOption::OPTION_FIRST; + } - if (Options.is_set_by_user("distro_version")) { - DistroVersion = Options["distro_version"]; - } + if (Options.is_set_by_user("distro_name")) { + DistroName = Options["distro_name"]; + } - RemainingArgs = Parser.args(); + if (Options.is_set_by_user("distro_version")) { + DistroVersion = Options["distro_version"]; } + + RemainingArgs = Parser.args(); } +} // namespace ArgOptions namespace Exec { - int32_t ExecAndWaitForResponse(const char *path, char* const* args) { - pid_t pid = fork(); - if (pid == 0) { - execvp(path, args); - _exit(-1); +int32_t ExecAndWaitForResponse(const char* path, char* const* args) { + pid_t pid = fork(); + if (pid == 0) { + execvp(path, args); + _exit(-1); + } else { + int32_t Status {}; + waitpid(pid, &Status, 0); + if (WIFEXITED(Status)) { + return (int8_t)WEXITSTATUS(Status); } - else { - int32_t Status{}; - waitpid(pid, &Status, 0); - if (WIFEXITED(Status)) { - return (int8_t)WEXITSTATUS(Status); - } - } - - return -1; } - int32_t ExecAndWaitForResponseRedirect(const char *path, char* const* args, int stdoutRedirect = -2, int stderrRedirect = -2) { - pid_t pid = fork(); - if (pid == 0) { - if (stdoutRedirect == -1) { + return -1; +} + +int32_t ExecAndWaitForResponseRedirect(const char* path, char* const* args, int stdoutRedirect = -2, int stderrRedirect = -2) { + pid_t pid = fork(); + if (pid == 0) { + if (stdoutRedirect == -1) { + 
close(STDOUT_FILENO); + } else if (stdoutRedirect == -2) { + // Do nothing + } else { + if (stdoutRedirect != STDOUT_FILENO) { close(STDOUT_FILENO); } - else if (stdoutRedirect == -2) { - // Do nothing - } - else { - if (stdoutRedirect != STDOUT_FILENO) { - close(STDOUT_FILENO); - } - dup2(stdoutRedirect, STDOUT_FILENO); - } - if (stderrRedirect == -1) { + dup2(stdoutRedirect, STDOUT_FILENO); + } + if (stderrRedirect == -1) { + close(STDERR_FILENO); + } else if (stderrRedirect == -2) { + // Do nothing + } else { + if (stderrRedirect != STDOUT_FILENO) { close(STDERR_FILENO); } - else if (stderrRedirect == -2) { - // Do nothing - } - else { - if (stderrRedirect != STDOUT_FILENO) { - close(STDERR_FILENO); - } - dup2(stderrRedirect, STDERR_FILENO); - } - execvp(path, args); - _exit(-1); + dup2(stderrRedirect, STDERR_FILENO); } - else { - int32_t Status{}; - while (waitpid(pid, &Status, 0) == -1 && errno == EINTR); - if (WIFEXITED(Status)) { - return (int8_t)WEXITSTATUS(Status); - } + execvp(path, args); + _exit(-1); + } else { + int32_t Status {}; + while (waitpid(pid, &Status, 0) == -1 && errno == EINTR) + ; + if (WIFEXITED(Status)) { + return (int8_t)WEXITSTATUS(Status); } - - return -1; } - std::string ExecAndWaitForResponseText(const char *path, char* const* args) { - int fd[2]; - pipe(fd); + return -1; +} - pid_t pid = fork(); +std::string ExecAndWaitForResponseText(const char* path, char* const* args) { + int fd[2]; + pipe(fd); - if (pid == 0) { - close(fd[0]); // Close read side + pid_t pid = fork(); - // Redirect stdout to pipe - dup2(fd[1], STDOUT_FILENO); + if (pid == 0) { + close(fd[0]); // Close read side - // Close stderr - close(STDERR_FILENO); + // Redirect stdout to pipe + dup2(fd[1], STDOUT_FILENO); - // We can now close the pipe since the duplications take care of the rest - close(fd[1]); + // Close stderr + close(STDERR_FILENO); - execvp(path, args); - _exit(-1); - } - else { - close(fd[1]); // Close write side + // We can now close the pipe since 
the duplications take care of the rest + close(fd[1]); - // Nothing larger than this - char Buffer[1024]{}; - std::string Output{}; + execvp(path, args); + _exit(-1); + } else { + close(fd[1]); // Close write side - // Read the pipe until it closes - while (size_t Size = read(fd[0], Buffer, sizeof(Buffer))) { - Output += std::string_view(Buffer, Size); - } + // Nothing larger than this + char Buffer[1024] {}; + std::string Output {}; - int32_t Status{}; - while (waitpid(pid, &Status, 0) == -1 && errno == EINTR); - if (WIFEXITED(Status)) { - // Return what we've read - close(fd[0]); - return Output; - } + // Read the pipe until it closes + while (size_t Size = read(fd[0], Buffer, sizeof(Buffer))) { + Output += std::string_view(Buffer, Size); } - return {}; + int32_t Status {}; + while (waitpid(pid, &Status, 0) == -1 && errno == EINTR) + ; + if (WIFEXITED(Status)) { + // Return what we've read + close(fd[0]); + return Output; + } } + + return {}; } +} // namespace Exec namespace WorkingAppsTester { - static bool Has_Curl {false}; - static bool Has_Squashfuse {false}; - static bool Has_Unsquashfs {false}; - - // EroFS specific - static bool Has_EroFSFuse {false}; - static bool Has_EroFSFsck {false}; - - void CheckCurl() { - // Check if curl exists on the host - std::vector ExecveArgs = { - "curl", - "-V", - nullptr, - }; +static bool Has_Curl {false}; +static bool Has_Squashfuse {false}; +static bool Has_Unsquashfs {false}; - int32_t Result = Exec::ExecAndWaitForResponseRedirect(ExecveArgs[0], const_cast(ExecveArgs.data()), -1, -1); - Has_Curl = Result != -1; - } +// EroFS specific +static bool Has_EroFSFuse {false}; +static bool Has_EroFSFsck {false}; - void CheckSquashfuse() { - std::vector ExecveArgs = { - "squashfuse", - "--help", - nullptr, - }; +void CheckCurl() { + // Check if curl exists on the host + std::vector ExecveArgs = { + "curl", + "-V", + nullptr, + }; - int32_t Result = Exec::ExecAndWaitForResponseRedirect(ExecveArgs[0], 
const_cast(ExecveArgs.data()), -1, -1); - Has_Squashfuse = Result != -1; - } + int32_t Result = Exec::ExecAndWaitForResponseRedirect(ExecveArgs[0], const_cast(ExecveArgs.data()), -1, -1); + Has_Curl = Result != -1; +} - void CheckUnsquashfs() { - std::vector ExecveArgs = { - "unsquashfs", - "--help", - nullptr, - }; +void CheckSquashfuse() { + std::vector ExecveArgs = { + "squashfuse", + "--help", + nullptr, + }; - int fd = ::syscall(SYS_memfd_create, "stdout", 0); - int32_t Result = Exec::ExecAndWaitForResponseRedirect(ExecveArgs[0], const_cast(ExecveArgs.data()), fd, fd); - Has_Unsquashfs = Result != -1; - if (Has_Unsquashfs) { - // Seek back to the start - lseek(fd, 0, SEEK_SET); - - // Unsquashfs needs to support zstd - // Scan its output to find the zstd compressor - FILE *fp = fdopen(fd, "r"); - char *Line {nullptr}; - size_t Len; - - bool ReadingDecompressors = false; - bool SupportsZSTD = false; - while (getline(&Line, &Len, fp) != -1) { - if (!ReadingDecompressors) { - if (strstr(Line, "Decompressors available")) { - ReadingDecompressors = true; - } + int32_t Result = Exec::ExecAndWaitForResponseRedirect(ExecveArgs[0], const_cast(ExecveArgs.data()), -1, -1); + Has_Squashfuse = Result != -1; +} + +void CheckUnsquashfs() { + std::vector ExecveArgs = { + "unsquashfs", + "--help", + nullptr, + }; + + int fd = ::syscall(SYS_memfd_create, "stdout", 0); + int32_t Result = Exec::ExecAndWaitForResponseRedirect(ExecveArgs[0], const_cast(ExecveArgs.data()), fd, fd); + Has_Unsquashfs = Result != -1; + if (Has_Unsquashfs) { + // Seek back to the start + lseek(fd, 0, SEEK_SET); + + // Unsquashfs needs to support zstd + // Scan its output to find the zstd compressor + FILE* fp = fdopen(fd, "r"); + char* Line {nullptr}; + size_t Len; + + bool ReadingDecompressors = false; + bool SupportsZSTD = false; + while (getline(&Line, &Len, fp) != -1) { + if (!ReadingDecompressors) { + if (strstr(Line, "Decompressors available")) { + ReadingDecompressors = true; } - else { - if 
(strstr(Line, "zstd")) { - SupportsZSTD = true; - } + } else { + if (strstr(Line, "zstd")) { + SupportsZSTD = true; } } + } - free(Line); - fclose(fp); + free(Line); + fclose(fp); - // Disable unsquashfs if it doesn't support ZSTD - if (!SupportsZSTD) { - Has_Unsquashfs = false; - } + // Disable unsquashfs if it doesn't support ZSTD + if (!SupportsZSTD) { + Has_Unsquashfs = false; } - close(fd); } + close(fd); +} - // EroFS specific tests - void CheckEroFSFuse() { - std::vector ExecveArgs = { - "erofsfuse", - "--help", - nullptr, - }; +// EroFS specific tests +void CheckEroFSFuse() { + std::vector ExecveArgs = { + "erofsfuse", + "--help", + nullptr, + }; - int32_t Result = Exec::ExecAndWaitForResponseRedirect(ExecveArgs[0], const_cast(ExecveArgs.data()), -1, -1); - Has_EroFSFuse = Result != -1; - } + int32_t Result = Exec::ExecAndWaitForResponseRedirect(ExecveArgs[0], const_cast(ExecveArgs.data()), -1, -1); + Has_EroFSFuse = Result != -1; +} - void CheckEroFSFsck() { - std::vector ExecveArgs = { - "fsck.erofs", - "-V", - nullptr, - }; +void CheckEroFSFsck() { + std::vector ExecveArgs = { + "fsck.erofs", + "-V", + nullptr, + }; - int32_t Result = Exec::ExecAndWaitForResponseRedirect(ExecveArgs[0], const_cast(ExecveArgs.data()), -1, -1); - Has_EroFSFsck = Result != -1; - } + int32_t Result = Exec::ExecAndWaitForResponseRedirect(ExecveArgs[0], const_cast(ExecveArgs.data()), -1, -1); + Has_EroFSFsck = Result != -1; +} - void Init() { - CheckCurl(); - CheckSquashfuse(); - CheckUnsquashfs(); - CheckEroFSFuse(); - CheckEroFSFsck(); - } +void Init() { + CheckCurl(); + CheckSquashfuse(); + CheckUnsquashfs(); + CheckEroFSFuse(); + CheckEroFSFsck(); } +} // namespace WorkingAppsTester namespace DistroQuery { - struct DistroInfo { - std::string DistroName; - std::string DistroVersion; - bool RollingRelease; - bool Unknown; - }; +struct DistroInfo { + std::string DistroName; + std::string DistroVersion; + bool RollingRelease; + bool Unknown; +}; + +DistroInfo GetDistroInfo() { 
+ // Detect these files in order + // + // /etc/lsb-release + // eg: + // DISTRIB_ID=Ubuntu + // DISTRIB_RELEASE=21.10 + // DISTRIB_CODENAME=impish + // DISTRIB_DESCRIPTION="Ubuntu 21.10" + // + // /etc/os-release + // eg: + // PRETTY_NAME="Ubuntu 21.10" + // NAME="Ubuntu" + // VERSION_ID="21.10" + // VERSION="21.10 (Impish Indri)" + // VERSION_CODENAME=impish + // ID=ubuntu + // ID_LIKE=debian + // HOME_URL="https://www.ubuntu.com/" + // SUPPORT_URL="https://help.ubuntu.com/" + // BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/" + // PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy" + // UBUNTU_CODENAME=impish + // + // /etc/debian_version + // eg: + // 11.0 + // + // uname -r + // eg: + // 5.13.0-22-generic + DistroInfo Info {}; + uint32_t FoundCount {}; + + if (std::filesystem::exists("/etc/lsb-release")) { + std::fstream File("/etc/lsb-release", std::fstream::in); + std::string Line; + while (std::getline(File, Line)) { + if (File.eof() || FoundCount == 2) { + break; + } - DistroInfo GetDistroInfo() { - // Detect these files in order - // - // /etc/lsb-release - // eg: - // DISTRIB_ID=Ubuntu - // DISTRIB_RELEASE=21.10 - // DISTRIB_CODENAME=impish - // DISTRIB_DESCRIPTION="Ubuntu 21.10" - // - // /etc/os-release - // eg: - // PRETTY_NAME="Ubuntu 21.10" - // NAME="Ubuntu" - // VERSION_ID="21.10" - // VERSION="21.10 (Impish Indri)" - // VERSION_CODENAME=impish - // ID=ubuntu - // ID_LIKE=debian - // HOME_URL="https://www.ubuntu.com/" - // SUPPORT_URL="https://help.ubuntu.com/" - // BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/" - // PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy" - // UBUNTU_CODENAME=impish - // - // /etc/debian_version - // eg: - // 11.0 - // - // uname -r - // eg: - // 5.13.0-22-generic - DistroInfo Info{}; - uint32_t FoundCount{}; - - if (std::filesystem::exists("/etc/lsb-release")) { - std::fstream File ("/etc/lsb-release", std::fstream::in); - std::string Line; - 
while (std::getline(File, Line)) { - if (File.eof() || FoundCount == 2) { - break; - } + std::stringstream ss(Line); + std::string Key, Value; + std::getline(ss, Key, '='); + std::getline(ss, Value, '='); - std::stringstream ss(Line); - std::string Key, Value; - std::getline(ss, Key, '='); - std::getline(ss, Value, '='); - - if (Key == "DISTRIB_ID") { - auto ToLower = [](auto Str) { - std::transform(Str.begin(), Str.end(), Str.begin(), - [](unsigned char c){ return std::tolower(c); }); - return Str; - }; - Info.DistroName = ToLower(Value); - ++FoundCount; - } - else if (Key == "DISTRIB_RELEASE") { - Info.DistroVersion = Value; - ++FoundCount; - } + if (Key == "DISTRIB_ID") { + auto ToLower = [](auto Str) { + std::transform(Str.begin(), Str.end(), Str.begin(), [](unsigned char c) { return std::tolower(c); }); + return Str; + }; + Info.DistroName = ToLower(Value); + ++FoundCount; + } else if (Key == "DISTRIB_RELEASE") { + Info.DistroVersion = Value; + ++FoundCount; } } + } - if (FoundCount == 2) { - Info.Unknown = false; - if (Info.DistroName == "arch") { - Info.RollingRelease = true; - } - return Info; + if (FoundCount == 2) { + Info.Unknown = false; + if (Info.DistroName == "arch") { + Info.RollingRelease = true; } - FoundCount = 0; - - if (std::filesystem::exists("/etc/os-release")) { - std::fstream File ("/etc/os-release", std::fstream::in); - std::string Line; - while (std::getline(File, Line)) { - if (File.eof() || FoundCount == 2) { - break; - } + return Info; + } + FoundCount = 0; + + if (std::filesystem::exists("/etc/os-release")) { + std::fstream File("/etc/os-release", std::fstream::in); + std::string Line; + while (std::getline(File, Line)) { + if (File.eof() || FoundCount == 2) { + break; + } - std::stringstream ss(Line); - std::string Key, Value; - std::getline(ss, Key, '='); - std::getline(ss, Value, '='); + std::stringstream ss(Line); + std::string Key, Value; + std::getline(ss, Key, '='); + std::getline(ss, Value, '='); - if (Key == "ID") { - 
Info.DistroName = Value; - ++FoundCount; - } - else if (Key == "VERSION_ID") { - // Ubuntu provides VERSION_ID - // Strip the two quotes from the VERSION_ID - Value = Value.substr(1, Value.size() - 2); - Info.DistroVersion = Value; - ++FoundCount; - } - else if (Key == "IMAGE_VERSION") { - // Arch provides IMAGE_VERSION - Info.DistroVersion = Value; - ++FoundCount; - } + if (Key == "ID") { + Info.DistroName = Value; + ++FoundCount; + } else if (Key == "VERSION_ID") { + // Ubuntu provides VERSION_ID + // Strip the two quotes from the VERSION_ID + Value = Value.substr(1, Value.size() - 2); + Info.DistroVersion = Value; + ++FoundCount; + } else if (Key == "IMAGE_VERSION") { + // Arch provides IMAGE_VERSION + Info.DistroVersion = Value; + ++FoundCount; } } + } - if (FoundCount == 2) { - Info.Unknown = false; - if (Info.DistroName == "arch") { - Info.RollingRelease = true; - } - return Info; + if (FoundCount == 2) { + Info.Unknown = false; + if (Info.DistroName == "arch") { + Info.RollingRelease = true; } - FoundCount = 0; + return Info; + } + FoundCount = 0; - if (std::filesystem::exists("/etc/debian_version")) { - std::fstream File ("/etc/debian_version", std::fstream::in); - std::string Line; + if (std::filesystem::exists("/etc/debian_version")) { + std::fstream File("/etc/debian_version", std::fstream::in); + std::string Line; - Info.DistroName = "debian"; + Info.DistroName = "debian"; + ++FoundCount; + while (std::getline(File, Line)) { + Info.DistroVersion = Line; ++FoundCount; - while (std::getline(File, Line)) { - Info.DistroVersion = Line; - ++FoundCount; - } - } - - if (FoundCount == 2) { - Info.Unknown = false; - return Info; } + } - Info.DistroName = "Unknown"; - Info.DistroVersion = {}; - Info.Unknown = true; + if (FoundCount == 2) { + Info.Unknown = false; return Info; } + + Info.DistroName = "Unknown"; + Info.DistroVersion = {}; + Info.Unknown = true; + return Info; } +} // namespace DistroQuery namespace WebFileFetcher { - struct FileTargets { - // These 
two are for matching version checks - std::string DistroMatch; - std::string VersionMatch; +struct FileTargets { + // These two are for matching version checks + std::string DistroMatch; + std::string VersionMatch; - // This is a human readable name - std::string DistroName; + // This is a human readable name + std::string DistroName; - // This is the URL - fextl::string URL; + // This is the URL + fextl::string URL; - // This is the hash of the file - std::string Hash; + // This is the hash of the file + std::string Hash; - // FileType - enum class FileType { - TYPE_UNKNOWN, - TYPE_SQUASHFS, - TYPE_EROFS, - }; - FileType Type; + // FileType + enum class FileType { + TYPE_UNKNOWN, + TYPE_SQUASHFS, + TYPE_EROFS, }; + FileType Type; +}; - const static std::string DownloadURL = "https://rootfs.fex-emu.gg/RootFS_links.json"; +const static std::string DownloadURL = "https://rootfs.fex-emu.gg/RootFS_links.json"; - std::string DownloadToString(const std::string &URL) { - std::vector ExecveArgs = { - "curl", - URL.c_str(), - nullptr, - }; +std::string DownloadToString(const std::string& URL) { + std::vector ExecveArgs = { + "curl", + URL.c_str(), + nullptr, + }; - return Exec::ExecAndWaitForResponseText(ExecveArgs[0], const_cast(ExecveArgs.data())); - } + return Exec::ExecAndWaitForResponseText(ExecveArgs[0], const_cast(ExecveArgs.data())); +} - bool DownloadToPath(const fextl::string &URL, const fextl::string &Path) { - auto filename = URL.substr(URL.find_last_of('/') + 1); - auto PathName = Path + filename; +bool DownloadToPath(const fextl::string& URL, const fextl::string& Path) { + auto filename = URL.substr(URL.find_last_of('/') + 1); + auto PathName = Path + filename; - std::vector ExecveArgs = { - "curl", - URL.c_str(), - "-o", - PathName.c_str(), - nullptr, - }; + std::vector ExecveArgs = { + "curl", URL.c_str(), "-o", PathName.c_str(), nullptr, + }; - return Exec::ExecAndWaitForResponse(ExecveArgs[0], const_cast(ExecveArgs.data())) == 0; - } + return 
Exec::ExecAndWaitForResponse(ExecveArgs[0], const_cast(ExecveArgs.data())) == 0; +} - bool DownloadToPathWithZenityProgress(const fextl::string &URL, const fextl::string &Path) { - auto filename = URL.substr(URL.find_last_of('/') + 1); - auto PathName = Path + filename; - - // -# for progress bar - // -o for output file - // -f for silent fail - std::string CurlPipe = fmt::format("curl -C - -#f {} -o {} 2>&1", URL, PathName); - const std::string StdBuf = "stdbuf -oL tr '\\r' '\\n'"; - const std::string SedBuf = "sed -u 's/[^0-9]*\\([0-9]*\\).*/\\1/'"; - // zenity --auto-close can't be used since `curl -C` for whatever reason prints 100% at the start. - // Making zenity vanish immediately - const std::string ZenityBuf = "zenity --time-remaining --progress --no-cancel --title 'Downloading'"; - std::string BigArgs = - fmt::format("{} | {} | {} | {}", CurlPipe, StdBuf, SedBuf, ZenityBuf); - std::vector ExecveArgs = { - "/bin/sh", - "-c", - BigArgs.c_str(), - nullptr, - }; +bool DownloadToPathWithZenityProgress(const fextl::string& URL, const fextl::string& Path) { + auto filename = URL.substr(URL.find_last_of('/') + 1); + auto PathName = Path + filename; + + // -# for progress bar + // -o for output file + // -f for silent fail + std::string CurlPipe = fmt::format("curl -C - -#f {} -o {} 2>&1", URL, PathName); + const std::string StdBuf = "stdbuf -oL tr '\\r' '\\n'"; + const std::string SedBuf = "sed -u 's/[^0-9]*\\([0-9]*\\).*/\\1/'"; + // zenity --auto-close can't be used since `curl -C` for whatever reason prints 100% at the start. 
+ // Making zenity vanish immediately + const std::string ZenityBuf = "zenity --time-remaining --progress --no-cancel --title 'Downloading'"; + std::string BigArgs = fmt::format("{} | {} | {} | {}", CurlPipe, StdBuf, SedBuf, ZenityBuf); + std::vector ExecveArgs = { + "/bin/sh", + "-c", + BigArgs.c_str(), + nullptr, + }; - return Exec::ExecAndWaitForResponse(ExecveArgs[0], const_cast(ExecveArgs.data())) == 0; - } + return Exec::ExecAndWaitForResponse(ExecveArgs[0], const_cast(ExecveArgs.data())) == 0; +} - struct JsonAllocator { - jsonPool_t PoolObject; - std::unique_ptr> json_objects; - }; - static_assert(offsetof(JsonAllocator, PoolObject) == 0, "This needs to be at offset zero"); +struct JsonAllocator { + jsonPool_t PoolObject; + std::unique_ptr> json_objects; +}; +static_assert(offsetof(JsonAllocator, PoolObject) == 0, "This needs to be at offset zero"); - json_t* PoolInit(jsonPool_t* Pool) { - JsonAllocator* alloc = reinterpret_cast(Pool); - alloc->json_objects = std::make_unique>(); - return &*alloc->json_objects->emplace(alloc->json_objects->end()); - } +json_t* PoolInit(jsonPool_t* Pool) { + JsonAllocator* alloc = reinterpret_cast(Pool); + alloc->json_objects = std::make_unique>(); + return &*alloc->json_objects->emplace(alloc->json_objects->end()); +} - json_t* PoolAlloc(jsonPool_t* Pool) { - JsonAllocator* alloc = reinterpret_cast(Pool); - return &*alloc->json_objects->emplace(alloc->json_objects->end()); - } +json_t* PoolAlloc(jsonPool_t* Pool) { + JsonAllocator* alloc = reinterpret_cast(Pool); + return &*alloc->json_objects->emplace(alloc->json_objects->end()); +} - std::optional> GetRootFSLinks() { - // Decode the filetargets - std::string Data = DownloadToString(DownloadURL); +std::optional> GetRootFSLinks() { + // Decode the filetargets + std::string Data = DownloadToString(DownloadURL); - if (Data.empty()) { - return std::nullopt; - } + if (Data.empty()) { + return std::nullopt; + } - JsonAllocator Pool { - .PoolObject = { + JsonAllocator Pool { + 
.PoolObject = + { .init = PoolInit, .alloc = PoolAlloc, }, - }; - - json_t const *json = json_createWithPool(&Data.at(0), &Pool.PoolObject); - if (!json) { - fprintf(stderr, "Couldn't create json"); - return {}; - } - - json_t const* RootList = json_getProperty(json, "v1"); - - if (!RootList) { - fprintf(stderr, "Couldn't get root list"); - return {}; - } - - std::vector Targets; + }; - for (json_t const* RootItem = json_getChild(RootList); - RootItem != nullptr; - RootItem = json_getSibling(RootItem)) { + const json_t* json = json_createWithPool(&Data.at(0), &Pool.PoolObject); + if (!json) { + fprintf(stderr, "Couldn't create json"); + return {}; + } - FileTargets Target; - Target.DistroName = json_getName(RootItem); + const json_t* RootList = json_getProperty(json, "v1"); - for (json_t const* DataItem = json_getChild(RootItem); - DataItem != nullptr; - DataItem = json_getSibling(DataItem)) { - auto DataName = std::string_view {json_getName(DataItem)}; + if (!RootList) { + fprintf(stderr, "Couldn't get root list"); + return {}; + } - if (DataName == "DistroMatch") { - Target.DistroMatch = json_getValue(DataItem); - } - else if (DataName == "DistroVersion") { - Target.VersionMatch = json_getValue(DataItem); - } - else if (DataName == "URL") { - Target.URL = json_getValue(DataItem); - } - else if (DataName == "Hash") { - Target.Hash = json_getValue(DataItem); - } - else if (DataName == "Type") { - auto DataValue = std::string_view {json_getValue(DataItem)}; - if (DataValue == "squashfs") { - Target.Type = FileTargets::FileType::TYPE_SQUASHFS; - } - else if (DataValue == "erofs") { - Target.Type = FileTargets::FileType::TYPE_EROFS; - } - else { - Target.Type = FileTargets::FileType::TYPE_UNKNOWN; - } + std::vector Targets; + + for (const json_t* RootItem = json_getChild(RootList); RootItem != nullptr; RootItem = json_getSibling(RootItem)) { + + FileTargets Target; + Target.DistroName = json_getName(RootItem); + + for (const json_t* DataItem = json_getChild(RootItem); 
DataItem != nullptr; DataItem = json_getSibling(DataItem)) { + auto DataName = std::string_view {json_getName(DataItem)}; + + if (DataName == "DistroMatch") { + Target.DistroMatch = json_getValue(DataItem); + } else if (DataName == "DistroVersion") { + Target.VersionMatch = json_getValue(DataItem); + } else if (DataName == "URL") { + Target.URL = json_getValue(DataItem); + } else if (DataName == "Hash") { + Target.Hash = json_getValue(DataItem); + } else if (DataName == "Type") { + auto DataValue = std::string_view {json_getValue(DataItem)}; + if (DataValue == "squashfs") { + Target.Type = FileTargets::FileType::TYPE_SQUASHFS; + } else if (DataValue == "erofs") { + Target.Type = FileTargets::FileType::TYPE_EROFS; + } else { + Target.Type = FileTargets::FileType::TYPE_UNKNOWN; } } - bool SupportsSquashFS = WorkingAppsTester::Has_Squashfuse || WorkingAppsTester::Has_Unsquashfs; - bool SupportsEroFS = WorkingAppsTester::Has_EroFSFuse; - if ((Target.Type == FileTargets::FileType::TYPE_SQUASHFS && SupportsSquashFS) || - (Target.Type == FileTargets::FileType::TYPE_EROFS && SupportsEroFS)) { - // If we don't understand the type, then we can't use this. - // Additionally if the type is erofs but the user doesn't have erofsfuse, then we can't use this - Targets.emplace_back(Target); - } } - - return Targets; + bool SupportsSquashFS = WorkingAppsTester::Has_Squashfuse || WorkingAppsTester::Has_Unsquashfs; + bool SupportsEroFS = WorkingAppsTester::Has_EroFSFuse; + if ((Target.Type == FileTargets::FileType::TYPE_SQUASHFS && SupportsSquashFS) || + (Target.Type == FileTargets::FileType::TYPE_EROFS && SupportsEroFS)) { + // If we don't understand the type, then we can't use this. 
+ // Additionally if the type is erofs but the user doesn't have erofsfuse, then we can't use this + Targets.emplace_back(Target); + } } + + return Targets; } +} // namespace WebFileFetcher namespace Zenity { - bool ExecWithQuestion(const fextl::string &Question) { - fextl::string TextArg = "--text=" + Question; - const char *Args[] = { - "zenity", - "--question", - TextArg.c_str(), - nullptr, - }; +bool ExecWithQuestion(const fextl::string& Question) { + fextl::string TextArg = "--text=" + Question; + const char* Args[] = { + "zenity", + "--question", + TextArg.c_str(), + nullptr, + }; - int32_t Result = Exec::ExecAndWaitForResponse(Args[0], const_cast(Args)); - // 0 on Yes, 1 on No - return Result == 0; - } + int32_t Result = Exec::ExecAndWaitForResponse(Args[0], const_cast(Args)); + // 0 on Yes, 1 on No + return Result == 0; +} - void ExecWithInfo(const fextl::string &Text) { - fextl::string TextArg = "--text=" + Text; - const char *Args[] = { - "zenity", - "--info", - TextArg.c_str(), - nullptr, - }; +void ExecWithInfo(const fextl::string& Text) { + fextl::string TextArg = "--text=" + Text; + const char* Args[] = { + "zenity", + "--info", + TextArg.c_str(), + nullptr, + }; - Exec::ExecAndWaitForResponse(Args[0], const_cast(Args)); - } + Exec::ExecAndWaitForResponse(Args[0], const_cast(Args)); +} - bool AskForConfirmation(const fextl::string &Question) { - return ArgOptions::AssumeYes || ExecWithQuestion(Question); - } +bool AskForConfirmation(const fextl::string& Question) { + return ArgOptions::AssumeYes || ExecWithQuestion(Question); +} - int32_t AskForConfirmationList(const fextl::string &Text, const std::vector &Arguments) { - fextl::string TextArg = "--text=" + Text; - - std::vector ExecveArgs = { - "zenity", - "--list", - TextArg.c_str(), - "--hide-header", - "--column=Index", - "--column=Text", - "--hide-column=1", - }; +int32_t AskForConfirmationList(const fextl::string& Text, const std::vector& Arguments) { + fextl::string TextArg = "--text=" + Text; - 
std::vector NumberArgs; - for (size_t i = 0; i < Arguments.size(); ++i) { - NumberArgs.emplace_back(std::to_string(i)); - } + std::vector ExecveArgs = { + "zenity", "--list", TextArg.c_str(), "--hide-header", "--column=Index", "--column=Text", "--hide-column=1", + }; - for (size_t i = 0; i < Arguments.size(); ++i) { - const auto &Arg = Arguments[i]; - ExecveArgs.emplace_back(NumberArgs[i].c_str()); - ExecveArgs.emplace_back(Arg.c_str()); - } - ExecveArgs.emplace_back(nullptr); + std::vector NumberArgs; + for (size_t i = 0; i < Arguments.size(); ++i) { + NumberArgs.emplace_back(std::to_string(i)); + } - auto Result = Exec::ExecAndWaitForResponseText(ExecveArgs[0], const_cast(ExecveArgs.data())); - if (Result.empty()) { - return -1; - } - return std::stoi(Result); + for (size_t i = 0; i < Arguments.size(); ++i) { + const auto& Arg = Arguments[i]; + ExecveArgs.emplace_back(NumberArgs[i].c_str()); + ExecveArgs.emplace_back(Arg.c_str()); } + ExecveArgs.emplace_back(nullptr); - int32_t AskForComplexConfirmationList(const std::string &Text, const std::vector &Arguments) { - std::string TextArg = "--text=" + Text; + auto Result = Exec::ExecAndWaitForResponseText(ExecveArgs[0], const_cast(ExecveArgs.data())); + if (Result.empty()) { + return -1; + } + return std::stoi(Result); +} - std::vector ExecveArgs = { - "zenity", - "--list", - TextArg.c_str(), - }; +int32_t AskForComplexConfirmationList(const std::string& Text, const std::vector& Arguments) { + std::string TextArg = "--text=" + Text; - for (auto &Arg : Arguments) { - ExecveArgs.emplace_back(Arg.c_str()); - } - ExecveArgs.emplace_back(nullptr); + std::vector ExecveArgs = { + "zenity", + "--list", + TextArg.c_str(), + }; - auto Result = Exec::ExecAndWaitForResponseText(ExecveArgs[0], const_cast(ExecveArgs.data())); - if (Result.empty()) { - return -1; - } - return std::stoi(Result); + for (auto& Arg : Arguments) { + ExecveArgs.emplace_back(Arg.c_str()); } + ExecveArgs.emplace_back(nullptr); - int32_t 
AskForDistroSelection(DistroQuery::DistroInfo &Info, std::vector &Targets) { - // Search for an exact match - int32_t DistroIndex = -1; - if (!Info.Unknown) { - for (size_t i = 0; i < Targets.size(); ++i) { - const auto &Target = Targets[i]; - - bool ExactMatch = Target.DistroMatch == Info.DistroName && - (Info.RollingRelease || Target.VersionMatch == Info.DistroVersion); - if (ExactMatch) { - fextl::string Question = fextl::fmt::format("Found exact match for distro '{}'. Do you want to select this image?", Target.DistroName); - if (ExecWithQuestion(Question)) { - DistroIndex = i; - break; - } + auto Result = Exec::ExecAndWaitForResponseText(ExecveArgs[0], const_cast(ExecveArgs.data())); + if (Result.empty()) { + return -1; + } + return std::stoi(Result); +} + +int32_t AskForDistroSelection(DistroQuery::DistroInfo& Info, std::vector& Targets) { + // Search for an exact match + int32_t DistroIndex = -1; + if (!Info.Unknown) { + for (size_t i = 0; i < Targets.size(); ++i) { + const auto& Target = Targets[i]; + + bool ExactMatch = Target.DistroMatch == Info.DistroName && (Info.RollingRelease || Target.VersionMatch == Info.DistroVersion); + if (ExactMatch) { + fextl::string Question = fextl::fmt::format("Found exact match for distro '{}'. Do you want to select this image?", Target.DistroName); + if (ExecWithQuestion(Question)) { + DistroIndex = i; + break; } } } + } - if (DistroIndex != -1) { - return DistroIndex; - } - - if (ArgOptions::DistroListOption == ArgOptions::ListQueryOption::OPTION_FIRST) { - // Return the first option if not an exact match. - return 0; - } + if (DistroIndex != -1) { + return DistroIndex; + } - std::vector Args; + if (ArgOptions::DistroListOption == ArgOptions::ListQueryOption::OPTION_FIRST) { + // Return the first option if not an exact match. 
+ return 0; + } - Args.emplace_back("--column=Index"); - Args.emplace_back("--column=Distro"); - Args.emplace_back("--hide-column=1"); - for (size_t i = 0; i < Targets.size(); ++i) { - const auto &Target = Targets[i]; - Args.emplace_back(std::to_string(i)); - Args.emplace_back(Target.DistroName); - } + std::vector Args; - std::string Text = "RootFS list selection"; - return AskForComplexConfirmationList(Text, Args); + Args.emplace_back("--column=Index"); + Args.emplace_back("--column=Distro"); + Args.emplace_back("--hide-column=1"); + for (size_t i = 0; i < Targets.size(); ++i) { + const auto& Target = Targets[i]; + Args.emplace_back(std::to_string(i)); + Args.emplace_back(Target.DistroName); } - bool ValidateCheckExists(const WebFileFetcher::FileTargets &Target) { - fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/"; - auto filename = Target.URL.substr(Target.URL.find_last_of('/') + 1); - auto PathName = RootFS + filename; - uint64_t ExpectedHash = std::stoul(Target.Hash, nullptr, 16); - - std::error_code ec; - if (std::filesystem::exists(PathName, ec)) { - const std::vector Args { - "Overwrite", - "Validate", - }; - fextl::string Text = filename + " already exists. What do you want to do?"; - int Result = AskForConfirmationList(Text, Args); - if (Result == -1) { - return false; - } + std::string Text = "RootFS list selection"; + return AskForComplexConfirmationList(Text, Args); +} - auto Res = XXFileHash::HashFile(PathName); - if (Result == 0) { - if (Res.first == true && - Res.second == ExpectedHash) { - fextl::string Text = fextl::fmt::format("{} matches expected hash. 
Skipping download", filename); - ExecWithInfo(Text); - return false; - } +bool ValidateCheckExists(const WebFileFetcher::FileTargets& Target) { + fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/"; + auto filename = Target.URL.substr(Target.URL.find_last_of('/') + 1); + auto PathName = RootFS + filename; + uint64_t ExpectedHash = std::stoul(Target.Hash, nullptr, 16); + + std::error_code ec; + if (std::filesystem::exists(PathName, ec)) { + const std::vector Args { + "Overwrite", + "Validate", + }; + fextl::string Text = filename + " already exists. What do you want to do?"; + int Result = AskForConfirmationList(Text, Args); + if (Result == -1) { + return false; + } + + auto Res = XXFileHash::HashFile(PathName); + if (Result == 0) { + if (Res.first == true && Res.second == ExpectedHash) { + fextl::string Text = fextl::fmt::format("{} matches expected hash. Skipping download", filename); + ExecWithInfo(Text); + return false; } - else if (Result == 1) { - if (Res.first == false || - Res.second != ExpectedHash) { - return AskForConfirmation("RootFS doesn't match hash!\nDo you want to redownload?"); - } - else { - fextl::string Text = fextl::fmt::format("{} matches expected hash", filename); - ExecWithInfo(Text); - return false; - } + } else if (Result == 1) { + if (Res.first == false || Res.second != ExpectedHash) { + return AskForConfirmation("RootFS doesn't match hash!\nDo you want to redownload?"); + } else { + fextl::string Text = fextl::fmt::format("{} matches expected hash", filename); + ExecWithInfo(Text); + return false; } } - - return true; } - bool ValidateDownloadSelection(const WebFileFetcher::FileTargets &Target) { - fextl::string Text = fextl::fmt::format("Selected Rootfs: {}\n", Target.DistroName); - Text += fmt::format("\tURL: {}\n", Target.URL); - Text += fmt::format("Are you sure that you want to download this image"); + return true; +} - if (AskForConfirmation(Text)) { - fextl::string RootFS = FEXCore::Config::GetDataDirectory() + 
"RootFS/"; - std::error_code ec{}; - if (!std::filesystem::exists(RootFS, ec)) { - // Doesn't exist, create the the folder as a user convenience - if (!std::filesystem::create_directories(RootFS, ec)) { - // Well I guess we failed - Text = fmt::format("Couldn't create {} path for storing RootFS", RootFS); - ExecWithInfo(Text); - return false; - } - } +bool ValidateDownloadSelection(const WebFileFetcher::FileTargets& Target) { + fextl::string Text = fextl::fmt::format("Selected Rootfs: {}\n", Target.DistroName); + Text += fmt::format("\tURL: {}\n", Target.URL); + Text += fmt::format("Are you sure that you want to download this image"); - if (!WebFileFetcher::DownloadToPathWithZenityProgress(Target.URL, RootFS)) { + if (AskForConfirmation(Text)) { + fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/"; + std::error_code ec {}; + if (!std::filesystem::exists(RootFS, ec)) { + // Doesn't exist, create the the folder as a user convenience + if (!std::filesystem::create_directories(RootFS, ec)) { + // Well I guess we failed + Text = fmt::format("Couldn't create {} path for storing RootFS", RootFS); + ExecWithInfo(Text); return false; } + } - return true; + if (!WebFileFetcher::DownloadToPathWithZenityProgress(Target.URL, RootFS)) { + return false; } - return false; + + return true; } + return false; } +} // namespace Zenity namespace TTY { - bool AskForConfirmation(const fextl::string &Question) { - if (ArgOptions::AssumeYes) { - return true; - } +bool AskForConfirmation(const fextl::string& Question) { + if (ArgOptions::AssumeYes) { + return true; + } - auto ToLowerInPlace = [](auto &Str) { - std::transform(Str.begin(), Str.end(), Str.begin(), - [](unsigned char c){ return std::tolower(c); }); - }; + auto ToLowerInPlace = [](auto& Str) { + std::transform(Str.begin(), Str.end(), Str.begin(), [](unsigned char c) { return std::tolower(c); }); + }; - std::cout << Question << std::endl; - std::cout << "Response {y,yes,1} or {n,no,0}" << std::endl; - 
std::string Response; - std::cin >> Response; + std::cout << Question << std::endl; + std::cout << "Response {y,yes,1} or {n,no,0}" << std::endl; + std::string Response; + std::cin >> Response; - ToLowerInPlace(Response); - if (Response == "y" || - Response == "yes" || - Response == "1") { - return true; - } - else if (Response == "n" || - Response == "no" || - Response == "0") { - return false; - } - else { - std::cout << "Unknown response. Assuming no" << std::endl; - return false; - } + ToLowerInPlace(Response); + if (Response == "y" || Response == "yes" || Response == "1") { + return true; + } else if (Response == "n" || Response == "no" || Response == "0") { + return false; + } else { + std::cout << "Unknown response. Assuming no" << std::endl; + return false; } +} - void ExecWithInfo(const fextl::string &Text) { - std::cout << Text << std::endl; - } +void ExecWithInfo(const fextl::string& Text) { + std::cout << Text << std::endl; +} - int32_t AskForConfirmationList(const fextl::string &Text, const std::vector &List) { - fmt::print("{}\n", Text); - fmt::print("Options:\n"); - fmt::print("\t0: Cancel\n"); +int32_t AskForConfirmationList(const fextl::string& Text, const std::vector& List) { + fmt::print("{}\n", Text); + fmt::print("Options:\n"); + fmt::print("\t0: Cancel\n"); - for (size_t i = 0; i < List.size(); ++i) { - fmt::print("\t{}: {}\n", i+1, List[i]); - } + for (size_t i = 0; i < List.size(); ++i) { + fmt::print("\t{}: {}\n", i + 1, List[i]); + } - fmt::print("\t\nResponse {{1-{}}} or 0 to cancel\n", List.size()); - fextl::string Response; - std::cin >> Response; + fmt::print("\t\nResponse {{1-{}}} or 0 to cancel\n", List.size()); + fextl::string Response; + std::cin >> Response; - int32_t ResponseInt = std::stol(Response.data(), nullptr, 0); - if (ResponseInt == 0) { - return -1; - } - else if (ResponseInt >= 1 && - (ResponseInt - 1) < List.size()) { - return ResponseInt - 1; - } - else { - std::cout << "Unknown response. 
Assuming cancel" << std::endl; - return -1; - } + int32_t ResponseInt = std::stol(Response.data(), nullptr, 0); + if (ResponseInt == 0) { + return -1; + } else if (ResponseInt >= 1 && (ResponseInt - 1) < List.size()) { + return ResponseInt - 1; + } else { + std::cout << "Unknown response. Assuming cancel" << std::endl; + return -1; } +} - int32_t AskForDistroSelection(DistroQuery::DistroInfo &Info, std::vector &Targets) { - // Search for an exact match - int32_t DistroIndex = -1; - if (!Info.Unknown) { - for (size_t i = 0; i < Targets.size(); ++i) { - const auto &Target = Targets[i]; - - bool ExactMatch = Target.DistroMatch == Info.DistroName && - Target.VersionMatch == Info.DistroVersion; - if (ExactMatch) { - fextl::string Question = fextl::fmt::format("Found exact match for distro '{}'. Do you want to select this image?", Target.DistroName); - if (AskForConfirmation(Question)) { - DistroIndex = i; - break; - } +int32_t AskForDistroSelection(DistroQuery::DistroInfo& Info, std::vector& Targets) { + // Search for an exact match + int32_t DistroIndex = -1; + if (!Info.Unknown) { + for (size_t i = 0; i < Targets.size(); ++i) { + const auto& Target = Targets[i]; + + bool ExactMatch = Target.DistroMatch == Info.DistroName && Target.VersionMatch == Info.DistroVersion; + if (ExactMatch) { + fextl::string Question = fextl::fmt::format("Found exact match for distro '{}'. Do you want to select this image?", Target.DistroName); + if (AskForConfirmation(Question)) { + DistroIndex = i; + break; } } } + } - if (DistroIndex != -1) { - return DistroIndex; - } - - if (ArgOptions::DistroListOption == ArgOptions::ListQueryOption::OPTION_FIRST) { - // Return the first option if not an exact match. 
- return 0; - } + if (DistroIndex != -1) { + return DistroIndex; + } - std::vector Args; - for (size_t i = 0; i < Targets.size(); ++i) { - const auto &Target = Targets[i]; - Args.emplace_back(Target.DistroName); - } + if (ArgOptions::DistroListOption == ArgOptions::ListQueryOption::OPTION_FIRST) { + // Return the first option if not an exact match. + return 0; + } - fextl::string Text = "RootFS list selection"; - return AskForConfirmationList(Text, Args); + std::vector Args; + for (size_t i = 0; i < Targets.size(); ++i) { + const auto& Target = Targets[i]; + Args.emplace_back(Target.DistroName); } - bool ValidateCheckExists(const WebFileFetcher::FileTargets &Target) { - fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/"; - auto filename = Target.URL.substr(Target.URL.find_last_of('/') + 1); - auto PathName = RootFS + filename; - uint64_t ExpectedHash = std::stoul(Target.Hash, nullptr, 16); - - std::error_code ec; - if (std::filesystem::exists(PathName, ec)) { - const std::vector Args { - "Overwrite", - "Validate", - }; - fextl::string Text = filename + " already exists. What do you want to do?"; - int Result = AskForConfirmationList(Text, Args); - if (Result == -1) { + fextl::string Text = "RootFS list selection"; + return AskForConfirmationList(Text, Args); +} + +bool ValidateCheckExists(const WebFileFetcher::FileTargets& Target) { + fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/"; + auto filename = Target.URL.substr(Target.URL.find_last_of('/') + 1); + auto PathName = RootFS + filename; + uint64_t ExpectedHash = std::stoul(Target.Hash, nullptr, 16); + + std::error_code ec; + if (std::filesystem::exists(PathName, ec)) { + const std::vector Args { + "Overwrite", + "Validate", + }; + fextl::string Text = filename + " already exists. 
What do you want to do?"; + int Result = AskForConfirmationList(Text, Args); + if (Result == -1) { + return false; + } + fmt::print("Validating RootFS hash...\n"); + auto Res = XXFileHash::HashFile(PathName); + if (Result == 0) { + if (Res.first == true && Res.second == ExpectedHash) { + fmt::print("{} matches expected hash. Skipping downloading\n", filename); return false; } - fmt::print("Validating RootFS hash...\n"); - auto Res = XXFileHash::HashFile(PathName); - if (Result == 0) { - if (Res.first == true && - Res.second == ExpectedHash) { - fmt::print("{} matches expected hash. Skipping downloading\n", filename); - return false; - } - } - else if (Result == 1) { - if (Res.first == false || - Res.second != ExpectedHash) { - fmt::print("RootFS doesn't match hash!\n"); - return AskForConfirmation("Do you want to redownload?"); - } - else { - fmt::print("{} matches expected hash\n", filename); - return false; - } + } else if (Result == 1) { + if (Res.first == false || Res.second != ExpectedHash) { + fmt::print("RootFS doesn't match hash!\n"); + return AskForConfirmation("Do you want to redownload?"); + } else { + fmt::print("{} matches expected hash\n", filename); + return false; } } - - return true; } - bool ValidateDownloadSelection(const WebFileFetcher::FileTargets &Target) { - fmt::print("Selected Rootfs: {}\n", Target.DistroName); - fmt::print("\tURL: {}\n", Target.URL); - - if (AskForConfirmation("Are you sure that you want to download this image")) { - fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/"; - std::error_code ec{}; - if (!std::filesystem::exists(RootFS, ec)) { - // Doesn't exist, create the the folder as a user convenience - if (!std::filesystem::create_directories(RootFS, ec)) { - // Well I guess we failed - fmt::print("Couldn't create {} path for storing RootFS\n", RootFS); - return false; - } - } - auto DoDownload = [&Target, &RootFS]() -> bool { - if (!WebFileFetcher::DownloadToPath(Target.URL, RootFS)) { - 
fmt::print("Couldn't download RootFS\n"); - return false; - } + return true; +} - return true; - }; +bool ValidateDownloadSelection(const WebFileFetcher::FileTargets& Target) { + fmt::print("Selected Rootfs: {}\n", Target.DistroName); + fmt::print("\tURL: {}\n", Target.URL); - while (DoDownload() == false) { - if (AskForConfirmation("Curl RootFS download failed. Do you want to retry?")) { - // Loop to retry - } - else { - return false; - } + if (AskForConfirmation("Are you sure that you want to download this image")) { + fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/"; + std::error_code ec {}; + if (!std::filesystem::exists(RootFS, ec)) { + // Doesn't exist, create the the folder as a user convenience + if (!std::filesystem::create_directories(RootFS, ec)) { + // Well I guess we failed + fmt::print("Couldn't create {} path for storing RootFS\n", RootFS); + return false; + } + } + auto DoDownload = [&Target, &RootFS]() -> bool { + if (!WebFileFetcher::DownloadToPath(Target.URL, RootFS)) { + fmt::print("Couldn't download RootFS\n"); + return false; } - // Got here then we passed return true; + }; + + while (DoDownload() == false) { + if (AskForConfirmation("Curl RootFS download failed. 
Do you want to retry?")) { + // Loop to retry + } else { + return false; + } } - return false; + + // Got here then we passed + return true; } + return false; } +} // namespace TTY namespace { - bool IsTTY{}; - - std::function _AskForConfirmation; - std::function _ExecWithInfo; - std::function &List)> _AskForConfirmationList; - std::function &Targets)> _AskForDistroSelection; - std::function _ValidateCheckExists; - std::function _ValidateDownloadSelection; - - void CheckTTY() { - IsTTY = isatty(STDOUT_FILENO); - - if (IsTTY) { - _AskForConfirmation = TTY::AskForConfirmation; - _ExecWithInfo = TTY::ExecWithInfo; - _AskForConfirmationList = TTY::AskForConfirmationList; - _AskForDistroSelection = TTY::AskForDistroSelection; - _ValidateCheckExists = TTY::ValidateCheckExists; - _ValidateDownloadSelection = TTY::ValidateDownloadSelection; - } - else { - _AskForConfirmation = Zenity::AskForConfirmation; - _ExecWithInfo = Zenity::ExecWithInfo; - _AskForConfirmationList = Zenity::AskForConfirmationList; - _AskForDistroSelection = Zenity::AskForDistroSelection; - _ValidateCheckExists = Zenity::ValidateCheckExists; - _ValidateDownloadSelection = Zenity::ValidateDownloadSelection; - } - } - - bool AskForConfirmation(const fextl::string &Question) { - return _AskForConfirmation(Question); +bool IsTTY {}; + +std::function _AskForConfirmation; +std::function _ExecWithInfo; +std::function& List)> _AskForConfirmationList; +std::function& Targets)> _AskForDistroSelection; +std::function _ValidateCheckExists; +std::function _ValidateDownloadSelection; + +void CheckTTY() { + IsTTY = isatty(STDOUT_FILENO); + + if (IsTTY) { + _AskForConfirmation = TTY::AskForConfirmation; + _ExecWithInfo = TTY::ExecWithInfo; + _AskForConfirmationList = TTY::AskForConfirmationList; + _AskForDistroSelection = TTY::AskForDistroSelection; + _ValidateCheckExists = TTY::ValidateCheckExists; + _ValidateDownloadSelection = TTY::ValidateDownloadSelection; + } else { + _AskForConfirmation = 
Zenity::AskForConfirmation; + _ExecWithInfo = Zenity::ExecWithInfo; + _AskForConfirmationList = Zenity::AskForConfirmationList; + _AskForDistroSelection = Zenity::AskForDistroSelection; + _ValidateCheckExists = Zenity::ValidateCheckExists; + _ValidateDownloadSelection = Zenity::ValidateDownloadSelection; } +} - void ExecWithInfo(const fextl::string &Text) { - _ExecWithInfo(Text); - } +bool AskForConfirmation(const fextl::string& Question) { + return _AskForConfirmation(Question); +} - int32_t AskForConfirmationList(const fextl::string &Text, const std::vector &Arguments) { - return _AskForConfirmationList(Text, Arguments); - } +void ExecWithInfo(const fextl::string& Text) { + _ExecWithInfo(Text); +} - int32_t AskForDistroSelection(std::vector &Targets) { - auto Info = DistroQuery::GetDistroInfo(); +int32_t AskForConfirmationList(const fextl::string& Text, const std::vector& Arguments) { + return _AskForConfirmationList(Text, Arguments); +} - if (!ArgOptions::DistroName.empty()) { - Info.DistroName = ArgOptions::DistroName; - } - if (!ArgOptions::DistroVersion.empty()) { - Info.DistroVersion = ArgOptions::DistroVersion; - } +int32_t AskForDistroSelection(std::vector& Targets) { + auto Info = DistroQuery::GetDistroInfo(); - return _AskForDistroSelection(Info, Targets); + if (!ArgOptions::DistroName.empty()) { + Info.DistroName = ArgOptions::DistroName; } - - bool ValidateCheckExists(const WebFileFetcher::FileTargets &Target) { - return _ValidateCheckExists(Target); + if (!ArgOptions::DistroVersion.empty()) { + Info.DistroVersion = ArgOptions::DistroVersion; } - bool ValidateDownloadSelection(const WebFileFetcher::FileTargets &Target) { - return _ValidateDownloadSelection(Target); - } + return _AskForDistroSelection(Info, Targets); +} + +bool ValidateCheckExists(const WebFileFetcher::FileTargets& Target) { + return _ValidateCheckExists(Target); +} + +bool ValidateDownloadSelection(const WebFileFetcher::FileTargets& Target) { + return 
_ValidateDownloadSelection(Target); } +} // namespace namespace ConfigSetter { - void SetRootFSAsDefault(const fextl::string &RootFS) { - fextl::string Filename = FEXCore::Config::GetConfigFileLocation(); - auto LoadedConfig = FEX::Config::CreateMainLayer(&Filename); - LoadedConfig->Load(); - LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_ROOTFS, RootFS); - FEX::Config::SaveLayerToJSON(Filename, LoadedConfig.get()); - } +void SetRootFSAsDefault(const fextl::string& RootFS) { + fextl::string Filename = FEXCore::Config::GetConfigFileLocation(); + auto LoadedConfig = FEX::Config::CreateMainLayer(&Filename); + LoadedConfig->Load(); + LoadedConfig->EraseSet(FEXCore::Config::ConfigOption::CONFIG_ROOTFS, RootFS); + FEX::Config::SaveLayerToJSON(Filename, LoadedConfig.get()); } +} // namespace ConfigSetter namespace UnSquash { - bool UnsquashRootFS(const fextl::string &Path, const fextl::string &RootFS, const fextl::string &FolderName) { - auto TargetFolder = Path + FolderName; - - bool Extract = true; - std::error_code ec; - if (std::filesystem::exists(TargetFolder, ec)) { - fextl::string Question = "Target folder \"" + FolderName + "\" already exists. Overwrite?"; - if (AskForConfirmation(Question)) { - if (std::filesystem::remove_all(TargetFolder, ec) != ~0ULL) { - Extract = true; - } +bool UnsquashRootFS(const fextl::string& Path, const fextl::string& RootFS, const fextl::string& FolderName) { + auto TargetFolder = Path + FolderName; + + bool Extract = true; + std::error_code ec; + if (std::filesystem::exists(TargetFolder, ec)) { + fextl::string Question = "Target folder \"" + FolderName + "\" already exists. 
Overwrite?"; + if (AskForConfirmation(Question)) { + if (std::filesystem::remove_all(TargetFolder, ec) != ~0ULL) { + Extract = true; } } + } - if (Extract) { - const std::vector ExecveArgs = { - "unsquashfs", - "-f", - "-d", - TargetFolder.c_str(), - RootFS.c_str(), - nullptr, - }; - - return Exec::ExecAndWaitForResponse(ExecveArgs[0], const_cast(ExecveArgs.data())) == 0; - } + if (Extract) { + const std::vector ExecveArgs = { + "unsquashfs", "-f", "-d", TargetFolder.c_str(), RootFS.c_str(), nullptr, + }; - return false; + return Exec::ExecAndWaitForResponse(ExecveArgs[0], const_cast(ExecveArgs.data())) == 0; } - bool ExtractEroFS(const fextl::string &Path, const fextl::string &RootFS, const fextl::string &FolderName) { - auto TargetFolder = Path + FolderName; + return false; +} - bool Extract = true; - std::error_code ec; - if (std::filesystem::exists(TargetFolder, ec)) { - fextl::string Question = "Target folder \"" + FolderName + "\" already exists. Overwrite?"; - if (AskForConfirmation(Question)) { - if (std::filesystem::remove_all(TargetFolder, ec) != ~0ULL) { - Extract = true; - } - if (ec) { - ExecWithInfo("Couldn't remove previous directory. Won't extract."); - } +bool ExtractEroFS(const fextl::string& Path, const fextl::string& RootFS, const fextl::string& FolderName) { + auto TargetFolder = Path + FolderName; + + bool Extract = true; + std::error_code ec; + if (std::filesystem::exists(TargetFolder, ec)) { + fextl::string Question = "Target folder \"" + FolderName + "\" already exists. Overwrite?"; + if (AskForConfirmation(Question)) { + if (std::filesystem::remove_all(TargetFolder, ec) != ~0ULL) { + Extract = true; + } + if (ec) { + ExecWithInfo("Couldn't remove previous directory. Won't extract."); } } + } - if (Extract) { - ExecWithInfo("Extracting Erofs. 
This might take a few minutes."); - - const auto ExtractOption = fmt::format("--extract={}", TargetFolder); - const std::vector ExecveArgs = { - "fsck.erofs", - ExtractOption.c_str(), - RootFS.c_str(), - nullptr, - }; + if (Extract) { + ExecWithInfo("Extracting Erofs. This might take a few minutes."); - return Exec::ExecAndWaitForResponse(ExecveArgs[0], const_cast(ExecveArgs.data())) == 0; - } + const auto ExtractOption = fmt::format("--extract={}", TargetFolder); + const std::vector ExecveArgs = { + "fsck.erofs", + ExtractOption.c_str(), + RootFS.c_str(), + nullptr, + }; - return false; + return Exec::ExecAndWaitForResponse(ExecveArgs[0], const_cast(ExecveArgs.data())) == 0; } + + return false; } +} // namespace UnSquash -int main(int argc, char **argv, char **const envp) { +int main(int argc, char** argv, char** const envp) { CheckTTY(); - FEX::Config::LoadConfig( - true, - false, - argc, argv, envp, - false, {} - ); + FEX::Config::LoadConfig(true, false, argc, argv, envp, false, {}); // Reload the meta layer FEXCore::Config::ReloadMetaLayer(); @@ -1176,8 +1102,7 @@ int main(int argc, char **argv, char **const envp) { auto Res = XXFileHash::HashFile(ArgOptions::RemainingArgs[0]); if (Res.first) { fmt::print("{} has hash: {:x}\n", ArgOptions::RemainingArgs[0], Res.second); - } - else { + } else { fmt::print("Couldn't generate hash for {}\n", ArgOptions::RemainingArgs[0]); } return 0; @@ -1189,9 +1114,7 @@ int main(int argc, char **argv, char **const envp) { ExecWithInfo("curl is required to use this tool. Please install curl before using."); return -1; } - if (!WorkingAppsTester::Has_Squashfuse && - !WorkingAppsTester::Has_Unsquashfs && - !WorkingAppsTester::Has_EroFSFuse) { + if (!WorkingAppsTester::Has_Squashfuse && !WorkingAppsTester::Has_Unsquashfs && !WorkingAppsTester::Has_EroFSFuse) { // We need at least one tool to mount or extract image files ExecWithInfo("squashfuse, unsquashfs, or erofsfuse is required to use this tool. 
Please install one before using."); return -1; @@ -1200,12 +1123,10 @@ int main(int argc, char **argv, char **const envp) { FEX_CONFIG_OPT(LDPath, ROOTFS); std::error_code ec; - fextl::string Question{}; - if (LDPath().empty() || - std::filesystem::exists(LDPath(), ec) == false) { + fextl::string Question {}; + if (LDPath().empty() || std::filesystem::exists(LDPath(), ec) == false) { Question = "RootFS not found. Do you want to try and download one?"; - } - else { + } else { Question = "RootFS is already in use. Do you want to check the download list?"; } @@ -1225,15 +1146,14 @@ int main(int argc, char **argv, char **const envp) { int32_t DistroIndex = AskForDistroSelection(Targets); if (DistroIndex != -1) { - const auto &Target = Targets[DistroIndex]; + const auto& Target = Targets[DistroIndex]; fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/"; auto filename = Target.URL.substr(Target.URL.find_last_of('/') + 1); auto PathName = RootFS + filename; if (!ValidateCheckExists(Target)) { // Keep going - } - else { + } else { auto ValidateDownload = [&Target, &PathName]() -> std::pair { std::error_code ec; if (ValidateDownloadSelection(Target)) { @@ -1241,20 +1161,17 @@ int main(int argc, char **argv, char **const envp) { if (std::filesystem::exists(PathName, ec)) { auto Res = XXFileHash::HashFile(PathName); - if (Res.first == false || - Res.second != ExpectedHash) { + if (Res.first == false || Res.second != ExpectedHash) { fextl::string Text = fextl::fmt::format("Couldn't hash the rootfs or hash didn't match\n"); Text += fmt::format("Hash {:x} != Expected Hash {:x}\n", Res.second, ExpectedHash); ExecWithInfo(Text); return std::make_pair(-1, true); } - } - else { + } else { ExecWithInfo("Correctly downloaded RootFS but doesn't exist?"); return std::make_pair(-1, false); } - } - else { + } else { ExecWithInfo("Couldn't download rootfs for some reason."); return std::make_pair(-1, false); } @@ -1262,64 +1179,58 @@ int main(int argc, char **argv, char 
**const envp) { return std::make_pair(0, false); }; - std::pair Result{}; - while ((Result = ValidateDownload()).second == true && - Result.first == -1) { + std::pair Result {}; + while ((Result = ValidateDownload()).second == true && Result.first == -1) { if (AskForConfirmation("Do you want to try downloading the RootFS again?")) { // Continue the loop - } - else { + } else { // Didn't want to retry, just exit now return Result.first; } } // Early exit on other errors - if (Result.first == -1 && - Result.second == false) { + if (Result.first == -1 && Result.second == false) { return Result.first; } } struct ExtractStrings { - char const *ExtractOrAsIs; - char const *AsIsSinceMounterNonFunctional; - char const *AsIsSinceExtractorNonFunctional; - char const *AsIsSinceNothingWorks; + const char* ExtractOrAsIs; + const char* AsIsSinceMounterNonFunctional; + const char* AsIsSinceExtractorNonFunctional; + const char* AsIsSinceNothingWorks; }; ArgOptions::CompressedImageOption UseImageAs {ArgOptions::CompressedUsageOption}; - bool HasExtractor{}; - bool HasMounter{}; - std::function ExtractHelper; + bool HasExtractor {}; + bool HasMounter {}; + std::function ExtractHelper; ExtractStrings ExtractingStrings; if (Target.Type == WebFileFetcher::FileTargets::FileType::TYPE_SQUASHFS) { HasExtractor = WorkingAppsTester::Has_Unsquashfs; HasMounter = WorkingAppsTester::Has_Squashfuse; ExtractHelper = UnSquash::UnsquashRootFS; - ExtractingStrings = - { - "Do you wish to extract the squashfs file or use it as-is?", - "Squashfuse doesn't work. Do you wish to extract the squashfs file?", - "Unsquashfs doesn't work. Do you want to use the squashfs file as-is?", - "Unsquashfs and squashfuse isn't working. Leaving rootfs as-is", - }; - } - else if (Target.Type == WebFileFetcher::FileTargets::FileType::TYPE_EROFS) { + ExtractingStrings = { + "Do you wish to extract the squashfs file or use it as-is?", + "Squashfuse doesn't work. 
Do you wish to extract the squashfs file?", + "Unsquashfs doesn't work. Do you want to use the squashfs file as-is?", + "Unsquashfs and squashfuse isn't working. Leaving rootfs as-is", + }; + } else if (Target.Type == WebFileFetcher::FileTargets::FileType::TYPE_EROFS) { HasExtractor = WorkingAppsTester::Has_EroFSFsck; HasMounter = WorkingAppsTester::Has_EroFSFuse; ExtractHelper = UnSquash::ExtractEroFS; - ExtractingStrings = - { - "Do you wish to extract the erofs file or use it as-is?", - "erofsfuse doesn't work. Do you wish to extract the erofs file?", - "Extracting erofs doesn't work. Do you want to use the erofs file as-is?", - "Extracting erofs and erofsfuse isn't working. Leaving rootfs as-is", - }; + ExtractingStrings = { + "Do you wish to extract the erofs file or use it as-is?", + "erofsfuse doesn't work. Do you wish to extract the erofs file?", + "Extracting erofs doesn't work. Do you want to use the erofs file as-is?", + "Extracting erofs and erofsfuse isn't working. Leaving rootfs as-is", + }; } - int32_t Result{}; + int32_t Result {}; std::vector Args = { "Extract", "As-Is", @@ -1331,20 +1242,17 @@ int main(int argc, char **argv, char **const envp) { Result = AskForConfirmationList(ExtractingStrings.ExtractOrAsIs, Args); if (Result == 0) { UseImageAs = ArgOptions::CompressedImageOption::OPTION_EXTRACT; - } - else if (Result == 1) { + } else if (Result == 1) { UseImageAs = ArgOptions::CompressedImageOption::OPTION_ASIS; } - } - else { + } else { Args.pop_back(); Result = AskForConfirmationList(ExtractingStrings.AsIsSinceMounterNonFunctional, Args); if (Result == 0) { UseImageAs = ArgOptions::CompressedImageOption::OPTION_EXTRACT; } } - } - else { + } else { if (HasMounter) { Args.erase(Args.begin()); Result = AskForConfirmationList(ExtractingStrings.AsIsSinceExtractorNonFunctional, Args); @@ -1352,8 +1260,7 @@ int main(int argc, char **argv, char **const envp) { // We removed an argument, Just change "As-Is" from 0 to 1 for later logic to work 
UseImageAs = ArgOptions::CompressedImageOption::OPTION_ASIS; } - } - else { + } else { Args.erase(Args.begin()); ExecWithInfo(ExtractingStrings.AsIsSinceNothingWorks); UseImageAs = ArgOptions::CompressedImageOption::OPTION_ASIS; diff --git a/Source/Tools/FEXRootFSFetcher/XXFileHash.cpp b/Source/Tools/FEXRootFSFetcher/XXFileHash.cpp index 93f7a37b54..4fd11a678c 100644 --- a/Source/Tools/FEXRootFSFetcher/XXFileHash.cpp +++ b/Source/Tools/FEXRootFSFetcher/XXFileHash.cpp @@ -9,66 +9,66 @@ #include namespace XXFileHash { - // 32MB blocks - constexpr static size_t BLOCK_SIZE = 32 * 1024 * 1024; - std::pair HashFile(const fextl::string &Filepath) { - int fd = open(Filepath.c_str(), O_RDONLY); - if (fd == -1) { - return {false, 0}; - } +// 32MB blocks +constexpr static size_t BLOCK_SIZE = 32 * 1024 * 1024; +std::pair HashFile(const fextl::string& Filepath) { + int fd = open(Filepath.c_str(), O_RDONLY); + if (fd == -1) { + return {false, 0}; + } - auto HadError = [fd]() -> std::pair { - close(fd); - return {false, 0}; - }; - // Get file size - off_t Size = lseek(fd, 0, SEEK_END); - double SizeD = Size; + auto HadError = [fd]() -> std::pair { + close(fd); + return {false, 0}; + }; + // Get file size + off_t Size = lseek(fd, 0, SEEK_END); + double SizeD = Size; - // Reset to beginning - lseek(fd, 0, SEEK_SET); + // Reset to beginning + lseek(fd, 0, SEEK_SET); - // Set up XXHash state - XXH3_state_t* const State = XXH3_createState(); - XXH64_hash_t const Seed = 0; + // Set up XXHash state + XXH3_state_t* const State = XXH3_createState(); + const XXH64_hash_t Seed = 0; - if (!State) { - return HadError(); - } + if (!State) { + return HadError(); + } - if (XXH3_64bits_reset_withSeed(State, Seed) == XXH_ERROR) { - return HadError(); - } + if (XXH3_64bits_reset_withSeed(State, Seed) == XXH_ERROR) { + return HadError(); + } - std::vector Data(BLOCK_SIZE); - off_t CurrentOffset = 0; - auto Now = std::chrono::high_resolution_clock::now(); + std::vector Data(BLOCK_SIZE); + off_t 
CurrentOffset = 0; + auto Now = std::chrono::high_resolution_clock::now(); - // Let the kernel know that we will be reading linearly - posix_fadvise(fd, 0, Size, POSIX_FADV_SEQUENTIAL); - while (CurrentOffset < Size) { + // Let the kernel know that we will be reading linearly + posix_fadvise(fd, 0, Size, POSIX_FADV_SEQUENTIAL); + while (CurrentOffset < Size) { - ssize_t Result = pread(fd, Data.data(), BLOCK_SIZE, CurrentOffset); - if (Result == -1) { - return HadError(); - } + ssize_t Result = pread(fd, Data.data(), BLOCK_SIZE, CurrentOffset); + if (Result == -1) { + return HadError(); + } - if (XXH3_64bits_update(State, Data.data(), Result) == XXH_ERROR) { - return HadError(); - } - auto Cur = std::chrono::high_resolution_clock::now(); - auto Dur = Cur - Now; - if (Dur >= std::chrono::seconds(1)) { - fmt::print("{:.2}% hashed\n", (double)CurrentOffset / SizeD * 100.0); - Now = Cur; - } - CurrentOffset += Result; + if (XXH3_64bits_update(State, Data.data(), Result) == XXH_ERROR) { + return HadError(); } + auto Cur = std::chrono::high_resolution_clock::now(); + auto Dur = Cur - Now; + if (Dur >= std::chrono::seconds(1)) { + fmt::print("{:.2}% hashed\n", (double)CurrentOffset / SizeD * 100.0); + Now = Cur; + } + CurrentOffset += Result; + } - XXH64_hash_t const Hash = XXH3_64bits_digest(State); - XXH3_freeState(State); + const XXH64_hash_t Hash = XXH3_64bits_digest(State); + XXH3_freeState(State); - close(fd); - return {true, Hash}; - } + close(fd); + return {true, Hash}; } +} // namespace XXFileHash diff --git a/Source/Tools/FEXServer/ArgumentLoader.cpp b/Source/Tools/FEXServer/ArgumentLoader.cpp index b27540299f..c7c1aa252a 100644 --- a/Source/Tools/FEXServer/ArgumentLoader.cpp +++ b/Source/Tools/FEXServer/ArgumentLoader.cpp @@ -7,50 +7,32 @@ #include namespace FEXServer::Config { - static fextl::string Version = "FEX-Emu (" GIT_DESCRIBE_STRING ") "; - - FEXServerOptions Load(int argc, char **argv) { - FEXServerOptions FEXOptions{}; - optparse::OptionParser Parser 
= optparse::OptionParser() - .version(Version); - - Parser.add_option("-k", "--kill") - .action("store_true") - .set_default(false) - .help("Shutdown an already active FEXServer"); - - Parser.add_option("-f", "--foreground") - .action("store_true") - .set_default(false) - .help("Run this FEXServer in the foreground"); - - Parser.add_option("-p", "--persistent") - .action("store") - .type("int") - .set_default(0) - .set_optional_value(true) - .metavar("n") - .help("Make FEXServer persistent. Optional number of seconds"); - - Parser.add_option("-w", "--wait") - .action("store_true") - .set_default(false) - .help("Wait for the FEXServer to shutdown"); - - Parser.add_option("-v") - .action("version") - .help("Version string"); - - optparse::Values Options = Parser.parse_args(argc, argv); - - FEXOptions.Kill = Options.get("kill"); - FEXOptions.Foreground = Options.get("foreground"); - FEXOptions.Wait = Options.get("wait"); - if (FEXOptions.Wait) { - FEXOptions.Foreground = true; - } - FEXOptions.PersistentTimeout = Options.get("persistent"); - - return FEXOptions; +static fextl::string Version = "FEX-Emu (" GIT_DESCRIBE_STRING ") "; + +FEXServerOptions Load(int argc, char** argv) { + FEXServerOptions FEXOptions {}; + optparse::OptionParser Parser = optparse::OptionParser().version(Version); + + Parser.add_option("-k", "--kill").action("store_true").set_default(false).help("Shutdown an already active FEXServer"); + + Parser.add_option("-f", "--foreground").action("store_true").set_default(false).help("Run this FEXServer in the foreground"); + + Parser.add_option("-p", "--persistent").action("store").type("int").set_default(0).set_optional_value(true).metavar("n").help("Make FEXServer persistent. 
Optional number of seconds"); + + Parser.add_option("-w", "--wait").action("store_true").set_default(false).help("Wait for the FEXServer to shutdown"); + + Parser.add_option("-v").action("version").help("Version string"); + + optparse::Values Options = Parser.parse_args(argc, argv); + + FEXOptions.Kill = Options.get("kill"); + FEXOptions.Foreground = Options.get("foreground"); + FEXOptions.Wait = Options.get("wait"); + if (FEXOptions.Wait) { + FEXOptions.Foreground = true; } + FEXOptions.PersistentTimeout = Options.get("persistent"); + + return FEXOptions; } +} // namespace FEXServer::Config diff --git a/Source/Tools/FEXServer/Logger.cpp b/Source/Tools/FEXServer/Logger.cpp index 644909651c..6c57352f74 100644 --- a/Source/Tools/FEXServer/Logger.cpp +++ b/Source/Tools/FEXServer/Logger.cpp @@ -8,142 +8,135 @@ #include namespace Logging { - void ClientMsgHandler(int FD, uint64_t Timestamp, uint32_t PID, uint32_t TID, uint32_t Level, const char* Msg); +void ClientMsgHandler(int FD, uint64_t Timestamp, uint32_t PID, uint32_t TID, uint32_t Level, const char* Msg); } namespace Logger { - std::vector PollFDs{}; - std::mutex IncomingPollFDsLock{}; - std::vector IncomingPollFDs{}; - std::thread LogThread; - std::atomic ShouldShutdown {false}; - std::atomic LoggerThreadTID{}; - - void HandleLogData(int Socket) { - std::vector Data(1500); - size_t CurrentRead{}; - while (true) { - int Read = read(Socket, &Data.at(CurrentRead), Data.size() - CurrentRead); - if (Read > 0) { - CurrentRead += Read; - if (CurrentRead == Data.size()) { - Data.resize(Data.size() << 1); - } - else { - // No more to read - break; - } - } - else { - if (errno == EWOULDBLOCK) { - // no error - } - else { - perror("read"); - } +std::vector PollFDs {}; +std::mutex IncomingPollFDsLock {}; +std::vector IncomingPollFDs {}; +std::thread LogThread; +std::atomic ShouldShutdown {false}; +std::atomic LoggerThreadTID {}; + +void HandleLogData(int Socket) { + std::vector Data(1500); + size_t CurrentRead {}; + while 
(true) { + int Read = read(Socket, &Data.at(CurrentRead), Data.size() - CurrentRead); + if (Read > 0) { + CurrentRead += Read; + if (CurrentRead == Data.size()) { + Data.resize(Data.size() << 1); + } else { + // No more to read break; } + } else { + if (errno == EWOULDBLOCK) { + // no error + } else { + perror("read"); + } + break; } + } - size_t CurrentOffset{}; - while (CurrentOffset < CurrentRead) { - FEXServerClient::Logging::PacketHeader *Header = reinterpret_cast(&Data[CurrentOffset]); - if (Header->PacketType == FEXServerClient::Logging::PacketTypes::TYPE_MSG) { - FEXServerClient::Logging::PacketMsg *Msg = reinterpret_cast(&Data[CurrentOffset]); - const char *MsgText = reinterpret_cast(&Data[CurrentOffset + sizeof(FEXServerClient::Logging::PacketMsg)]); - Logging::ClientMsgHandler(Socket, Msg->Header.Timestamp, Msg->Header.PID, Msg->Header.TID, Msg->Level, MsgText); - - CurrentOffset += sizeof(FEXServerClient::Logging::PacketMsg) + Msg->MessageLength; - } - else { - CurrentOffset = CurrentRead; - } + size_t CurrentOffset {}; + while (CurrentOffset < CurrentRead) { + FEXServerClient::Logging::PacketHeader* Header = reinterpret_cast(&Data[CurrentOffset]); + if (Header->PacketType == FEXServerClient::Logging::PacketTypes::TYPE_MSG) { + FEXServerClient::Logging::PacketMsg* Msg = reinterpret_cast(&Data[CurrentOffset]); + const char* MsgText = reinterpret_cast(&Data[CurrentOffset + sizeof(FEXServerClient::Logging::PacketMsg)]); + Logging::ClientMsgHandler(Socket, Msg->Header.Timestamp, Msg->Header.PID, Msg->Header.TID, Msg->Level, MsgText); + + CurrentOffset += sizeof(FEXServerClient::Logging::PacketMsg) + Msg->MessageLength; + } else { + CurrentOffset = CurrentRead; } } +} - void LogThreadFunc() { - LoggerThreadTID = FHU::Syscalls::gettid(); +void LogThreadFunc() { + LoggerThreadTID = FHU::Syscalls::gettid(); - while (!ShouldShutdown) { - struct timespec ts{}; - ts.tv_sec = 5; + while (!ShouldShutdown) { + struct timespec ts {}; + ts.tv_sec = 5; - { - 
std::unique_lock lk {IncomingPollFDsLock}; - PollFDs.insert(PollFDs.end(), std::make_move_iterator(IncomingPollFDs.begin()), std::make_move_iterator(IncomingPollFDs.end())); - IncomingPollFDs.clear(); - } - if (PollFDs.size() == 0) { - pselect(0, nullptr, nullptr, nullptr, &ts, nullptr); - } - else { - int Result = ppoll(&PollFDs.at(0), PollFDs.size(), &ts, nullptr); - if (Result > 0) { - // Walk the FDs and see if we got any results - for (auto it = PollFDs.begin(); it != PollFDs.end(); ) { - bool Erase{}; - if (it->revents != 0) { - if (it->revents & POLLIN) { - // Data from the socket - HandleLogData(it->fd); - } - else if (it->revents & (POLLHUP | POLLERR | POLLNVAL | POLLRDHUP)) { - // Error or hangup, close the socket and erase it from our list - Erase = true; - close(it->fd); - } - - it->revents = 0; - --Result; + { + std::unique_lock lk {IncomingPollFDsLock}; + PollFDs.insert(PollFDs.end(), std::make_move_iterator(IncomingPollFDs.begin()), std::make_move_iterator(IncomingPollFDs.end())); + IncomingPollFDs.clear(); + } + if (PollFDs.size() == 0) { + pselect(0, nullptr, nullptr, nullptr, &ts, nullptr); + } else { + int Result = ppoll(&PollFDs.at(0), PollFDs.size(), &ts, nullptr); + if (Result > 0) { + // Walk the FDs and see if we got any results + for (auto it = PollFDs.begin(); it != PollFDs.end();) { + bool Erase {}; + if (it->revents != 0) { + if (it->revents & POLLIN) { + // Data from the socket + HandleLogData(it->fd); + } else if (it->revents & (POLLHUP | POLLERR | POLLNVAL | POLLRDHUP)) { + // Error or hangup, close the socket and erase it from our list + Erase = true; + close(it->fd); } - if (Erase) { - it = PollFDs.erase(it); - } - else { - ++it; - } + it->revents = 0; + --Result; + } - if (Result == 0) { - // Early break if we've consumed all the results - break; - } + if (Erase) { + it = PollFDs.erase(it); + } else { + ++it; + } + + if (Result == 0) { + // Early break if we've consumed all the results + break; } } } } } +} - void StartLogThread() 
{ - LogThread = std::thread(LogThreadFunc); - } - - void AppendLogFD(int FD) { - { - std::unique_lock lk {IncomingPollFDsLock}; - IncomingPollFDs.emplace_back(pollfd { - .fd = FD, - .events = POLLIN, - .revents = 0, - }); - } +void StartLogThread() { + LogThread = std::thread(LogThreadFunc); +} - // Wake up the thread immediately - FHU::Syscalls::tgkill(::getpid(), LoggerThreadTID, SIGUSR1); +void AppendLogFD(int FD) { + { + std::unique_lock lk {IncomingPollFDsLock}; + IncomingPollFDs.emplace_back(pollfd { + .fd = FD, + .events = POLLIN, + .revents = 0, + }); } - bool LogThreadRunning() { - return LogThread.joinable(); - } + // Wake up the thread immediately + FHU::Syscalls::tgkill(::getpid(), LoggerThreadTID, SIGUSR1); +} + +bool LogThreadRunning() { + return LogThread.joinable(); +} - void Shutdown() { - ShouldShutdown = true; +void Shutdown() { + ShouldShutdown = true; - // Wake up the thread immediately - FHU::Syscalls::tgkill(::getpid(), LoggerThreadTID, SIGUSR1); + // Wake up the thread immediately + FHU::Syscalls::tgkill(::getpid(), LoggerThreadTID, SIGUSR1); - if (LogThread.joinable()) { - LogThread.join(); - } + if (LogThread.joinable()) { + LogThread.join(); } } +} // namespace Logger diff --git a/Source/Tools/FEXServer/Main.cpp b/Source/Tools/FEXServer/Main.cpp index fae9238226..9cdb6c8167 100644 --- a/Source/Tools/FEXServer/Main.cpp +++ b/Source/Tools/FEXServer/Main.cpp @@ -25,79 +25,78 @@ #include namespace Logging { - void MsgHandler(LogMan::DebugLevels Level, char const *Message) { - const auto Output = fmt::format("[{}] {}\n", LogMan::DebugLevelStr(Level), Message); - write(STDOUT_FILENO, Output.c_str(), Output.size()); - } +void MsgHandler(LogMan::DebugLevels Level, const char* Message) { + const auto Output = fmt::format("[{}] {}\n", LogMan::DebugLevelStr(Level), Message); + write(STDOUT_FILENO, Output.c_str(), Output.size()); +} - void AssertHandler(char const *Message) { - const auto Output = fmt::format("[ASSERT] {}\n", Message); - 
write(STDOUT_FILENO, Output.c_str(), Output.size()); - } +void AssertHandler(const char* Message) { + const auto Output = fmt::format("[ASSERT] {}\n", Message); + write(STDOUT_FILENO, Output.c_str(), Output.size()); +} - void ClientMsgHandler(int FD, uint64_t Timestamp, uint32_t PID, uint32_t TID, uint32_t Level, const char* Msg) { - const auto Output = fmt::format("[{}][{}][{}.{}] {}\n", LogMan::DebugLevelStr(Level), Timestamp, PID, TID, Msg); - write(STDERR_FILENO, Output.c_str(), Output.size()); - } +void ClientMsgHandler(int FD, uint64_t Timestamp, uint32_t PID, uint32_t TID, uint32_t Level, const char* Msg) { + const auto Output = fmt::format("[{}][{}][{}.{}] {}\n", LogMan::DebugLevelStr(Level), Timestamp, PID, TID, Msg); + write(STDERR_FILENO, Output.c_str(), Output.size()); } +} // namespace Logging namespace { - void ActionHandler(int sig, siginfo_t *info, void *context) { - // FEX_TODO("Fix this"); - if (sig == SIGINT) { - // Someone trying to kill us. Shutdown. - ProcessPipe::Shutdown(); - return; - } - _exit(1); +void ActionHandler(int sig, siginfo_t* info, void* context) { + // FEX_TODO("Fix this"); + if (sig == SIGINT) { + // Someone trying to kill us. Shutdown. 
+ ProcessPipe::Shutdown(); + return; } + _exit(1); +} - void ActionIgnore(int sig, siginfo_t *info, void *context) { - } +void ActionIgnore(int sig, siginfo_t* info, void* context) {} - void SetupSignals() { - // Setup our signal handlers now so we can capture some events - struct sigaction act{}; - act.sa_sigaction = ActionHandler; - act.sa_flags = SA_SIGINFO; - - // SIGTERM if something is trying to terminate us - sigaction(SIGTERM, &act, nullptr); - // SIGINT if something is trying to terminate us - sigaction(SIGINT, &act, nullptr); - - // SIGUSR1 just to interrupt syscalls - act.sa_sigaction = ActionIgnore; - sigaction(SIGUSR1, &act, nullptr); - - // Ignore SIGPIPE, we will be checking for pipe closure which could send this signal - signal(SIGPIPE, SIG_IGN); - // SIGCHLD if squashfuse exits early. - // Ignore it for now - signal(SIGCHLD, SIG_IGN); - } +void SetupSignals() { + // Setup our signal handlers now so we can capture some events + struct sigaction act {}; + act.sa_sigaction = ActionHandler; + act.sa_flags = SA_SIGINFO; - /** - * @brief Deparents itself by forking and terminating the parent process. - */ - void DeparentSelf() { - auto SystemdEnv = getenv("INVOCATION_ID"); - if (SystemdEnv) { - // If FEXServer was launched through systemd then don't deparent, otherwise systemd kills the entire server. - return; - } + // SIGTERM if something is trying to terminate us + sigaction(SIGTERM, &act, nullptr); + // SIGINT if something is trying to terminate us + sigaction(SIGINT, &act, nullptr); - pid_t pid = fork(); + // SIGUSR1 just to interrupt syscalls + act.sa_sigaction = ActionIgnore; + sigaction(SIGUSR1, &act, nullptr); - if (pid != 0) { - // Parent is leaving to force this process to deparent itself - // This lets this process become the child of whatever the reaper parent is - _exit(0); - } + // Ignore SIGPIPE, we will be checking for pipe closure which could send this signal + signal(SIGPIPE, SIG_IGN); + // SIGCHLD if squashfuse exits early. 
+ // Ignore it for now + signal(SIGCHLD, SIG_IGN); +} + +/** + * @brief Deparents itself by forking and terminating the parent process. + */ +void DeparentSelf() { + auto SystemdEnv = getenv("INVOCATION_ID"); + if (SystemdEnv) { + // If FEXServer was launched through systemd then don't deparent, otherwise systemd kills the entire server. + return; + } + + pid_t pid = fork(); + + if (pid != 0) { + // Parent is leaving to force this process to deparent itself + // This lets this process become the child of whatever the reaper parent is + _exit(0); } } +} // namespace -int main(int argc, char **argv, char **const envp) { +int main(int argc, char** argv, char** const envp) { auto Options = FEXServer::Config::Load(argc, argv); SetupSignals(); @@ -115,12 +114,7 @@ int main(int argc, char **argv, char **const envp) { DeparentSelf(); } - FEX::Config::LoadConfig( - true, - false, - argc, argv, envp, - false, {} - ); + FEX::Config::LoadConfig(true, false, argc, argv, envp, false, {}); // Reload the meta layer FEXCore::Config::ReloadMetaLayer(); @@ -139,7 +133,8 @@ int main(int argc, char **argv, char **const envp) { PollFD.events = POLLIN | POLLOUT | POLLRDHUP | POLLERR | POLLHUP | POLLNVAL; // Wait for a result on the pipe that isn't EINTR - while (poll(&PollFD, 1, -1) == -1 && errno == EINTR); + while (poll(&PollFD, 1, -1) == -1 && errno == EINTR) + ; LogMan::Msg::IFmt("[FEXServer] FEXServer shutdown"); } diff --git a/Source/Tools/FEXServer/PipeScanner.cpp b/Source/Tools/FEXServer/PipeScanner.cpp index 15dd72d413..fdbc844a93 100644 --- a/Source/Tools/FEXServer/PipeScanner.cpp +++ b/Source/Tools/FEXServer/PipeScanner.cpp @@ -7,44 +7,44 @@ #include namespace PipeScanner { - std::vector IncomingPipes{}; +std::vector IncomingPipes {}; - // Scan and store any pipe files. - // This will capture all pipe files so needs to be executed early. - // This ensures we find any pipe files from execve for waiting FEXInterpreters. 
- void ScanForPipes() { - DIR *fd = opendir("/proc/self/fd"); - if (fd) { - struct dirent *dir{}; - do { - dir = readdir(fd); - if (dir) { - char *end{}; - int open_fd = std::strtol(dir->d_name, &end, 8); - if (end != dir->d_name) { - struct stat stat{}; - int result = fstat(open_fd, &stat); - if (result == -1) { - continue; - } - if (stat.st_mode & S_IFIFO) { - // Close any incoming pipes - IncomingPipes.emplace_back(open_fd); - } +// Scan and store any pipe files. +// This will capture all pipe files so needs to be executed early. +// This ensures we find any pipe files from execve for waiting FEXInterpreters. +void ScanForPipes() { + DIR* fd = opendir("/proc/self/fd"); + if (fd) { + struct dirent* dir {}; + do { + dir = readdir(fd); + if (dir) { + char* end {}; + int open_fd = std::strtol(dir->d_name, &end, 8); + if (end != dir->d_name) { + struct stat stat {}; + int result = fstat(open_fd, &stat); + if (result == -1) { + continue; + } + if (stat.st_mode & S_IFIFO) { + // Close any incoming pipes + IncomingPipes.emplace_back(open_fd); } } - } while (dir); + } + } while (dir); - closedir(fd); - } + closedir(fd); } +} - void ClosePipes() { - for (auto pipe : IncomingPipes) { - int Null{0}; - write(pipe, &Null, sizeof(Null)); - close(pipe); - } - IncomingPipes.clear(); +void ClosePipes() { + for (auto pipe : IncomingPipes) { + int Null {0}; + write(pipe, &Null, sizeof(Null)); + close(pipe); } + IncomingPipes.clear(); } +} // namespace PipeScanner diff --git a/Source/Tools/FEXServer/ProcessPipe.cpp b/Source/Tools/FEXServer/ProcessPipe.cpp index 2be8cd2710..69b52b5687 100644 --- a/Source/Tools/FEXServer/ProcessPipe.cpp +++ b/Source/Tools/FEXServer/ProcessPipe.cpp @@ -16,133 +16,102 @@ #include namespace ProcessPipe { - constexpr int USER_PERMS = S_IRWXU | S_IRWXG | S_IRWXO; - int ServerLockFD {-1}; - int ServerSocketFD{-1}; - std::atomic ShouldShutdown {false}; - time_t RequestTimeout {10}; - bool Foreground {false}; - std::vector PollFDs{}; - - // FD count watching 
- constexpr size_t static MAX_FD_DISTANCE = 32; - rlimit MaxFDs{}; - std::atomic NumFilesOpened{}; - - size_t GetNumFilesOpen() { - // Walk /proc/self/fd/ to see how many open files we currently have - const std::filesystem::path self{"/proc/self/fd/"}; - - return std::distance(std::filesystem::directory_iterator{self}, std::filesystem::directory_iterator{}); +constexpr int USER_PERMS = S_IRWXU | S_IRWXG | S_IRWXO; +int ServerLockFD {-1}; +int ServerSocketFD {-1}; +std::atomic ShouldShutdown {false}; +time_t RequestTimeout {10}; +bool Foreground {false}; +std::vector PollFDs {}; + +// FD count watching +constexpr size_t static MAX_FD_DISTANCE = 32; +rlimit MaxFDs {}; +std::atomic NumFilesOpened {}; + +size_t GetNumFilesOpen() { + // Walk /proc/self/fd/ to see how many open files we currently have + const std::filesystem::path self {"/proc/self/fd/"}; + + return std::distance(std::filesystem::directory_iterator {self}, std::filesystem::directory_iterator {}); +} + +void GetMaxFDs() { + // Get our kernel limit for the number of open files + if (getrlimit(RLIMIT_NOFILE, &MaxFDs) != 0) { + fprintf(stderr, "[FEXMountDaemon] getrlimit(RLIMIT_NOFILE) returned error %d %s\n", errno, strerror(errno)); } - void GetMaxFDs() { - // Get our kernel limit for the number of open files - if (getrlimit(RLIMIT_NOFILE, &MaxFDs) != 0) { - fprintf(stderr, "[FEXMountDaemon] getrlimit(RLIMIT_NOFILE) returned error %d %s\n", errno, strerror(errno)); - } + // Walk /proc/self/fd/ to see how many open files we currently have + NumFilesOpened = GetNumFilesOpen(); +} - // Walk /proc/self/fd/ to see how many open files we currently have - NumFilesOpened = GetNumFilesOpen(); +void CheckRaiseFDLimit() { + if (NumFilesOpened < (MaxFDs.rlim_cur - MAX_FD_DISTANCE)) { + // No need to raise the limit. + return; } - void CheckRaiseFDLimit() { - if (NumFilesOpened < (MaxFDs.rlim_cur - MAX_FD_DISTANCE)) { - // No need to raise the limit. 
- return; - } - - if (MaxFDs.rlim_cur == MaxFDs.rlim_max) { - fprintf(stderr, "[FEXMountDaemon] Our open FD limit is already set to max and we are wanting to increase it\n"); - fprintf(stderr, "[FEXMountDaemon] FEXMountDaemon will now no longer be able to track new instances of FEX\n"); - fprintf(stderr, "[FEXMountDaemon] Current limit is %zd(hard %zd) FDs and we are at %zd\n", MaxFDs.rlim_cur, MaxFDs.rlim_max, GetNumFilesOpen()); - fprintf(stderr, "[FEXMountDaemon] Ask your administrator to raise your kernel's hard limit on open FDs\n"); - return; - } + if (MaxFDs.rlim_cur == MaxFDs.rlim_max) { + fprintf(stderr, "[FEXMountDaemon] Our open FD limit is already set to max and we are wanting to increase it\n"); + fprintf(stderr, "[FEXMountDaemon] FEXMountDaemon will now no longer be able to track new instances of FEX\n"); + fprintf(stderr, "[FEXMountDaemon] Current limit is %zd(hard %zd) FDs and we are at %zd\n", MaxFDs.rlim_cur, MaxFDs.rlim_max, + GetNumFilesOpen()); + fprintf(stderr, "[FEXMountDaemon] Ask your administrator to raise your kernel's hard limit on open FDs\n"); + return; + } - rlimit NewLimit = MaxFDs; + rlimit NewLimit = MaxFDs; - // Just multiply by two - NewLimit.rlim_cur <<= 1; + // Just multiply by two + NewLimit.rlim_cur <<= 1; - // Now limit to the hard max - NewLimit.rlim_cur = std::min(NewLimit.rlim_cur, NewLimit.rlim_max); + // Now limit to the hard max + NewLimit.rlim_cur = std::min(NewLimit.rlim_cur, NewLimit.rlim_max); - if (setrlimit(RLIMIT_NOFILE, &NewLimit) != 0) { - fprintf(stderr, "[FEXMountDaemon] Couldn't raise FD limit to %zd even though our hard limit is %zd\n", NewLimit.rlim_cur, NewLimit.rlim_max); - } - else { - // Set the new limit - MaxFDs = NewLimit; - } + if (setrlimit(RLIMIT_NOFILE, &NewLimit) != 0) { + fprintf(stderr, "[FEXMountDaemon] Couldn't raise FD limit to %zd even though our hard limit is %zd\n", NewLimit.rlim_cur, NewLimit.rlim_max); + } else { + // Set the new limit + MaxFDs = NewLimit; } +} - bool 
InitializeServerPipe() { - auto ServerFolder = FEXServerClient::GetServerLockFolder(); - - std::error_code ec{}; - if (!std::filesystem::exists(ServerFolder, ec)) { - // Doesn't exist, create the the folder as a user convenience - if (!std::filesystem::create_directories(ServerFolder, ec)) { - LogMan::Msg::EFmt("Couldn't create server pipe folder at: {}", ServerFolder); - return false; - } - } +bool InitializeServerPipe() { + auto ServerFolder = FEXServerClient::GetServerLockFolder(); - auto ServerLockPath = FEXServerClient::GetServerLockFile(); - - // Now this is some tricky locking logic to ensure that we only ever have one server running - // The logic is as follows: - // - Try to make the lock file - // - If Exists then check to see if it is a stale handle - // - Stale checking means opening the file that we know exists - // - Then we try getting a write lock - // - If we fail to get the write lock, then leave - // - Otherwise continue down the codepath and degrade to read lock - // - Else try to acquire a write lock to ensure only one FEXServer exists - // - // - Once a write lock is acquired, downgrade it to a read lock - // - This ensures that future FEXServers won't race to create multiple read locks - int Ret = open(ServerLockPath.c_str(), O_RDWR | O_CREAT | O_CLOEXEC | O_EXCL, USER_PERMS); - ServerLockFD = Ret; - - if (Ret == -1 && errno == EEXIST) { - // If the lock exists then it might be a stale connection. - // Check the lock status to see if another process is still alive. - ServerLockFD = open(ServerLockPath.c_str(), O_RDWR | O_CLOEXEC, USER_PERMS); - if (ServerLockFD != -1) { - // Now that we have opened the file, try to get a write lock. - flock lk { - .l_type = F_WRLCK, - .l_whence = SEEK_SET, - .l_start = 0, - .l_len = 0, - }; - Ret = fcntl(ServerLockFD, F_SETLK, &lk); - - if (Ret != -1) { - // Write lock was gained, we can now continue onward. 
- } - else { - // We couldn't get a write lock, this means that another process already owns a lock on the lock - close(ServerLockFD); - ServerLockFD = -1; - return false; - } - } - else { - // File couldn't get opened even though it existed? - // Must have raced something here. - return false; - } - } - else if (Ret == -1) { - // Unhandled error. - LogMan::Msg::EFmt("Unable to create FEXServer named lock file at: {} {} {}", ServerLockPath, errno, strerror(errno)); + std::error_code ec {}; + if (!std::filesystem::exists(ServerFolder, ec)) { + // Doesn't exist, create the the folder as a user convenience + if (!std::filesystem::create_directories(ServerFolder, ec)) { + LogMan::Msg::EFmt("Couldn't create server pipe folder at: {}", ServerFolder); return false; } - else { - // FIFO file was created. Try to get a write lock + } + + auto ServerLockPath = FEXServerClient::GetServerLockFile(); + + // Now this is some tricky locking logic to ensure that we only ever have one server running + // The logic is as follows: + // - Try to make the lock file + // - If Exists then check to see if it is a stale handle + // - Stale checking means opening the file that we know exists + // - Then we try getting a write lock + // - If we fail to get the write lock, then leave + // - Otherwise continue down the codepath and degrade to read lock + // - Else try to acquire a write lock to ensure only one FEXServer exists + // + // - Once a write lock is acquired, downgrade it to a read lock + // - This ensures that future FEXServers won't race to create multiple read locks + int Ret = open(ServerLockPath.c_str(), O_RDWR | O_CREAT | O_CLOEXEC | O_EXCL, USER_PERMS); + ServerLockFD = Ret; + + if (Ret == -1 && errno == EEXIST) { + // If the lock exists then it might be a stale connection. + // Check the lock status to see if another process is still alive. 
+ ServerLockFD = open(ServerLockPath.c_str(), O_RDWR | O_CLOEXEC, USER_PERMS); + if (ServerLockFD != -1) { + // Now that we have opened the file, try to get a write lock. flock lk { .l_type = F_WRLCK, .l_whence = SEEK_SET, @@ -151,17 +120,27 @@ namespace ProcessPipe { }; Ret = fcntl(ServerLockFD, F_SETLK, &lk); - if (Ret == -1) { - // Couldn't get a write lock, something else must have got it + if (Ret != -1) { + // Write lock was gained, we can now continue onward. + } else { + // We couldn't get a write lock, this means that another process already owns a lock on the lock close(ServerLockFD); ServerLockFD = -1; return false; } + } else { + // File couldn't get opened even though it existed? + // Must have raced something here. + return false; } - - // Now that a write lock is held, downgrade it to a read lock + } else if (Ret == -1) { + // Unhandled error. + LogMan::Msg::EFmt("Unable to create FEXServer named lock file at: {} {} {}", ServerLockPath, errno, strerror(errno)); + return false; + } else { + // FIFO file was created. 
Try to get a write lock flock lk { - .l_type = F_RDLCK, + .l_type = F_WRLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = 0, @@ -169,381 +148,370 @@ namespace ProcessPipe { Ret = fcntl(ServerLockFD, F_SETLK, &lk); if (Ret == -1) { - // This shouldn't occur - LogMan::Msg::EFmt("Unable to downgrade a write lock to a read lock {} {} {}", ServerLockPath, errno, strerror(errno)); + // Couldn't get a write lock, something else must have got it close(ServerLockFD); ServerLockFD = -1; return false; } - - return true; } - bool InitializeServerSocket() { - auto ServerSocketName = FEXServerClient::GetServerSocketName(); - - // Create the initial unix socket - ServerSocketFD = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); - if (ServerSocketFD == -1) { - LogMan::Msg::EFmt("Couldn't create AF_UNIX socket: {} {}\n", errno, strerror(errno)); - return false; - } - - struct sockaddr_un addr{}; - addr.sun_family = AF_UNIX; - size_t SizeOfSocketString = std::min(ServerSocketName.size() + 1, sizeof(addr.sun_path) - 1); - addr.sun_path[0] = 0; // Abstract AF_UNIX sockets start with \0 - strncpy(addr.sun_path + 1, ServerSocketName.data(), SizeOfSocketString); - // Include final null character. 
- size_t SizeOfAddr = sizeof(addr.sun_family) + SizeOfSocketString; - - // Bind the socket to the path - int Result = bind(ServerSocketFD, reinterpret_cast(&addr), SizeOfAddr); - if (Result == -1) { - LogMan::Msg::EFmt("Couldn't bind AF_UNIX socket '{}': {} {}\n", addr.sun_path, errno, strerror(errno)); - close(ServerSocketFD); - ServerSocketFD = -1; - return false; - } - - listen(ServerSocketFD, 16); - PollFDs.emplace_back(pollfd { - .fd = ServerSocketFD, - .events = POLLIN, - .revents = 0, - }); - - return true; + // Now that a write lock is held, downgrade it to a read lock + flock lk { + .l_type = F_RDLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0, + }; + Ret = fcntl(ServerLockFD, F_SETLK, &lk); + + if (Ret == -1) { + // This shouldn't occur + LogMan::Msg::EFmt("Unable to downgrade a write lock to a read lock {} {} {}", ServerLockPath, errno, strerror(errno)); + close(ServerLockFD); + ServerLockFD = -1; + return false; } - void SendEmptyErrorPacket(int Socket) { - FEXServerClient::FEXServerResultPacket Res { - .Header { - .Type = FEXServerClient::PacketType::TYPE_ERROR, - }, - }; - - struct iovec iov { - .iov_base = &Res, - .iov_len = sizeof(Res), - }; + return true; +} - struct msghdr msg { - .msg_name = nullptr, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - }; +bool InitializeServerSocket() { + auto ServerSocketName = FEXServerClient::GetServerSocketName(); - sendmsg(Socket, &msg, 0); + // Create the initial unix socket + ServerSocketFD = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (ServerSocketFD == -1) { + LogMan::Msg::EFmt("Couldn't create AF_UNIX socket: {} {}\n", errno, strerror(errno)); + return false; } - void SendFDSuccessPacket(int Socket, int FD) { - FEXServerClient::FEXServerResultPacket Res { - .Header { - .Type = FEXServerClient::PacketType::TYPE_SUCCESS, - }, - }; + struct sockaddr_un addr {}; + addr.sun_family = AF_UNIX; + size_t SizeOfSocketString = std::min(ServerSocketName.size() + 1, sizeof(addr.sun_path) - 
1); + addr.sun_path[0] = 0; // Abstract AF_UNIX sockets start with \0 + strncpy(addr.sun_path + 1, ServerSocketName.data(), SizeOfSocketString); + // Include final null character. + size_t SizeOfAddr = sizeof(addr.sun_family) + SizeOfSocketString; + + // Bind the socket to the path + int Result = bind(ServerSocketFD, reinterpret_cast(&addr), SizeOfAddr); + if (Result == -1) { + LogMan::Msg::EFmt("Couldn't bind AF_UNIX socket '{}': {} {}\n", addr.sun_path, errno, strerror(errno)); + close(ServerSocketFD); + ServerSocketFD = -1; + return false; + } - struct iovec iov { - .iov_base = &Res, - .iov_len = sizeof(Res), - }; + listen(ServerSocketFD, 16); + PollFDs.emplace_back(pollfd { + .fd = ServerSocketFD, + .events = POLLIN, + .revents = 0, + }); - struct msghdr msg { - .msg_name = nullptr, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - }; + return true; +} - // Setup the ancillary buffer. This is where we will be getting pipe FDs - // We only need 4 bytes for the FD - constexpr size_t CMSG_SIZE = CMSG_SPACE(sizeof(int)); - union AncillaryBuffer { - struct cmsghdr Header; - uint8_t Buffer[CMSG_SIZE]; - }; - AncillaryBuffer AncBuf{}; +void SendEmptyErrorPacket(int Socket) { + FEXServerClient::FEXServerResultPacket Res { + .Header { + .Type = FEXServerClient::PacketType::TYPE_ERROR, + }, + }; - // Now link to our ancilllary buffer - msg.msg_control = AncBuf.Buffer; - msg.msg_controllen = CMSG_SIZE; + struct iovec iov { + .iov_base = &Res, .iov_len = sizeof(Res), + }; - // Now we need to setup the ancillary buffer data. 
We are only sending an FD - struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_len = CMSG_LEN(sizeof(int)); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; + struct msghdr msg { + .msg_name = nullptr, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, + }; - // We are giving the daemon the write side of the pipe - memcpy(CMSG_DATA(cmsg), &FD, sizeof(int)); + sendmsg(Socket, &msg, 0); +} - sendmsg(Socket, &msg, 0); - } +void SendFDSuccessPacket(int Socket, int FD) { + FEXServerClient::FEXServerResultPacket Res { + .Header { + .Type = FEXServerClient::PacketType::TYPE_SUCCESS, + }, + }; + + struct iovec iov { + .iov_base = &Res, .iov_len = sizeof(Res), + }; + + struct msghdr msg { + .msg_name = nullptr, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, + }; + + // Setup the ancillary buffer. This is where we will be getting pipe FDs + // We only need 4 bytes for the FD + constexpr size_t CMSG_SIZE = CMSG_SPACE(sizeof(int)); + union AncillaryBuffer { + struct cmsghdr Header; + uint8_t Buffer[CMSG_SIZE]; + }; + AncillaryBuffer AncBuf {}; + + // Now link to our ancilllary buffer + msg.msg_control = AncBuf.Buffer; + msg.msg_controllen = CMSG_SIZE; + + // Now we need to setup the ancillary buffer data. We are only sending an FD + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + + // We are giving the daemon the write side of the pipe + memcpy(CMSG_DATA(cmsg), &FD, sizeof(int)); + + sendmsg(Socket, &msg, 0); +} - void HandleSocketData(int Socket) { - std::vector Data(1500); - size_t CurrentRead{}; +void HandleSocketData(int Socket) { + std::vector Data(1500); + size_t CurrentRead {}; - // Get the current number of FDs of the process before we start handling sockets. - GetMaxFDs(); + // Get the current number of FDs of the process before we start handling sockets. 
+ GetMaxFDs(); - while (true) { - struct iovec iov { - .iov_base = &Data.at(CurrentRead), - .iov_len = Data.size() - CurrentRead, - }; + while (true) { + struct iovec iov { + .iov_base = &Data.at(CurrentRead), .iov_len = Data.size() - CurrentRead, + }; - struct msghdr msg { - .msg_name = nullptr, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - }; + struct msghdr msg { + .msg_name = nullptr, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, + }; - ssize_t Read = recvmsg(Socket, &msg, 0); - if (Read <= msg.msg_iov->iov_len) { - CurrentRead += Read; - if (CurrentRead == Data.size()) { - Data.resize(Data.size() << 1); - } - else { - // No more to read - break; - } - } - else { - if (errno == EWOULDBLOCK) { - // no error - } - else { - perror("read"); - } + ssize_t Read = recvmsg(Socket, &msg, 0); + if (Read <= msg.msg_iov->iov_len) { + CurrentRead += Read; + if (CurrentRead == Data.size()) { + Data.resize(Data.size() << 1); + } else { + // No more to read break; } + } else { + if (errno == EWOULDBLOCK) { + // no error + } else { + perror("read"); + } + break; } + } - size_t CurrentOffset{}; - while (CurrentOffset < CurrentRead) { - FEXServerClient::FEXServerRequestPacket *Req = reinterpret_cast(&Data[CurrentOffset]); - switch (Req->Header.Type) { - case FEXServerClient::PacketType::TYPE_KILL: - ShouldShutdown = true; - CurrentOffset += sizeof(FEXServerClient::FEXServerRequestPacket::BasicRequest); - break; - case FEXServerClient::PacketType::TYPE_GET_LOG_FD: { - if (Logger::LogThreadRunning()) { - int fds[2]{}; - pipe2(fds, 0); - // 0 = Read - // 1 = Write - Logger::AppendLogFD(fds[0]); - - SendFDSuccessPacket(Socket, fds[1]); - - // Close the write side now, doesn't matter to us - close(fds[1]); - - // Check if we need to increase the FD limit. - ++NumFilesOpened; - CheckRaiseFDLimit(); - } - else { - // Log thread isn't running. Let FEXInterpreter know it can't have one. 
- SendEmptyErrorPacket(Socket); - } - - CurrentOffset += sizeof(FEXServerClient::FEXServerRequestPacket::Header); - break; - } - case FEXServerClient::PacketType::TYPE_GET_ROOTFS_PATH: { - fextl::string MountFolder = SquashFS::GetMountFolder(); - - FEXServerClient::FEXServerResultPacket Res { - .MountPath { - .Header { - .Type = FEXServerClient::PacketType::TYPE_GET_ROOTFS_PATH, - }, - .Length = MountFolder.size() + 1, - }, - }; - - char Null{}; - - iovec iov[3] { - { - .iov_base = &Res, - .iov_len = sizeof(Res), - }, - { - .iov_base = MountFolder.data(), - .iov_len = MountFolder.size(), - }, - { - .iov_base = &Null, - .iov_len = 1, - }, - }; - - struct msghdr msg { - .msg_name = nullptr, - .msg_namelen = 0, - .msg_iov = iov, - .msg_iovlen = 3, - }; - - sendmsg(Socket, &msg, 0); - - CurrentOffset += sizeof(FEXServerClient::FEXServerRequestPacket::BasicRequest); - break; - } - case FEXServerClient::PacketType::TYPE_GET_PID_FD: { - int FD = FHU::Syscalls::pidfd_open(::getpid(), 0); + size_t CurrentOffset {}; + while (CurrentOffset < CurrentRead) { + FEXServerClient::FEXServerRequestPacket* Req = reinterpret_cast(&Data[CurrentOffset]); + switch (Req->Header.Type) { + case FEXServerClient::PacketType::TYPE_KILL: + ShouldShutdown = true; + CurrentOffset += sizeof(FEXServerClient::FEXServerRequestPacket::BasicRequest); + break; + case FEXServerClient::PacketType::TYPE_GET_LOG_FD: { + if (Logger::LogThreadRunning()) { + int fds[2] {}; + pipe2(fds, 0); + // 0 = Read + // 1 = Write + Logger::AppendLogFD(fds[0]); + + SendFDSuccessPacket(Socket, fds[1]); + + // Close the write side now, doesn't matter to us + close(fds[1]); + + // Check if we need to increase the FD limit. + ++NumFilesOpened; + CheckRaiseFDLimit(); + } else { + // Log thread isn't running. Let FEXInterpreter know it can't have one. + SendEmptyErrorPacket(Socket); + } - if (FD < 0) { - // Couldn't get PIDFD due to too old of kernel. - // Return a pipe to track the same information. 
- // - int fds[2]; - pipe2(fds, O_CLOEXEC); - SendFDSuccessPacket(Socket, fds[0]); + CurrentOffset += sizeof(FEXServerClient::FEXServerRequestPacket::Header); + break; + } + case FEXServerClient::PacketType::TYPE_GET_ROOTFS_PATH: { + fextl::string MountFolder = SquashFS::GetMountFolder(); + + FEXServerClient::FEXServerResultPacket Res { + .MountPath { + .Header { + .Type = FEXServerClient::PacketType::TYPE_GET_ROOTFS_PATH, + }, + .Length = MountFolder.size() + 1, + }, + }; - // Close the read side now, doesn't matter to us - close(fds[0]); + char Null {}; + + iovec iov[3] { + { + .iov_base = &Res, + .iov_len = sizeof(Res), + }, + { + .iov_base = MountFolder.data(), + .iov_len = MountFolder.size(), + }, + { + .iov_base = &Null, + .iov_len = 1, + }, + }; - // Check if we need to increase the FD limit. - ++NumFilesOpened; - CheckRaiseFDLimit(); + struct msghdr msg { + .msg_name = nullptr, .msg_namelen = 0, .msg_iov = iov, .msg_iovlen = 3, + }; - // Write side will naturally close on process exit, letting the other process know we have exited. - } - else { - SendFDSuccessPacket(Socket, FD); + sendmsg(Socket, &msg, 0); - // Close the FD now since we've sent it - close(FD); - } - - CurrentOffset += sizeof(FEXServerClient::FEXServerRequestPacket::Header); - break; - } - // Invalid - case FEXServerClient::PacketType::TYPE_ERROR: - default: - // Something sent us an invalid packet. To ensure we don't spin infinitely, consume all the data. - LogMan::Msg::EFmt("[FEXServer] InvalidPacket size received 0x{:x} bytes", CurrentRead - CurrentOffset); - CurrentOffset = CurrentRead; - break; + CurrentOffset += sizeof(FEXServerClient::FEXServerRequestPacket::BasicRequest); + break; + } + case FEXServerClient::PacketType::TYPE_GET_PID_FD: { + int FD = FHU::Syscalls::pidfd_open(::getpid(), 0); + + if (FD < 0) { + // Couldn't get PIDFD due to too old of kernel. + // Return a pipe to track the same information. 
+ // + int fds[2]; + pipe2(fds, O_CLOEXEC); + SendFDSuccessPacket(Socket, fds[0]); + + // Close the read side now, doesn't matter to us + close(fds[0]); + + // Check if we need to increase the FD limit. + ++NumFilesOpened; + CheckRaiseFDLimit(); + + // Write side will naturally close on process exit, letting the other process know we have exited. + } else { + SendFDSuccessPacket(Socket, FD); + + // Close the FD now since we've sent it + close(FD); } + + CurrentOffset += sizeof(FEXServerClient::FEXServerRequestPacket::Header); + break; + } + // Invalid + case FEXServerClient::PacketType::TYPE_ERROR: + default: + // Something sent us an invalid packet. To ensure we don't spin infinitely, consume all the data. + LogMan::Msg::EFmt("[FEXServer] InvalidPacket size received 0x{:x} bytes", CurrentRead - CurrentOffset); + CurrentOffset = CurrentRead; + break; } } +} - void CloseConnections() { - // Close the server pipe so new processes will know to spin up a new FEXServer. - // This one is closing - close (ServerLockFD); +void CloseConnections() { + // Close the server pipe so new processes will know to spin up a new FEXServer. 
+ // This one is closing + close(ServerLockFD); - // Close the server socket so no more connections can be started - close(ServerSocketFD); - } + // Close the server socket so no more connections can be started + close(ServerSocketFD); +} - void WaitForRequests() { - auto LastDataTime = std::chrono::system_clock::now(); - - while (!ShouldShutdown) { - struct timespec ts{}; - ts.tv_sec = RequestTimeout; - - int Result = ppoll(&PollFDs.at(0), PollFDs.size(), &ts, nullptr); - std::vector NewPollFDs{}; - - if (Result > 0) { - // Walk the FDs and see if we got any results - for (auto it = PollFDs.begin(); it != PollFDs.end(); ) { - auto &Event = *it; - bool Erase{}; - - if (Event.revents != 0) { - if (Event.fd == ServerSocketFD) { - if (Event.revents & POLLIN) { - // If it is the listen socket then we have a new connection - struct sockaddr_storage Addr{}; - socklen_t AddrSize{}; - int NewFD = accept(ServerSocketFD, reinterpret_cast(&Addr), &AddrSize); - - // Add the new client to the temporary array - NewPollFDs.emplace_back(pollfd { - .fd = NewFD, - .events = POLLIN | POLLPRI | POLLRDHUP, - .revents = 0, - }); - } - else if (Event.revents & (POLLHUP | POLLERR | POLLNVAL)) { - // Listen socket error or shutting down - break; - } +void WaitForRequests() { + auto LastDataTime = std::chrono::system_clock::now(); + + while (!ShouldShutdown) { + struct timespec ts {}; + ts.tv_sec = RequestTimeout; + + int Result = ppoll(&PollFDs.at(0), PollFDs.size(), &ts, nullptr); + std::vector NewPollFDs {}; + + if (Result > 0) { + // Walk the FDs and see if we got any results + for (auto it = PollFDs.begin(); it != PollFDs.end();) { + auto& Event = *it; + bool Erase {}; + + if (Event.revents != 0) { + if (Event.fd == ServerSocketFD) { + if (Event.revents & POLLIN) { + // If it is the listen socket then we have a new connection + struct sockaddr_storage Addr {}; + socklen_t AddrSize {}; + int NewFD = accept(ServerSocketFD, reinterpret_cast(&Addr), &AddrSize); + + // Add the new client to 
the temporary array + NewPollFDs.emplace_back(pollfd { + .fd = NewFD, + .events = POLLIN | POLLPRI | POLLRDHUP, + .revents = 0, + }); + } else if (Event.revents & (POLLHUP | POLLERR | POLLNVAL)) { + // Listen socket error or shutting down + break; } - else { - if (Event.revents & POLLIN) { - // Data from the socket - HandleSocketData(Event.fd); - } - - if (Event.revents & (POLLHUP | POLLERR | POLLNVAL | POLLRDHUP)) { - // Error or hangup, close the socket and erase it from our list - Erase = true; - close(Event.fd); - } + } else { + if (Event.revents & POLLIN) { + // Data from the socket + HandleSocketData(Event.fd); } - Event.revents = 0; - --Result; - } - - if (Erase) { - it = PollFDs.erase(it); - } - else { - ++it; + if (Event.revents & (POLLHUP | POLLERR | POLLNVAL | POLLRDHUP)) { + // Error or hangup, close the socket and erase it from our list + Erase = true; + close(Event.fd); + } } - if (Result == 0) { - // Early break if we've consumed all the results - break; - } + Event.revents = 0; + --Result; } - // Insert the new FDs to poll - PollFDs.insert(PollFDs.begin(), NewPollFDs.begin(), NewPollFDs.end()); + if (Erase) { + it = PollFDs.erase(it); + } else { + ++it; + } - LastDataTime = std::chrono::system_clock::now(); - } - else { - auto Now = std::chrono::system_clock::now(); - auto Diff = Now - LastDataTime; - if (Diff >= std::chrono::seconds(RequestTimeout) && - !Foreground && - PollFDs.size() == 1) { - // If we aren't running in the foreground and we have no connections after a timeout - // Then we can just go ahead and leave - ShouldShutdown = true; - LogMan::Msg::DFmt("[FEXServer] Shutting Down"); + if (Result == 0) { + // Early break if we've consumed all the results + break; } } - } - CloseConnections(); - } - - void SetConfiguration(bool Foreground, uint32_t PersistentTimeout) { - ProcessPipe::Foreground = Foreground; - ProcessPipe::RequestTimeout = PersistentTimeout; + // Insert the new FDs to poll + PollFDs.insert(PollFDs.begin(), 
NewPollFDs.begin(), NewPollFDs.end()); + + LastDataTime = std::chrono::system_clock::now(); + } else { + auto Now = std::chrono::system_clock::now(); + auto Diff = Now - LastDataTime; + if (Diff >= std::chrono::seconds(RequestTimeout) && !Foreground && PollFDs.size() == 1) { + // If we aren't running in the foreground and we have no connections after a timeout + // Then we can just go ahead and leave + ShouldShutdown = true; + LogMan::Msg::DFmt("[FEXServer] Shutting Down"); + } + } } - void Shutdown() { - ShouldShutdown = true; - } + CloseConnections(); } +void SetConfiguration(bool Foreground, uint32_t PersistentTimeout) { + ProcessPipe::Foreground = Foreground; + ProcessPipe::RequestTimeout = PersistentTimeout; +} +void Shutdown() { + ShouldShutdown = true; +} +} // namespace ProcessPipe diff --git a/Source/Tools/FEXServer/SquashFS.cpp b/Source/Tools/FEXServer/SquashFS.cpp index 430ad38839..62e976c290 100644 --- a/Source/Tools/FEXServer/SquashFS.cpp +++ b/Source/Tools/FEXServer/SquashFS.cpp @@ -14,59 +14,28 @@ namespace SquashFS { - constexpr int USER_PERMS = S_IRWXU | S_IRWXG | S_IRWXO; - int ServerRootFSLockFD {-1}; - int FuseMountPID{}; - fextl::string MountFolder{}; - - void ShutdownImagePID() { - if (FuseMountPID) { - FHU::Syscalls::tgkill(FuseMountPID, FuseMountPID, SIGINT); - } +constexpr int USER_PERMS = S_IRWXU | S_IRWXG | S_IRWXO; +int ServerRootFSLockFD {-1}; +int FuseMountPID {}; +fextl::string MountFolder {}; + +void ShutdownImagePID() { + if (FuseMountPID) { + FHU::Syscalls::tgkill(FuseMountPID, FuseMountPID, SIGINT); } +} - bool InitializeSquashFSPipe() { - auto RootFSLockFile = FEXServerClient::GetServerRootFSLockFile(); - - int Ret = open(RootFSLockFile.c_str(), O_CREAT | O_RDWR | O_TRUNC | O_EXCL | O_CLOEXEC, USER_PERMS); - ServerRootFSLockFD = Ret; - if (Ret == -1 && errno == EEXIST) { - // If the fifo exists then it might be a stale connection. - // Check the lock status to see if another process is still alive. 
- ServerRootFSLockFD = open(RootFSLockFile.c_str(), O_RDWR | O_CLOEXEC, USER_PERMS); - if (ServerRootFSLockFD != -1) { - // Now that we have opened the file, try to get a write lock. - flock lk { - .l_type = F_WRLCK, - .l_whence = SEEK_SET, - .l_start = 0, - .l_len = 0, - }; - Ret = fcntl(ServerRootFSLockFD, F_SETLK, &lk); - - if (Ret != -1) { - // Write lock was gained, we can now continue onward. - } - else { - // We couldn't get a write lock, this means that another process already owns a lock on the fifo - close(ServerRootFSLockFD); - ServerRootFSLockFD = -1; - return false; - } - } - else { - // File couldn't get opened even though it existed? - // Must have raced something here. - return false; - } - } - else if (Ret == -1) { - // Unhandled error. - LogMan::Msg::EFmt("[FEXServer] Unable to create FEXServer RootFS lock file at: {} {} {}", RootFSLockFile, errno, strerror(errno)); - return false; - } - else { - // FIFO file was created. Try to get a write lock +bool InitializeSquashFSPipe() { + auto RootFSLockFile = FEXServerClient::GetServerRootFSLockFile(); + + int Ret = open(RootFSLockFile.c_str(), O_CREAT | O_RDWR | O_TRUNC | O_EXCL | O_CLOEXEC, USER_PERMS); + ServerRootFSLockFD = Ret; + if (Ret == -1 && errno == EEXIST) { + // If the fifo exists then it might be a stale connection. + // Check the lock status to see if another process is still alive. + ServerRootFSLockFD = open(RootFSLockFile.c_str(), O_RDWR | O_CLOEXEC, USER_PERMS); + if (ServerRootFSLockFD != -1) { + // Now that we have opened the file, try to get a write lock. flock lk { .l_type = F_WRLCK, .l_whence = SEEK_SET, @@ -75,201 +44,228 @@ namespace SquashFS { }; Ret = fcntl(ServerRootFSLockFD, F_SETLK, &lk); - if (Ret == -1) { - // Couldn't get a write lock, something else must have got it + if (Ret != -1) { + // Write lock was gained, we can now continue onward. 
+ } else { + // We couldn't get a write lock, this means that another process already owns a lock on the fifo close(ServerRootFSLockFD); ServerRootFSLockFD = -1; return false; } + } else { + // File couldn't get opened even though it existed? + // Must have raced something here. + return false; } - - return true; - } - - bool DowngradeRootFSPipeToReadLock() { + } else if (Ret == -1) { + // Unhandled error. + LogMan::Msg::EFmt("[FEXServer] Unable to create FEXServer RootFS lock file at: {} {} {}", RootFSLockFile, errno, strerror(errno)); + return false; + } else { + // FIFO file was created. Try to get a write lock flock lk { - .l_type = F_RDLCK, + .l_type = F_WRLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = 0, }; - int Ret = fcntl(ServerRootFSLockFD, F_SETLK, &lk); + Ret = fcntl(ServerRootFSLockFD, F_SETLK, &lk); if (Ret == -1) { - // This shouldn't occur - LogMan::Msg::EFmt("[FEXServer] Unable to downgrade a rootfs write lock to a read lock {} {}", errno, strerror(errno)); + // Couldn't get a write lock, something else must have got it close(ServerRootFSLockFD); ServerRootFSLockFD = -1; return false; } - - return true; } - bool MountRootFSImagePath(const fextl::string &SquashFS, bool EroFS) { - pid_t ParentTID = ::getpid(); - MountFolder = fmt::format("{}/.FEXMount{}-XXXXXX", FEXServerClient::GetServerMountFolder(), ParentTID); - char *MountFolderStr = MountFolder.data(); - - // Make the temporary mount folder - if (mkdtemp(MountFolderStr) == nullptr) { - LogMan::Msg::EFmt("[FEXServer] Couldn't create temporary mount name: {}", MountFolder); - return false; - } - - // Change the permissions - if (chmod(MountFolderStr, 0777) != 0) { - LogMan::Msg::EFmt("[FEXServer] Couldn't change permissions on temporary mount: {}", MountFolder); - rmdir(MountFolderStr); - return false; - } + return true; +} - // Create local FDs so our internal forks can communicate - int fds[2]; - pipe2(fds, 0); - - int pid = fork(); - if (pid == 0) { - // Child - close(fds[0]); // Close read 
side - const char *argv[4]; - argv[0] = EroFS ? "erofsfuse" : "squashfuse"; - argv[1] = SquashFS.c_str(); - argv[2] = MountFolder.c_str(); - argv[3] = nullptr; - - // Try and execute {erofsfuse, squashfuse} to mount our rootfs - if (execvpe(argv[0], (char * const*)argv, environ) == -1) { - // Give a hopefully helpful error message for users - LogMan::Msg::EFmt("[FEXServer] '{}' Couldn't execute for some reason: {} {}\n", argv[0], errno, strerror(errno)); - LogMan::Msg::EFmt("[FEXServer] To mount squashfs rootfs files you need {} installed\n", argv[0]); - LogMan::Msg::EFmt("[FEXServer] Check your FUSE setup.\n"); - - // Let the parent know that we couldn't execute for some reason - uint64_t error{1}; - write(fds[1], &error, sizeof(error)); - - // End the child - exit(1); - } - } - else { - FuseMountPID = pid; - // Parent - // Wait for the child to exit - // This will happen with execvpe of squashmount or exit on failure - while (waitpid(pid, nullptr, 0) == -1 && errno == EINTR); +bool DowngradeRootFSPipeToReadLock() { + flock lk { + .l_type = F_RDLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0, + }; + int Ret = fcntl(ServerRootFSLockFD, F_SETLK, &lk); + + if (Ret == -1) { + // This shouldn't occur + LogMan::Msg::EFmt("[FEXServer] Unable to downgrade a rootfs write lock to a read lock {} {}", errno, strerror(errno)); + close(ServerRootFSLockFD); + ServerRootFSLockFD = -1; + return false; + } - // Check the child pipe for messages - pollfd PollFD; - PollFD.fd = fds[0]; - PollFD.events = POLLIN; + return true; +} - int Result = poll(&PollFD, 1, 0); +bool MountRootFSImagePath(const fextl::string& SquashFS, bool EroFS) { + pid_t ParentTID = ::getpid(); + MountFolder = fmt::format("{}/.FEXMount{}-XXXXXX", FEXServerClient::GetServerMountFolder(), ParentTID); + char* MountFolderStr = MountFolder.data(); - if (Result == 1 && PollFD.revents & POLLIN) { - // Child couldn't execvpe for whatever reason - // Remove the mount path and leave Just in case it was created - 
rmdir(MountFolderStr); + // Make the temporary mount folder + if (mkdtemp(MountFolderStr) == nullptr) { + LogMan::Msg::EFmt("[FEXServer] Couldn't create temporary mount name: {}", MountFolder); + return false; + } - // Close the pipe now - close(fds[0]); + // Change the permissions + if (chmod(MountFolderStr, 0777) != 0) { + LogMan::Msg::EFmt("[FEXServer] Couldn't change permissions on temporary mount: {}", MountFolder); + rmdir(MountFolderStr); + return false; + } - LogMan::Msg::EFmt("[FEXServer] Couldn't mount squashfs\n"); - return false; - } + // Create local FDs so our internal forks can communicate + int fds[2]; + pipe2(fds, 0); + + int pid = fork(); + if (pid == 0) { + // Child + close(fds[0]); // Close read side + const char* argv[4]; + argv[0] = EroFS ? "erofsfuse" : "squashfuse"; + argv[1] = SquashFS.c_str(); + argv[2] = MountFolder.c_str(); + argv[3] = nullptr; + + // Try and execute {erofsfuse, squashfuse} to mount our rootfs + if (execvpe(argv[0], (char* const*)argv, environ) == -1) { + // Give a hopefully helpful error message for users + LogMan::Msg::EFmt("[FEXServer] '{}' Couldn't execute for some reason: {} {}\n", argv[0], errno, strerror(errno)); + LogMan::Msg::EFmt("[FEXServer] To mount squashfs rootfs files you need {} installed\n", argv[0]); + LogMan::Msg::EFmt("[FEXServer] Check your FUSE setup.\n"); + + // Let the parent know that we couldn't execute for some reason + uint64_t error {1}; + write(fds[1], &error, sizeof(error)); + + // End the child + exit(1); + } + } else { + FuseMountPID = pid; + // Parent + // Wait for the child to exit + // This will happen with execvpe of squashmount or exit on failure + while (waitpid(pid, nullptr, 0) == -1 && errno == EINTR) + ; + + // Check the child pipe for messages + pollfd PollFD; + PollFD.fd = fds[0]; + PollFD.events = POLLIN; + + int Result = poll(&PollFD, 1, 0); + + if (Result == 1 && PollFD.revents & POLLIN) { + // Child couldn't execvpe for whatever reason + // Remove the mount path and leave 
Just in case it was created + rmdir(MountFolderStr); // Close the pipe now close(fds[0]); - } - // Write to the lock file where we are mounted - write(ServerRootFSLockFD, MountFolder.c_str(), MountFolder.size()); - fdatasync(ServerRootFSLockFD); + LogMan::Msg::EFmt("[FEXServer] Couldn't mount squashfs\n"); + return false; + } - return true; + // Close the pipe now + close(fds[0]); } - void UnmountRootFS() { - FEX_CONFIG_OPT(LDPath, ROOTFS); - if (!FEX::FormatCheck::IsSquashFS(LDPath()) && !FEX::FormatCheck::IsEroFS(LDPath())) { - return; - } + // Write to the lock file where we are mounted + write(ServerRootFSLockFD, MountFolder.c_str(), MountFolder.size()); + fdatasync(ServerRootFSLockFD); - SquashFS::ShutdownImagePID(); + return true; +} - // Handle final mount removal - // fusermount for unmounting the mountpoint, then the {erfsfuse, squashfuse} will exit automatically - int pid = fork(); +void UnmountRootFS() { + FEX_CONFIG_OPT(LDPath, ROOTFS); + if (!FEX::FormatCheck::IsSquashFS(LDPath()) && !FEX::FormatCheck::IsEroFS(LDPath())) { + return; + } - if (pid == 0) { - const char *argv[5]; - argv[0] = "fusermount"; - argv[1] = "-u"; - argv[2] = "-q"; - argv[3] = MountFolder.c_str(); - argv[4] = nullptr; + SquashFS::ShutdownImagePID(); - if (execvp(argv[0], (char * const*)argv) == -1) { - fprintf(stderr, "fusermount failed to execute. 
You may have an mount living at '%s' to clean up now\n", MountFolder.c_str()); - fprintf(stderr, "Try `%s %s %s %s`\n", argv[0], argv[1], argv[2], argv[3]); - exit(1); - } - } - else { - // Wait for fusermount to leave - while (waitpid(pid, nullptr, 0) == -1 && errno == EINTR); + // Handle final mount removal + // fusermount for unmounting the mountpoint, then the {erfsfuse, squashfuse} will exit automatically + int pid = fork(); - // Remove the mount path - rmdir(MountFolder.c_str()); + if (pid == 0) { + const char* argv[5]; + argv[0] = "fusermount"; + argv[1] = "-u"; + argv[2] = "-q"; + argv[3] = MountFolder.c_str(); + argv[4] = nullptr; - // Remove the rootfs lock file - auto RootFSLockFile = FEXServerClient::GetServerRootFSLockFile(); - unlink(RootFSLockFile.c_str()); + if (execvp(argv[0], (char* const*)argv) == -1) { + fprintf(stderr, "fusermount failed to execute. You may have an mount living at '%s' to clean up now\n", MountFolder.c_str()); + fprintf(stderr, "Try `%s %s %s %s`\n", argv[0], argv[1], argv[2], argv[3]); + exit(1); } - } + } else { + // Wait for fusermount to leave + while (waitpid(pid, nullptr, 0) == -1 && errno == EINTR) + ; - bool InitializeSquashFS() { - FEX_CONFIG_OPT(LDPath, ROOTFS); + // Remove the mount path + rmdir(MountFolder.c_str()); - MountFolder = LDPath(); + // Remove the rootfs lock file + auto RootFSLockFile = FEXServerClient::GetServerRootFSLockFile(); + unlink(RootFSLockFile.c_str()); + } +} - bool IsSquashFS {false}; - bool IsEroFS {false}; +bool InitializeSquashFS() { + FEX_CONFIG_OPT(LDPath, ROOTFS); - // Check if the image is an EroFS - IsEroFS = FEX::FormatCheck::IsEroFS(MountFolder); + MountFolder = LDPath(); - if (!IsEroFS) { - // Check if the image is an SquashFS - IsSquashFS = FEX::FormatCheck::IsSquashFS(MountFolder); - } + bool IsSquashFS {false}; + bool IsEroFS {false}; - if (!IsSquashFS && !IsEroFS) { - // If this isn't a rootfs image then we have nothing to do here - return true; - } + // Check if the image is an 
EroFS + IsEroFS = FEX::FormatCheck::IsEroFS(MountFolder); - if (!InitializeSquashFSPipe()) { - LogMan::Msg::EFmt("[FEXServer] Couldn't initialize SquashFSPipe"); - return false; - } + if (!IsEroFS) { + // Check if the image is an SquashFS + IsSquashFS = FEX::FormatCheck::IsSquashFS(MountFolder); + } - // Setup rootfs here - if (!MountRootFSImagePath(LDPath(), IsEroFS)) { - LogMan::Msg::EFmt("[FEXServer] Couldn't mount squashfs path"); - return false; - } + if (!IsSquashFS && !IsEroFS) { + // If this isn't a rootfs image then we have nothing to do here + return true; + } - if (!DowngradeRootFSPipeToReadLock()) { - LogMan::Msg::EFmt("[FEXServer] Couldn't downgrade read lock"); - return false; - } + if (!InitializeSquashFSPipe()) { + LogMan::Msg::EFmt("[FEXServer] Couldn't initialize SquashFSPipe"); + return false; + } - return true; + // Setup rootfs here + if (!MountRootFSImagePath(LDPath(), IsEroFS)) { + LogMan::Msg::EFmt("[FEXServer] Couldn't mount squashfs path"); + return false; } - fextl::string GetMountFolder() { - return MountFolder; + if (!DowngradeRootFSPipeToReadLock()) { + LogMan::Msg::EFmt("[FEXServer] Couldn't downgrade read lock"); + return false; } + + return true; +} + +fextl::string GetMountFolder() { + return MountFolder; } +} // namespace SquashFS diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.cpp index 03b73436cd..df53753d66 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.cpp @@ -33,357 +33,273 @@ desc: Emulated /proc/cpuinfo, version, osrelease, etc #include namespace FEX::EmulatedFile { - /** - * @brief Generates a temporary file using raw FDs - * - * Since we are hooking syscalls that are expecting to use raw FDs, we need to make sure to also use raw FDs. - * The guest application can leave these FDs dangling. 
- * - * Using glibc tmpfile creates a FILE which glibc tracks and will try cleaning up on application exit. - * If we are running a 32-bit application then this dangling FILE will be allocated using the FEX allcator - * Which will have already been cleaned up on shutdown. - * - * Dangling raw FD is safe since if the guest doesn't close them, then the kernel cleans them up on application close. - * - * @return A temporary file that we can use - */ - static int GenTmpFD(const char *pathname, int flags) { - uint32_t memfd_flags {MFD_ALLOW_SEALING}; - if (flags & O_CLOEXEC) memfd_flags |= MFD_CLOEXEC; - - return memfd_create(pathname, memfd_flags); +/** + * @brief Generates a temporary file using raw FDs + * + * Since we are hooking syscalls that are expecting to use raw FDs, we need to make sure to also use raw FDs. + * The guest application can leave these FDs dangling. + * + * Using glibc tmpfile creates a FILE which glibc tracks and will try cleaning up on application exit. + * If we are running a 32-bit application then this dangling FILE will be allocated using the FEX allcator + * Which will have already been cleaned up on shutdown. + * + * Dangling raw FD is safe since if the guest doesn't close them, then the kernel cleans them up on application close. + * + * @return A temporary file that we can use + */ +static int GenTmpFD(const char* pathname, int flags) { + uint32_t memfd_flags {MFD_ALLOW_SEALING}; + if (flags & O_CLOEXEC) { + memfd_flags |= MFD_CLOEXEC; } - // Seal the tmpfd features by sealing them all. - // Makes the tmpfd read-only. 
- static void SealTmpFD(int fd) { - fcntl(fd, F_ADD_SEALS, - F_SEAL_SEAL | - F_SEAL_SHRINK | - F_SEAL_GROW | - F_SEAL_WRITE | - F_SEAL_FUTURE_WRITE); - } + return memfd_create(pathname, memfd_flags); +} - fextl::string GenerateCPUInfo(FEXCore::Context::Context *ctx, uint32_t CPUCores) { - fextl::ostringstream cpu_stream{}; - auto res_0 = ctx->RunCPUIDFunction(0, 0); - auto res_1 = ctx->RunCPUIDFunction(1, 0); - auto res_6 = ctx->RunCPUIDFunction(6, 0); - auto res_7 = ctx->RunCPUIDFunction(7, 0); - auto res_10 = ctx->RunCPUIDFunction(0x10, 0); - - auto res_8000_0001 = ctx->RunCPUIDFunction(0x8000'0001, 0); - auto res_8000_0007 = ctx->RunCPUIDFunction(0x8000'0007, 0); - auto res_8000_0008 = ctx->RunCPUIDFunction(0x8000'0008, 0); - auto res_8000_000a = ctx->RunCPUIDFunction(0x8000'000a, 0); - auto res_8000_001f = ctx->RunCPUIDFunction(0x8000'001f, 0); - - union VendorID { - struct { - uint32_t id; - char Str[13]; - }; - struct { - FEXCore::CPUID::FunctionResults cpuid; - uint8_t null; - }; - }; +// Seal the tmpfd features by sealing them all. +// Makes the tmpfd read-only. 
+static void SealTmpFD(int fd) { + fcntl(fd, F_ADD_SEALS, F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_FUTURE_WRITE); +} - union ModelName { - struct { - char Str[49]; - }; - struct { - FEXCore::CPUID::FunctionResults cpuid_2; - FEXCore::CPUID::FunctionResults cpuid_3; - FEXCore::CPUID::FunctionResults cpuid_4; - uint8_t null; - }; +fextl::string GenerateCPUInfo(FEXCore::Context::Context* ctx, uint32_t CPUCores) { + fextl::ostringstream cpu_stream {}; + auto res_0 = ctx->RunCPUIDFunction(0, 0); + auto res_1 = ctx->RunCPUIDFunction(1, 0); + auto res_6 = ctx->RunCPUIDFunction(6, 0); + auto res_7 = ctx->RunCPUIDFunction(7, 0); + auto res_10 = ctx->RunCPUIDFunction(0x10, 0); + + auto res_8000_0001 = ctx->RunCPUIDFunction(0x8000'0001, 0); + auto res_8000_0007 = ctx->RunCPUIDFunction(0x8000'0007, 0); + auto res_8000_0008 = ctx->RunCPUIDFunction(0x8000'0008, 0); + auto res_8000_000a = ctx->RunCPUIDFunction(0x8000'000a, 0); + auto res_8000_001f = ctx->RunCPUIDFunction(0x8000'001f, 0); + + union VendorID { + struct { + uint32_t id; + char Str[13]; }; - - union Info { + struct { FEXCore::CPUID::FunctionResults cpuid; - struct { - unsigned Stepping : 4; - unsigned Model : 4; - unsigned FamilyID : 4; - unsigned Type : 4; - unsigned ExModelID : 4; - unsigned ExFamilyID : 8; - unsigned : 4; - }; + uint8_t null; }; + }; - VendorID vendorid {}; - vendorid.cpuid = {res_0.eax, res_0.ebx, res_0.edx, res_0.ecx}; - vendorid.null = 0; - - Info info {res_1}; - - uint32_t Family = info.FamilyID + (info.FamilyID == 0xF ? 
info.ExFamilyID : 0); - fextl::ostringstream flags_data{}; - // Generate the flags data up front - // This is the same per core - { -#define FLAG(flag, name) if (flag) { flags_data << name << " "; } - FLAG(res_1.edx & (1 << 0), "fpu") - FLAG(res_1.edx & (1 << 1), "vme") - FLAG(res_1.edx & (1 << 2), "de") - FLAG(res_1.edx & (1 << 3), "pse") - FLAG(res_1.edx & (1 << 4), "tsc") - FLAG(res_1.edx & (1 << 5), "msr") - FLAG(res_1.edx & (1 << 6), "pae") - FLAG(res_1.edx & (1 << 7), "mce") - FLAG(res_1.edx & (1 << 8), "cx8") - FLAG(res_1.edx & (1 << 9), "apic") - FLAG(res_1.edx & (1 << 11), "sep") - FLAG(res_1.edx & (1 << 12), "mtrr") - FLAG(res_1.edx & (1 << 13), "pge") - FLAG(res_1.edx & (1 << 14), "mca") - FLAG(res_1.edx & (1 << 15), "cmov") - FLAG(res_1.edx & (1 << 16), "pat") - FLAG(res_1.edx & (1 << 17), "pse36") - FLAG(res_1.edx & (1 << 18), "pn") - FLAG(res_1.edx & (1 << 19), "clflush") - FLAG(res_1.edx & (1 << 21), "ds") // XXX - FLAG(res_1.edx & (1 << 22), "acpi") // XXX - FLAG(res_1.edx & (1 << 23), "mmx") - FLAG(res_1.edx & (1 << 24), "fxsr") - FLAG(res_1.edx & (1 << 25), "sse") - FLAG(res_1.edx & (1 << 26), "sse2") - FLAG(res_1.edx & (1 << 27), "ss") - FLAG(res_1.edx & (1 << 28), "ht") - FLAG(res_1.edx & (1 << 29), "tm") - FLAG(res_1.edx & (1 << 30), "ia64") - FLAG(res_1.edx & (1 << 31), "pbe") - - FLAG(res_8000_0001.edx & (1 << 11), - "syscall") - FLAG(res_8000_0001.edx & (1 << 19), - "mp") - FLAG(res_8000_0001.edx & (1 << 20), - "nx") - FLAG(res_8000_0001.edx & (1 << 22), - "mmxext") - FLAG(res_8000_0001.edx & (1 << 25), - "fxsr_opt") - FLAG(res_8000_0001.edx & (1 << 26), - "pdpe1gb") - FLAG(res_8000_0001.edx & (1 << 27), - "rdtscp") - FLAG(res_8000_0001.edx & (1 << 29), - "lm") - FLAG(res_8000_0001.edx & (1 << 31), - "3dnow") - FLAG(res_8000_0001.edx & (1 << 30), - "3dnowext") - - FLAG(res_8000_0007.edx & (1 << 8), - "constant_tsc") - - // We are not a uniprocessor running in SMP mode - FLAG(false, "up") - // Timer is always running - FLAG(true, "art") - // 
No Intel perfmon - FLAG(false, "arch_perfmon") - // No precise event based sampling - FLAG(false, "pebs") - // No branch trace store - FLAG(false, "bts") - - FLAG(true, "rep_good") - FLAG(res_8000_0007.edx & (1 << 12), - "tm") - - // Always support long nop - FLAG(true, "nopl") - - // Always expose topology information - FLAG(true, "xtoplogy") - - // Atom/geode only? - FLAG(false, "tsc_reliable") - FLAG(res_8000_0007.edx & (1 << 8), - "nonstop_tsc") - - // We always support CPUID - FLAG(true, "cpuid") - FLAG(Family > 0x16, - "extd_apicid") - FLAG(false, "amd_dcm") // Never claim to be a multi node processor - FLAG(res_8000_0007.edx & (1 << 11), - "aperfmperf") - - // Need to check ARM documentation if we can support this? - FLAG(false, "nonstop_tsc_s3") - - // We can calculate this flag on AArch64 - FLAG(true, "tsc_known_freq") - - FLAG(res_1.ecx & (1 << 0), - "pni") - FLAG(res_1.ecx & (1 << 1), - "pclmulqdq") - FLAG(res_1.ecx & (1 << 2), - "dtes64") - FLAG(res_1.ecx & (1 << 3), - "monitor") - FLAG(res_1.ecx & (1 << 4), - "ds_cpl") - FLAG(res_1.ecx & (1 << 5), - "vmx") - FLAG(res_1.ecx & (1 << 6), - "smx") - FLAG(res_1.ecx & (1 << 7), - "est") - FLAG(res_1.ecx & (1 << 8), - "tm2") - FLAG(res_1.ecx & (1 << 9), - "ssse3") - FLAG(res_1.ecx & (1 << 11), - "sdbg") - FLAG(res_1.ecx & (1 << 12), - "fma") - FLAG(res_1.ecx & (1 << 13), - "cx16") - FLAG(res_1.ecx & (1 << 14), - "xptr") - FLAG(res_1.ecx & (1 << 15), - "pdcm") - FLAG(res_1.ecx & (1 << 17), - "pcid") - FLAG(res_1.ecx & (1 << 18), - "dca") - FLAG(res_1.ecx & (1 << 19), - "sse4_1") - FLAG(res_1.ecx & (1 << 20), - "sse4_2") - FLAG(res_1.ecx & (1 << 21), - "x2apic") - FLAG(res_1.ecx & (1 << 22), - "movbe") - FLAG(res_1.ecx & (1 << 23), - "popcnt") - FLAG(res_1.ecx & (1 << 24), - "tsc_deadline_timer") - FLAG(res_1.ecx & (1 << 25), - "aes") - FLAG(res_1.ecx & (1 << 26), - "xsave") - FLAG(res_1.ecx & (1 << 28), - "avx") - FLAG(res_1.ecx & (1 << 29), - "f16c") - FLAG(res_1.ecx & (1 << 30), - "rdrand") - FLAG(res_1.ecx 
& (1 << 31), - "hypervisor") - - FLAG(res_8000_0001.ecx & (1 << 0), - "lahf_lm") - FLAG(res_8000_0001.ecx & (1 << 1), - "cmp_legacy") - FLAG(res_8000_0001.ecx & (1 << 2), - "svm") - FLAG(res_8000_0001.ecx & (1 << 3), - "extapic") - FLAG(res_8000_0001.ecx & (1 << 4), - "cr8_legacy") - FLAG(res_8000_0001.ecx & (1 << 5), - "abm") - FLAG(res_8000_0001.ecx & (1 << 6), - "sse4a") - FLAG(res_8000_0001.ecx & (1 << 7), - "misalignsse") - FLAG(res_8000_0001.ecx & (1 << 8), - "3dnowprefetch") - FLAG(res_8000_0001.ecx & (1 << 9), - "osvw") - FLAG(res_8000_0001.ecx & (1 << 10), - "ibs") - FLAG(res_8000_0001.ecx & (1 << 11), - "xop") - FLAG(res_8000_0001.ecx & (1 << 12), - "skinit") - FLAG(res_8000_0001.ecx & (1 << 13), - "wdt") - FLAG(res_8000_0001.ecx & (1 << 15), - "lwp") - FLAG(res_8000_0001.ecx & (1 << 16), - "fma4") - FLAG(res_8000_0001.ecx & (1 << 17), - "tce") - FLAG(res_8000_0001.ecx & (1 << 19), - "nodeid_msr") - FLAG(res_8000_0001.ecx & (1 << 21), - "tbm") - FLAG(res_8000_0001.ecx & (1 << 22), - "topoext") - FLAG(res_8000_0001.ecx & (1 << 23), - "perfctr_core") - FLAG(res_8000_0001.ecx & (1 << 24), - "perfctr_nb") - FLAG(res_8000_0001.ecx & (1 << 26), - "bpext") - FLAG(res_8000_0001.ecx & (1 << 27), - "ptsc") - - FLAG(res_8000_0001.ecx & (1 << 28), - "perfctr_llc") - FLAG(res_8000_0001.ecx & (1 << 29), - "mwaitx") - - // We don't support ring 3 supporting mwait - FLAG(false, "ring3mwait") - // We don't support Intel CPUID fault support - FLAG(false, "cpuid_fault") - FLAG(res_8000_0007.edx & (1 << 9), - "cpb") - FLAG(res_6.ecx & (1 << 3), - "epb") - FLAG(res_10.ebx & (1 << 1), - "cat_l3") - FLAG(res_10.ebx & (1 << 2), - "cat_l2") - FLAG(false, // Needs leaf support - "cdp_l3") - FLAG(false, "invpcid_single") - FLAG(res_8000_0007.edx & (1 << 7), - "hw_pstate") - FLAG(res_8000_001f.eax & (1 << 0), - "sme") - - // Kernel page table isolation. 
- FLAG(false, "pti") - - // We don't support Intel's Protected Processor Inventory Number - FLAG(false, "intel_ppin") - FLAG(false, // Needs leaf support - "cdp_l2") - - FLAG(res_8000_0008.ebx & (1 << 6), - "mba") - FLAG(res_8000_001f.eax & (1 << 1), - "sev") - - { - // Speculative bug workarounds - // We don't claim to have these bugs, so we don't need to claim these flags - FLAG(res_7.edx & (1 << 31), - "ssbd") - FLAG(false, "ibrs") - FLAG(false, "ibpb") - - FLAG(res_7.edx & (1 << 27), - "stibp") - - FLAG(false, "ibrs_enhanced") - } + union ModelName { + struct { + char Str[49]; + }; + struct { + FEXCore::CPUID::FunctionResults cpuid_2; + FEXCore::CPUID::FunctionResults cpuid_3; + FEXCore::CPUID::FunctionResults cpuid_4; + uint8_t null; + }; + }; + + union Info { + FEXCore::CPUID::FunctionResults cpuid; + struct { + unsigned Stepping : 4; + unsigned Model : 4; + unsigned FamilyID : 4; + unsigned Type : 4; + unsigned ExModelID : 4; + unsigned ExFamilyID : 8; + unsigned : 4; + }; + }; + + VendorID vendorid {}; + vendorid.cpuid = {res_0.eax, res_0.ebx, res_0.edx, res_0.ecx}; + vendorid.null = 0; - // We don't support Intel's TPR Shadow feature - FLAG(false, "tpr_shadow") + Info info {res_1}; + + uint32_t Family = info.FamilyID + (info.FamilyID == 0xF ? 
info.ExFamilyID : 0); + fextl::ostringstream flags_data {}; + // Generate the flags data up front + // This is the same per core + { +#define FLAG(flag, name) \ + if (flag) { \ + flags_data << name << " "; \ + } + FLAG(res_1.edx & (1 << 0), "fpu") + FLAG(res_1.edx & (1 << 1), "vme") + FLAG(res_1.edx & (1 << 2), "de") + FLAG(res_1.edx & (1 << 3), "pse") + FLAG(res_1.edx & (1 << 4), "tsc") + FLAG(res_1.edx & (1 << 5), "msr") + FLAG(res_1.edx & (1 << 6), "pae") + FLAG(res_1.edx & (1 << 7), "mce") + FLAG(res_1.edx & (1 << 8), "cx8") + FLAG(res_1.edx & (1 << 9), "apic") + FLAG(res_1.edx & (1 << 11), "sep") + FLAG(res_1.edx & (1 << 12), "mtrr") + FLAG(res_1.edx & (1 << 13), "pge") + FLAG(res_1.edx & (1 << 14), "mca") + FLAG(res_1.edx & (1 << 15), "cmov") + FLAG(res_1.edx & (1 << 16), "pat") + FLAG(res_1.edx & (1 << 17), "pse36") + FLAG(res_1.edx & (1 << 18), "pn") + FLAG(res_1.edx & (1 << 19), "clflush") + FLAG(res_1.edx & (1 << 21), "ds") // XXX + FLAG(res_1.edx & (1 << 22), "acpi") // XXX + FLAG(res_1.edx & (1 << 23), "mmx") + FLAG(res_1.edx & (1 << 24), "fxsr") + FLAG(res_1.edx & (1 << 25), "sse") + FLAG(res_1.edx & (1 << 26), "sse2") + FLAG(res_1.edx & (1 << 27), "ss") + FLAG(res_1.edx & (1 << 28), "ht") + FLAG(res_1.edx & (1 << 29), "tm") + FLAG(res_1.edx & (1 << 30), "ia64") + FLAG(res_1.edx & (1 << 31), "pbe") + + FLAG(res_8000_0001.edx & (1 << 11), "syscall") + FLAG(res_8000_0001.edx & (1 << 19), "mp") + FLAG(res_8000_0001.edx & (1 << 20), "nx") + FLAG(res_8000_0001.edx & (1 << 22), "mmxext") + FLAG(res_8000_0001.edx & (1 << 25), "fxsr_opt") + FLAG(res_8000_0001.edx & (1 << 26), "pdpe1gb") + FLAG(res_8000_0001.edx & (1 << 27), "rdtscp") + FLAG(res_8000_0001.edx & (1 << 29), "lm") + FLAG(res_8000_0001.edx & (1 << 31), "3dnow") + FLAG(res_8000_0001.edx & (1 << 30), "3dnowext") + + FLAG(res_8000_0007.edx & (1 << 8), "constant_tsc") + + // We are not a uniprocessor running in SMP mode + FLAG(false, "up") + // Timer is always running + FLAG(true, "art") + // No Intel 
perfmon + FLAG(false, "arch_perfmon") + // No precise event based sampling + FLAG(false, "pebs") + // No branch trace store + FLAG(false, "bts") + + FLAG(true, "rep_good") + FLAG(res_8000_0007.edx & (1 << 12), "tm") + + // Always support long nop + FLAG(true, "nopl") + + // Always expose topology information + FLAG(true, "xtoplogy") + + // Atom/geode only? + FLAG(false, "tsc_reliable") + FLAG(res_8000_0007.edx & (1 << 8), "nonstop_tsc") + + // We always support CPUID + FLAG(true, "cpuid") + FLAG(Family > 0x16, "extd_apicid") + FLAG(false, "amd_dcm") // Never claim to be a multi node processor + FLAG(res_8000_0007.edx & (1 << 11), "aperfmperf") + + // Need to check ARM documentation if we can support this? + FLAG(false, "nonstop_tsc_s3") + + // We can calculate this flag on AArch64 + FLAG(true, "tsc_known_freq") + + FLAG(res_1.ecx & (1 << 0), "pni") + FLAG(res_1.ecx & (1 << 1), "pclmulqdq") + FLAG(res_1.ecx & (1 << 2), "dtes64") + FLAG(res_1.ecx & (1 << 3), "monitor") + FLAG(res_1.ecx & (1 << 4), "ds_cpl") + FLAG(res_1.ecx & (1 << 5), "vmx") + FLAG(res_1.ecx & (1 << 6), "smx") + FLAG(res_1.ecx & (1 << 7), "est") + FLAG(res_1.ecx & (1 << 8), "tm2") + FLAG(res_1.ecx & (1 << 9), "ssse3") + FLAG(res_1.ecx & (1 << 11), "sdbg") + FLAG(res_1.ecx & (1 << 12), "fma") + FLAG(res_1.ecx & (1 << 13), "cx16") + FLAG(res_1.ecx & (1 << 14), "xptr") + FLAG(res_1.ecx & (1 << 15), "pdcm") + FLAG(res_1.ecx & (1 << 17), "pcid") + FLAG(res_1.ecx & (1 << 18), "dca") + FLAG(res_1.ecx & (1 << 19), "sse4_1") + FLAG(res_1.ecx & (1 << 20), "sse4_2") + FLAG(res_1.ecx & (1 << 21), "x2apic") + FLAG(res_1.ecx & (1 << 22), "movbe") + FLAG(res_1.ecx & (1 << 23), "popcnt") + FLAG(res_1.ecx & (1 << 24), "tsc_deadline_timer") + FLAG(res_1.ecx & (1 << 25), "aes") + FLAG(res_1.ecx & (1 << 26), "xsave") + FLAG(res_1.ecx & (1 << 28), "avx") + FLAG(res_1.ecx & (1 << 29), "f16c") + FLAG(res_1.ecx & (1 << 30), "rdrand") + FLAG(res_1.ecx & (1 << 31), "hypervisor") + + FLAG(res_8000_0001.ecx & (1 << 0), 
"lahf_lm") + FLAG(res_8000_0001.ecx & (1 << 1), "cmp_legacy") + FLAG(res_8000_0001.ecx & (1 << 2), "svm") + FLAG(res_8000_0001.ecx & (1 << 3), "extapic") + FLAG(res_8000_0001.ecx & (1 << 4), "cr8_legacy") + FLAG(res_8000_0001.ecx & (1 << 5), "abm") + FLAG(res_8000_0001.ecx & (1 << 6), "sse4a") + FLAG(res_8000_0001.ecx & (1 << 7), "misalignsse") + FLAG(res_8000_0001.ecx & (1 << 8), "3dnowprefetch") + FLAG(res_8000_0001.ecx & (1 << 9), "osvw") + FLAG(res_8000_0001.ecx & (1 << 10), "ibs") + FLAG(res_8000_0001.ecx & (1 << 11), "xop") + FLAG(res_8000_0001.ecx & (1 << 12), "skinit") + FLAG(res_8000_0001.ecx & (1 << 13), "wdt") + FLAG(res_8000_0001.ecx & (1 << 15), "lwp") + FLAG(res_8000_0001.ecx & (1 << 16), "fma4") + FLAG(res_8000_0001.ecx & (1 << 17), "tce") + FLAG(res_8000_0001.ecx & (1 << 19), "nodeid_msr") + FLAG(res_8000_0001.ecx & (1 << 21), "tbm") + FLAG(res_8000_0001.ecx & (1 << 22), "topoext") + FLAG(res_8000_0001.ecx & (1 << 23), "perfctr_core") + FLAG(res_8000_0001.ecx & (1 << 24), "perfctr_nb") + FLAG(res_8000_0001.ecx & (1 << 26), "bpext") + FLAG(res_8000_0001.ecx & (1 << 27), "ptsc") + + FLAG(res_8000_0001.ecx & (1 << 28), "perfctr_llc") + FLAG(res_8000_0001.ecx & (1 << 29), "mwaitx") + + // We don't support ring 3 supporting mwait + FLAG(false, "ring3mwait") + // We don't support Intel CPUID fault support + FLAG(false, "cpuid_fault") + FLAG(res_8000_0007.edx & (1 << 9), "cpb") + FLAG(res_6.ecx & (1 << 3), "epb") + FLAG(res_10.ebx & (1 << 1), "cat_l3") + FLAG(res_10.ebx & (1 << 2), "cat_l2") + FLAG(false, // Needs leaf support + "cdp_l3") + FLAG(false, "invpcid_single") + FLAG(res_8000_0007.edx & (1 << 7), "hw_pstate") + FLAG(res_8000_001f.eax & (1 << 0), "sme") + + // Kernel page table isolation. 
+ FLAG(false, "pti") + + // We don't support Intel's Protected Processor Inventory Number + FLAG(false, "intel_ppin") + FLAG(false, // Needs leaf support + "cdp_l2") + + FLAG(res_8000_0008.ebx & (1 << 6), "mba") + FLAG(res_8000_001f.eax & (1 << 1), "sev") + + {// Speculative bug workarounds + // We don't claim to have these bugs, so we don't need to claim these flags + FLAG(res_7.edx & (1 << 31), "ssbd") FLAG(false, "ibrs") FLAG(false, "ibpb") + + FLAG(res_7.edx & (1 << 27), "stibp") + + FLAG(false, "ibrs_enhanced")} + + // We don't support Intel's TPR Shadow feature + FLAG(false, "tpr_shadow") // Intel virtual NMI FLAG(false, "vnmi") // Intel FlexPriority @@ -396,423 +312,289 @@ namespace FEX::EmulatedFile { // Prefer VMMCall to VMCall FLAG(false, "vmmcall") // Intel extended page table access dirty bit - FLAG(false, "ept_ad") - FLAG(res_7.ebx & (1 << 0), - "fsgsbase") - FLAG(res_7.ebx & (1 << 1), - "tsc_adjust") - FLAG(res_7.ebx & (1 << 3), - "bmi1") - FLAG(res_7.ebx & (1 << 4), - "hle") - FLAG(res_7.ebx & (1 << 5), - "avx2") - FLAG(res_7.ebx & (1 << 7), - "smep") - FLAG(res_7.ebx & (1 << 8), - "bmi2") - FLAG(res_7.ebx & (1 << 9), - "erms") - FLAG(res_7.ebx & (1 << 10), - "invpcid") - FLAG(res_7.ebx & (1 << 11), - "rtm") - FLAG(false, // Needs leaf support - "cqm") - FLAG(res_7.ebx & (1 << 14), - "mpx") - FLAG(false, // Needs leaf support - "rdt_a") - FLAG(res_7.ebx & (1 << 16), - "avx512f") - FLAG(res_7.ebx & (1 << 17), - "avx512dq") - FLAG(res_7.ebx & (1 << 18), - "rdseed") - FLAG(res_7.ebx & (1 << 19), - "adx") - FLAG(res_7.ebx & (1 << 20), - "smap") - FLAG(res_7.ebx & (1 << 21), - "avx512ifma") - FLAG(res_7.ebx & (1 << 23), - "clflushopt") - FLAG(res_7.ebx & (1 << 24), - "clwb") - FLAG(res_7.ebx & (1 << 25), - "intel_pt") - FLAG(res_7.ebx & (1 << 26), - "avx512pf") - FLAG(res_7.ebx & (1 << 27), - "avx512er") - FLAG(res_7.ebx & (1 << 28), - "avx512cd") - FLAG(res_7.ebx & (1 << 29), - "sha_ni") - FLAG(res_7.ebx & (1 << 30), - "avx512bw") - FLAG(res_7.ebx & (1 
<< 31), - "avx512vl") - FLAG(false, // Needs leaf support // res_d.eax & (1 << 0) // Leaf 1h - "xsaveopt") - FLAG(false, // Needs leaf support // res_d.eax & (1 << 1) // Leaf 1h - "xsavec") - FLAG(false, // Needs leaf support // res_d.eax & (1 << 2) // Leaf 1h - "xgetbv1") - FLAG(false, // Needs leaf support // res_d.eax & (1 << 3) // Leaf 1h - "xsaves") - - FLAG(false, // Needs leaf support - "avx512_bf16") - FLAG(res_8000_0008.ebx & (1 << 0), - "clzero") - FLAG(res_8000_0008.ebx & (1 << 1), - "irperf") - FLAG(res_8000_0008.ebx & (1 << 2), - "xsaveerptr") + FLAG(false, "ept_ad") FLAG(res_7.ebx & (1 << 0), "fsgsbase") FLAG(res_7.ebx & (1 << 1), "tsc_adjust") + FLAG(res_7.ebx & (1 << 3), "bmi1") FLAG(res_7.ebx & (1 << 4), "hle") FLAG(res_7.ebx & (1 << 5), "avx2") + FLAG(res_7.ebx & (1 << 7), "smep") FLAG(res_7.ebx & (1 << 8), "bmi2") FLAG(res_7.ebx & (1 << 9), "erms") + FLAG(res_7.ebx & (1 << 10), "invpcid") FLAG(res_7.ebx & (1 << 11), + "rtm") FLAG(false, // Needs leaf support + "cqm") FLAG(res_7.ebx & (1 << 14), + "mpx") FLAG(false, // Needs leaf support + "rdt_a") FLAG(res_7.ebx & (1 << 16), "avx512f") + FLAG(res_7.ebx & (1 << 17), "avx512dq") FLAG(res_7.ebx & (1 << 18), "rdseed") FLAG(res_7.ebx & (1 << 19), "adx") + FLAG(res_7.ebx & (1 << 20), "smap") FLAG(res_7.ebx & (1 << 21), "avx512ifma") FLAG(res_7.ebx & (1 << 23), "clflushopt") + FLAG(res_7.ebx & (1 << 24), "clwb") FLAG(res_7.ebx & (1 << 25), "intel_pt") FLAG(res_7.ebx & (1 << 26), "avx512pf") + FLAG(res_7.ebx & (1 << 27), "avx512er") FLAG(res_7.ebx & (1 << 28), "avx512cd") FLAG(res_7.ebx & (1 << 29), "sha_ni") + FLAG(res_7.ebx & (1 << 30), "avx512bw") FLAG(res_7.ebx & (1 << 31), + "avx512vl") + FLAG(false, // Needs leaf support // res_d.eax & (1 << 0) // Leaf 1h + "xsaveopt") FLAG(false, // Needs leaf support // res_d.eax & (1 << 1) // Leaf 1h + "xsavec") FLAG(false, // Needs leaf support // res_d.eax & (1 << 2) // Leaf 1h + "xgetbv1") FLAG(false, // Needs leaf support // res_d.eax & (1 << 3) // Leaf 1h + 
"xsaves") + + FLAG(false, // Needs leaf support + "avx512_bf16") FLAG(res_8000_0008.ebx & (1 << 0), "clzero") FLAG(res_8000_0008.ebx & (1 << 1), "irperf") + FLAG(res_8000_0008.ebx & (1 << 2), "xsaveerptr") // Intel digital thermal sensor FLAG(false, "dtherm") // Intel turbo boost - FLAG(false, "ida") - FLAG(res_6.eax & (1 << 2), - "arat") + FLAG(false, "ida") FLAG(res_6.eax & (1 << 2), "arat") // Power limit notification controls FLAG(false, "pln") // Intel package thermal status FLAG(false, "pts") // Intel Hardware P-state features - FLAG(false, "hwp") - FLAG(false, "hwp_notify") - FLAG(false, "hwp_act_window") - FLAG(false, "hwp_epp") - FLAG(false, "hwp_pkg_req") - - FLAG(res_8000_000a.ebx & (1 << 0), - "npt") - FLAG(res_8000_000a.ebx & (1 << 1), - "lbrv") - FLAG(res_8000_000a.ebx & (1 << 2), - "svm_lock") - FLAG(res_8000_000a.ebx & (1 << 3), - "nrip_save") - FLAG(res_8000_000a.ebx & (1 << 4), - "tsc_scale") - FLAG(res_8000_000a.ebx & (1 << 5), - "vmcb_clean") - FLAG(res_8000_000a.ebx & (1 << 6), - "flushbyasid") - FLAG(res_8000_000a.ebx & (1 << 7), - "decodeassists") - FLAG(res_8000_000a.ebx & (1 << 10), - "pausefilter") - FLAG(res_8000_000a.ebx & (1 << 12), - "pfthreshold") - FLAG(res_8000_000a.ebx & (1 << 13), - "avic") - FLAG(res_8000_000a.ebx & (1 << 15), - "v_vmsave_vmload") - FLAG(res_8000_000a.ebx & (1 << 16), - "vgif") - - FLAG(res_7.ecx & (1 << 1), - "avx512vbmi") - FLAG(res_7.ecx & (1 << 2), - "umip") - FLAG(res_7.ecx & (1 << 3), - "pku") - FLAG(res_7.ecx & (1 << 4), - "ospke") - FLAG(res_7.ecx & (1 << 5), - "waitpkg") - FLAG(res_7.ecx & (1 << 6), - "avx512_vbmi2") - FLAG(res_7.ecx & (1 << 8), - "gfni") - FLAG(res_7.ecx & (1 << 9), - "vaes") - FLAG(res_7.ecx & (1 << 10), - "vpclmulqdq") - FLAG(res_7.ecx & (1 << 11), - "avx512_vnni") - FLAG(res_7.ecx & (1 << 12), - "avx512_bitalg") - FLAG(res_7.ecx & (1 << 13), - "tme") - FLAG(res_7.ecx & (1 << 14), - "avx512_vpopcntdq") - FLAG(res_7.ecx & (1 << 16), - "la57") - FLAG(res_7.ecx & (1 << 22), - "rdpid") - 
FLAG(res_7.ecx & (1 << 25), - "cldemote") - FLAG(res_7.ecx & (1 << 27), - "movdiri") - FLAG(res_7.ecx & (1 << 28), - "movdir64b") - - FLAG(res_8000_0007.ebx & (1 << 0), - "overflow_recov") - FLAG(res_8000_0007.ebx & (1 << 1), - "succor") - FLAG(res_8000_0007.ebx & (1 << 3), - "smca") - - FLAG(res_7.edx & (1 << 2), - "avx512_4vnniw") - FLAG(res_7.edx & (1 << 3), - "avx512_4fmaps") - FLAG(res_7.edx & (1 << 4), - "fsrm") - FLAG(res_7.edx & (1 << 8), - "avx512_vp2intersect") - FLAG(res_7.edx & (1 << 10), - "md_clear") - FLAG(res_7.edx & (1 << 14), - "serialize") - FLAG(res_7.edx & (1 << 18), - "pconfig") - FLAG(res_7.edx & (1 << 19), - "arch_lbr") - FLAG(res_7.edx & (1 << 28), - "flush_l1d") - FLAG(res_7.edx & (1 << 29), - "arch_capabilities") - } - - for (int i = 0; i < CPUCores; ++i) { - cpu_stream << "processor\t: " << i << std::endl; // Logical id - cpu_stream << "vendor_id\t: " << vendorid.Str << std::endl; - cpu_stream << "cpu family\t: " << Family << std::endl; - cpu_stream << "model\t\t: " << (info.Model + (info.FamilyID >= 6 ? 
(info.ExModelID << 4) : 0)) << std::endl; - ModelName modelname {}; - auto res_8000_0002 = ctx->RunCPUIDFunctionName(0x8000'0002, 0, i); - auto res_8000_0003 = ctx->RunCPUIDFunctionName(0x8000'0003, 0, i); - auto res_8000_0004 = ctx->RunCPUIDFunctionName(0x8000'0004, 0, i); - modelname.cpuid_2 = res_8000_0002; - modelname.cpuid_3 = res_8000_0003; - modelname.cpuid_4 = res_8000_0004; - modelname.null = 0; - - cpu_stream << "model name\t: " << modelname.Str << std::endl; - cpu_stream << "stepping\t: " << info.Stepping << std::endl; - cpu_stream << "microcode\t: 0x0" << std::endl; - cpu_stream << "cpu MHz\t\t: 3000" << std::endl; - cpu_stream << "cache size\t: 512 KB" << std::endl; - cpu_stream << "physical id\t: 0" << std::endl; // Socket id (always 0 for a single socket system) - cpu_stream << "siblings\t: " << CPUCores << std::endl; // Number of logical cores - cpu_stream << "core id\t\t: " << i << std::endl; // Physical id - cpu_stream << "cpu cores\t: " << CPUCores << std::endl; // Number of physical cores - cpu_stream << "apicid\t\t: " << i << std::endl; - cpu_stream << "initial apicid\t: " << i << std::endl; - cpu_stream << "fpu\t\t: " << (res_1.edx & (1 << 0) ? "yes" : "no") << std::endl; - cpu_stream << "fpu_exception\t: " << (res_1.edx & (1 << 0) ? 
"yes" : "no") << std::endl; - cpu_stream << "cpuid level\t: " << vendorid.id << std::endl; - cpu_stream << "wp\t\t: yes" << std::endl; - cpu_stream << "flags\t\t: " << flags_data.str() << std::endl; - - // We don't have any bugs, don't question it - cpu_stream << "bugs\t\t: " << std::endl; - cpu_stream << "bogomips\t: 8000.0" << std::endl; - // These next four aren't necessarily correct - cpu_stream << "TLB size\t: 2560 4K pages" << std::endl; - cpu_stream << "clflush size\t: 64" << std::endl; - cpu_stream << "cache_alignment\t : 64" << std::endl; - - // Cortex-A is 40 or 44 bits physical, and 48/52 virtual - // Choose the lesser configuration - cpu_stream << "address sizes\t: 40 bits physical, 48 bits virtual" << std::endl; - - // No power management but required to report - cpu_stream << "power management: " << std::endl; - - cpu_stream << std::endl; - } + FLAG(false, "hwp") FLAG(false, "hwp_notify") FLAG(false, "hwp_act_window") FLAG(false, "hwp_epp") FLAG(false, "hwp_pkg_req") + + FLAG(res_8000_000a.ebx & (1 << 0), "npt") FLAG(res_8000_000a.ebx & (1 << 1), "lbrv") FLAG(res_8000_000a.ebx & (1 << 2), "svm_lock") + FLAG(res_8000_000a.ebx & (1 << 3), "nrip_save") FLAG(res_8000_000a.ebx & (1 << 4), "tsc_scale") + FLAG(res_8000_000a.ebx & (1 << 5), "vmcb_clean") FLAG(res_8000_000a.ebx & (1 << 6), "flushbyasid") + FLAG(res_8000_000a.ebx & (1 << 7), "decodeassists") FLAG(res_8000_000a.ebx & (1 << 10), "pausefilter") + FLAG(res_8000_000a.ebx & (1 << 12), "pfthreshold") FLAG(res_8000_000a.ebx & (1 << 13), "avic") + FLAG(res_8000_000a.ebx & (1 << 15), "v_vmsave_vmload") FLAG(res_8000_000a.ebx & (1 << 16), "vgif") + + FLAG(res_7.ecx & (1 << 1), "avx512vbmi") FLAG(res_7.ecx & (1 << 2), "umip") FLAG(res_7.ecx & (1 << 3), "pku") + FLAG(res_7.ecx & (1 << 4), "ospke") FLAG(res_7.ecx & (1 << 5), "waitpkg") FLAG(res_7.ecx & (1 << 6), "avx512_vbmi2") + FLAG(res_7.ecx & (1 << 8), "gfni") FLAG(res_7.ecx & (1 << 9), "vaes") FLAG(res_7.ecx & (1 << 10), "vpclmulqdq") + FLAG(res_7.ecx & 
(1 << 11), "avx512_vnni") FLAG(res_7.ecx & (1 << 12), "avx512_bitalg") + FLAG(res_7.ecx & (1 << 13), "tme") FLAG(res_7.ecx & (1 << 14), "avx512_vpopcntdq") + FLAG(res_7.ecx & (1 << 16), "la57") FLAG(res_7.ecx & (1 << 22), "rdpid") FLAG(res_7.ecx & (1 << 25), "cldemot" + "e") + FLAG(res_7.ecx & (1 << 27), "movdiri") FLAG(res_7.ecx & (1 << 28), "movdir64b") + + FLAG(res_8000_0007.ebx & (1 << 0), "overflow_recov") FLAG(res_8000_0007.ebx & (1 << 1), "succor") + FLAG(res_8000_0007.ebx & (1 << 3), "smca") + + FLAG(res_7.edx & (1 << 2), "avx512_4vnniw") FLAG(res_7.edx & (1 << 3), "avx512_4fmaps") + FLAG(res_7.edx & (1 << 4), "fsrm") FLAG(res_7.edx & (1 << 8), "avx512_vp2intersect") + FLAG(res_7.edx & (1 << 10), "md_clear") FLAG(res_7.edx & (1 << 14), "serialize") + FLAG(res_7.edx & (1 << 18), "pconfig") FLAG(res_7.edx & (1 << 19), "arch_lbr") + FLAG(res_7.edx & (1 << 28), "flush_l1d") FLAG(res_7.edx & (1 << 29), "arch_capabilities") + } - return cpu_stream.str(); + for (int i = 0; i < CPUCores; ++i) { + cpu_stream << "processor\t: " << i << std::endl; // Logical id + cpu_stream << "vendor_id\t: " << vendorid.Str << std::endl; + cpu_stream << "cpu family\t: " << Family << std::endl; + cpu_stream << "model\t\t: " << (info.Model + (info.FamilyID >= 6 ? 
(info.ExModelID << 4) : 0)) << std::endl; + ModelName modelname {}; + auto res_8000_0002 = ctx->RunCPUIDFunctionName(0x8000'0002, 0, i); + auto res_8000_0003 = ctx->RunCPUIDFunctionName(0x8000'0003, 0, i); + auto res_8000_0004 = ctx->RunCPUIDFunctionName(0x8000'0004, 0, i); + modelname.cpuid_2 = res_8000_0002; + modelname.cpuid_3 = res_8000_0003; + modelname.cpuid_4 = res_8000_0004; + modelname.null = 0; + + cpu_stream << "model name\t: " << modelname.Str << std::endl; + cpu_stream << "stepping\t: " << info.Stepping << std::endl; + cpu_stream << "microcode\t: 0x0" << std::endl; + cpu_stream << "cpu MHz\t\t: 3000" << std::endl; + cpu_stream << "cache size\t: 512 KB" << std::endl; + cpu_stream << "physical id\t: 0" << std::endl; // Socket id (always 0 for a single socket system) + cpu_stream << "siblings\t: " << CPUCores << std::endl; // Number of logical cores + cpu_stream << "core id\t\t: " << i << std::endl; // Physical id + cpu_stream << "cpu cores\t: " << CPUCores << std::endl; // Number of physical cores + cpu_stream << "apicid\t\t: " << i << std::endl; + cpu_stream << "initial apicid\t: " << i << std::endl; + cpu_stream << "fpu\t\t: " << (res_1.edx & (1 << 0) ? "yes" : "no") << std::endl; + cpu_stream << "fpu_exception\t: " << (res_1.edx & (1 << 0) ? 
"yes" : "no") << std::endl; + cpu_stream << "cpuid level\t: " << vendorid.id << std::endl; + cpu_stream << "wp\t\t: yes" << std::endl; + cpu_stream << "flags\t\t: " << flags_data.str() << std::endl; + + // We don't have any bugs, don't question it + cpu_stream << "bugs\t\t: " << std::endl; + cpu_stream << "bogomips\t: 8000.0" << std::endl; + // These next four aren't necessarily correct + cpu_stream << "TLB size\t: 2560 4K pages" << std::endl; + cpu_stream << "clflush size\t: 64" << std::endl; + cpu_stream << "cache_alignment\t : 64" << std::endl; + + // Cortex-A is 40 or 44 bits physical, and 48/52 virtual + // Choose the lesser configuration + cpu_stream << "address sizes\t: 40 bits physical, 48 bits virtual" << std::endl; + + // No power management but required to report + cpu_stream << "power management: " << std::endl; + + cpu_stream << std::endl; } - EmulatedFDManager::EmulatedFDManager(FEXCore::Context::Context *ctx) - : CTX {ctx} - , ThreadsConfig { FEXCore::CPUInfo::CalculateNumberOfCPUs() } { - FDReadCreators["/proc/cpuinfo"] = [&](FEXCore::Context::Context *ctx, int32_t fd, const char *pathname, int32_t flags, mode_t mode) -> int32_t { - // Only allow a single thread to initialize the cpu_info. - // Jit in-case multiple threads try to initialize at once. - // Check if deferred cpuinfo initialization has occured. 
- std::call_once(cpu_info_initialized, [&]() { cpu_info = GenerateCPUInfo(ctx, ThreadsConfig); }); - - int FD = GenTmpFD(pathname, flags); - write(FD, (void*)&cpu_info.at(0), cpu_info.size()); - lseek(FD, 0, SEEK_SET); - SealTmpFD(FD); - return FD; - }; + return cpu_stream.str(); +} - FDReadCreators["/proc/sys/kernel/osrelease"] = [&](FEXCore::Context::Context *ctx, int32_t fd, const char *pathname, int32_t flags, mode_t mode) -> int32_t { - int FD = GenTmpFD(pathname, flags); - uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion(); - char Tmp[64]{}; - snprintf(Tmp, sizeof(Tmp), "%d.%d.%d\n", - FEX::HLE::SyscallHandler::KernelMajor(GuestVersion), - FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), - FEX::HLE::SyscallHandler::KernelPatch(GuestVersion)); - // + 1 to ensure null at the end - write(FD, Tmp, strlen(Tmp) + 1); - lseek(FD, 0, SEEK_SET); - SealTmpFD(FD); - return FD; - }; +EmulatedFDManager::EmulatedFDManager(FEXCore::Context::Context* ctx) + : CTX {ctx} + , ThreadsConfig {FEXCore::CPUInfo::CalculateNumberOfCPUs()} { + FDReadCreators["/proc/cpuinfo"] = [&](FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, mode_t mode) -> int32_t { + // Only allow a single thread to initialize the cpu_info. + // Jit in-case multiple threads try to initialize at once. + // Check if deferred cpuinfo initialization has occured. 
+ std::call_once(cpu_info_initialized, [&]() { cpu_info = GenerateCPUInfo(ctx, ThreadsConfig); }); - FDReadCreators["/proc/version"] = [&](FEXCore::Context::Context *ctx, int32_t fd, const char *pathname, int32_t flags, mode_t mode) -> int32_t { - int FD = GenTmpFD(pathname, flags); - // UTS version NEEDS to be in a format that can pass to `date -d` - // Format of this is Linux version (@) () # {SMP, PREEMPT, PREEMPT_RT} \n" - const char kernel_version[] = "Linux version %d.%d.%d (FEX@FEX) (clang) #" GIT_DESCRIBE_STRING " SMP " __DATE__ " " __TIME__ "\n"; - uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion(); - char Tmp[sizeof(kernel_version) + 64]{}; - snprintf(Tmp, sizeof(Tmp), kernel_version, - FEX::HLE::SyscallHandler::KernelMajor(GuestVersion), - FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), - FEX::HLE::SyscallHandler::KernelPatch(GuestVersion)); - // + 1 to ensure null at the end - write(FD, Tmp, strlen(Tmp) + 1); - lseek(FD, 0, SEEK_SET); - SealTmpFD(FD); - return FD; - }; + int FD = GenTmpFD(pathname, flags); + write(FD, (void*)&cpu_info.at(0), cpu_info.size()); + lseek(FD, 0, SEEK_SET); + SealTmpFD(FD); + return FD; + }; - auto NumCPUCores = [&](FEXCore::Context::Context *ctx, int32_t fd, const char *pathname, int32_t flags, mode_t mode) -> int32_t { - int FD = GenTmpFD(pathname, flags); - write(FD, (void*)&cpus_online.at(0), cpus_online.size()); - lseek(FD, 0, SEEK_SET); - SealTmpFD(FD); - return FD; - }; + FDReadCreators["/proc/sys/kernel/osrelease"] = [&](FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, + mode_t mode) -> int32_t { + int FD = GenTmpFD(pathname, flags); + uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion(); + char Tmp[64] {}; + snprintf(Tmp, sizeof(Tmp), "%d.%d.%d\n", FEX::HLE::SyscallHandler::KernelMajor(GuestVersion), + FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), FEX::HLE::SyscallHandler::KernelPatch(GuestVersion)); + // + 1 to ensure null at 
the end + write(FD, Tmp, strlen(Tmp) + 1); + lseek(FD, 0, SEEK_SET); + SealTmpFD(FD); + return FD; + }; - FDReadCreators["/sys/devices/system/cpu/online"] = NumCPUCores; - FDReadCreators["/sys/devices/system/cpu/present"] = NumCPUCores; - - fextl::string procAuxv = fextl::fmt::format("/proc/{}/auxv", getpid()); - - FDReadCreators[procAuxv] = &EmulatedFDManager::ProcAuxv; - FDReadCreators["/proc/self/auxv"] = &EmulatedFDManager::ProcAuxv; - - auto cmdline_handler = [&](FEXCore::Context::Context *ctx, int32_t fd, const char *pathname, int32_t flags, mode_t mode) -> int32_t { - int FD = GenTmpFD(pathname, flags); - auto CodeLoader = FEX::HLE::_SyscallHandler->GetCodeLoader(); - auto Args = CodeLoader->GetApplicationArguments(); - char NullChar{}; - // cmdline is an array of null terminated arguments - for (size_t i = 0; i < Args->size(); ++i) { - auto &Arg = Args->at(i); - write(FD, Arg.c_str(), Arg.size()); - // Finish off with a null terminator - write(FD, &NullChar, sizeof(uint8_t)); - } + FDReadCreators["/proc/version"] = [&](FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, mode_t mode) -> int32_t { + int FD = GenTmpFD(pathname, flags); + // UTS version NEEDS to be in a format that can pass to `date -d` + // Format of this is Linux version (@) () # {SMP, PREEMPT, PREEMPT_RT} \n" + const char kernel_version[] = "Linux version %d.%d.%d (FEX@FEX) (clang) #" GIT_DESCRIBE_STRING " SMP " __DATE__ " " __TIME__ "\n"; + uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion(); + char Tmp[sizeof(kernel_version) + 64] {}; + snprintf(Tmp, sizeof(Tmp), kernel_version, FEX::HLE::SyscallHandler::KernelMajor(GuestVersion), + FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), FEX::HLE::SyscallHandler::KernelPatch(GuestVersion)); + // + 1 to ensure null at the end + write(FD, Tmp, strlen(Tmp) + 1); + lseek(FD, 0, SEEK_SET); + SealTmpFD(FD); + return FD; + }; - // One additional null terminator to finish the list - lseek(FD, 0, 
SEEK_SET); - SealTmpFD(FD); - return FD; - }; + auto NumCPUCores = [&](FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, mode_t mode) -> int32_t { + int FD = GenTmpFD(pathname, flags); + write(FD, (void*)&cpus_online.at(0), cpus_online.size()); + lseek(FD, 0, SEEK_SET); + SealTmpFD(FD); + return FD; + }; - FDReadCreators["/proc/self/cmdline"] = cmdline_handler; - fextl::string procCmdLine = fextl::fmt::format("/proc/{}/cmdline", getpid()); - FDReadCreators[procCmdLine] = cmdline_handler; + FDReadCreators["/sys/devices/system/cpu/online"] = NumCPUCores; + FDReadCreators["/sys/devices/system/cpu/present"] = NumCPUCores; - if (ThreadsConfig > 1) { - cpus_online = fextl::fmt::format("0-{}", ThreadsConfig - 1); - } - else { - cpus_online = "0"; + fextl::string procAuxv = fextl::fmt::format("/proc/{}/auxv", getpid()); + + FDReadCreators[procAuxv] = &EmulatedFDManager::ProcAuxv; + FDReadCreators["/proc/self/auxv"] = &EmulatedFDManager::ProcAuxv; + + auto cmdline_handler = [&](FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, mode_t mode) -> int32_t { + int FD = GenTmpFD(pathname, flags); + auto CodeLoader = FEX::HLE::_SyscallHandler->GetCodeLoader(); + auto Args = CodeLoader->GetApplicationArguments(); + char NullChar {}; + // cmdline is an array of null terminated arguments + for (size_t i = 0; i < Args->size(); ++i) { + auto& Arg = Args->at(i); + write(FD, Arg.c_str(), Arg.size()); + // Finish off with a null terminator + write(FD, &NullChar, sizeof(uint8_t)); } - } - EmulatedFDManager::~EmulatedFDManager() { + // One additional null terminator to finish the list + lseek(FD, 0, SEEK_SET); + SealTmpFD(FD); + return FD; + }; + + FDReadCreators["/proc/self/cmdline"] = cmdline_handler; + fextl::string procCmdLine = fextl::fmt::format("/proc/{}/cmdline", getpid()); + FDReadCreators[procCmdLine] = cmdline_handler; + + if (ThreadsConfig > 1) { + cpus_online = fextl::fmt::format("0-{}", ThreadsConfig - 1); + } else { 
+ cpus_online = "0"; } +} - int32_t EmulatedFDManager::OpenAt(int dirfs, const char *pathname, int flags, uint32_t mode) { - char Tmp[PATH_MAX]; - const char *Path{}; +EmulatedFDManager::~EmulatedFDManager() {} - auto Creator = FDReadCreators.end(); - if (pathname) { - Creator = FDReadCreators.find(pathname); - Path = pathname; - } +int32_t EmulatedFDManager::OpenAt(int dirfs, const char* pathname, int flags, uint32_t mode) { + char Tmp[PATH_MAX]; + const char* Path {}; - if (Creator == FDReadCreators.end()) { - if (((pathname && pathname[0] != '/') || // If pathname exists then it must not be absolute - !pathname) && - dirfs != AT_FDCWD) { - // Passed in a dirfd that isn't magic FDCWD - // We need to get the path from the fd now - auto PathLength = FEX::get_fdpath(dirfs, Tmp); - if (PathLength != -1) { - if (pathname) { - Tmp[PathLength] = '/'; - PathLength += 1; - strncpy(&Tmp[PathLength], pathname, PATH_MAX - PathLength); - } - else { - Tmp[PathLength] = '\0'; - } - Path = Tmp; - } - else if (pathname) { - Path = pathname; - } - } - else { - if (!pathname || pathname[0] == 0) { - return -1; - } + auto Creator = FDReadCreators.end(); + if (pathname) { + Creator = FDReadCreators.find(pathname); + Path = pathname; + } + if (Creator == FDReadCreators.end()) { + if (((pathname && pathname[0] != '/') || // If pathname exists then it must not be absolute + !pathname) && + dirfs != AT_FDCWD) { + // Passed in a dirfd that isn't magic FDCWD + // We need to get the path from the fd now + auto PathLength = FEX::get_fdpath(dirfs, Tmp); + if (PathLength != -1) { + if (pathname) { + Tmp[PathLength] = '/'; + PathLength += 1; + strncpy(&Tmp[PathLength], pathname, PATH_MAX - PathLength); + } else { + Tmp[PathLength] = '\0'; + } + Path = Tmp; + } else if (pathname) { Path = pathname; } - - bool exists = access(Path, F_OK) == 0; - bool RealPathExists = false; - - if (exists) { - // If realpath fails then the temporary buffer is in an undefined state. 
- // Need to use another temporary just in-case realpath doesn't succeed. - char ExistsTempPath[PATH_MAX]; - char *RealPath = realpath(Path, ExistsTempPath); - if (RealPath) { - RealPathExists = true; - Creator = FDReadCreators.find(RealPath); - } + } else { + if (!pathname || pathname[0] == 0) { + return -1; } - if (!RealPathExists) { - Creator = FDReadCreators.find(FHU::Filesystem::LexicallyNormal(Path)); - } + Path = pathname; + } - if (Creator == FDReadCreators.end()) { - return -1; + bool exists = access(Path, F_OK) == 0; + bool RealPathExists = false; + + if (exists) { + // If realpath fails then the temporary buffer is in an undefined state. + // Need to use another temporary just in-case realpath doesn't succeed. + char ExistsTempPath[PATH_MAX]; + char* RealPath = realpath(Path, ExistsTempPath); + if (RealPath) { + RealPathExists = true; + Creator = FDReadCreators.find(RealPath); } } - return Creator->second(CTX, dirfs, Path, flags, mode); - } + if (!RealPathExists) { + Creator = FDReadCreators.find(FHU::Filesystem::LexicallyNormal(Path)); + } - int32_t EmulatedFDManager::ProcAuxv(FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, mode_t mode) - { - uint64_t auxvBase=0, auxvSize=0; - FEX::HLE::_SyscallHandler->GetCodeLoader()->GetAuxv(auxvBase, auxvSize); - if (!auxvBase) { - LogMan::Msg::DFmt("Failed to get Auxv stack address"); + if (Creator == FDReadCreators.end()) { return -1; } - - int FD = GenTmpFD(pathname, flags); - write(FD, (void*)auxvBase, auxvSize); - lseek(FD, 0, SEEK_SET); - SealTmpFD(FD); - return FD; } + + return Creator->second(CTX, dirfs, Path, flags, mode); } +int32_t EmulatedFDManager::ProcAuxv(FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, mode_t mode) { + uint64_t auxvBase = 0, auxvSize = 0; + FEX::HLE::_SyscallHandler->GetCodeLoader()->GetAuxv(auxvBase, auxvSize); + if (!auxvBase) { + LogMan::Msg::DFmt("Failed to get Auxv stack address"); + return -1; + } + + int FD = 
GenTmpFD(pathname, flags); + write(FD, (void*)auxvBase, auxvSize); + lseek(FD, 0, SEEK_SET); + SealTmpFD(FD); + return FD; +} +} // namespace FEX::EmulatedFile diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FaultSafeMemcpy.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/FaultSafeMemcpy.cpp index 63954f5f94..f4abf5b256 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FaultSafeMemcpy.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FaultSafeMemcpy.cpp @@ -3,8 +3,7 @@ namespace FEX::HLE::FaultSafeMemcpy { #ifdef _M_ARM_64 -__attribute__((naked)) -size_t CopyFromUser(void *Dest, const void* Src, size_t Size) { +__attribute__((naked)) size_t CopyFromUser(void* Dest, const void* Src, size_t Size) { __asm volatile(R"( // Early exit if a memcpy of size zero. cbz x2, 2f; @@ -19,12 +18,11 @@ size_t CopyFromUser(void *Dest, const void* Src, size_t Size) { 2: mov x0, 0; ret; - )" - ::: "memory"); + )" :: + : "memory"); } -__attribute__((naked)) -size_t CopyToUser(void *Dest, const void* Src, size_t Size) { +__attribute__((naked)) size_t CopyToUser(void* Dest, const void* Src, size_t Size) { __asm volatile(R"( // Early exit if a memcpy of size zero. 
cbz x2, 2f; @@ -39,28 +37,27 @@ size_t CopyToUser(void *Dest, const void* Src, size_t Size) { 2: mov x0, 0; ret; - )" - ::: "memory"); + )" :: + : "memory"); } extern "C" uint64_t CopyFromUser_FaultInst; -void * const CopyFromUser_FaultLocation = &CopyFromUser_FaultInst; +void* const CopyFromUser_FaultLocation = &CopyFromUser_FaultInst; extern "C" uint64_t CopyToUser_FaultInst; -void * const CopyToUser_FaultLocation = &CopyToUser_FaultInst; +void* const CopyToUser_FaultLocation = &CopyToUser_FaultInst; bool IsFaultLocation(uint64_t PC) { - return reinterpret_cast(PC) == CopyFromUser_FaultLocation || - reinterpret_cast(PC) == CopyToUser_FaultLocation; + return reinterpret_cast(PC) == CopyFromUser_FaultLocation || reinterpret_cast(PC) == CopyToUser_FaultLocation; } #else -size_t CopyFromUser(void *Dest, const void* Src, size_t Size) { +size_t CopyFromUser(void* Dest, const void* Src, size_t Size) { memcpy(Dest, Src, Size); return Size; } -size_t CopyToUser(void *Dest, const void* Src, size_t Size) { +size_t CopyToUser(void* Dest, const void* Src, size_t Size) { memcpy(Dest, Src, Size); return Size; } @@ -69,4 +66,4 @@ bool IsFaultLocation(uint64_t PC) { return false; } #endif -} +} // namespace FEX::HLE::FaultSafeMemcpy diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp index 1b5e4c3cff..084a9e00ae 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp @@ -43,23 +43,23 @@ desc: Rootfs overlay logic #include namespace JSON { - struct JsonAllocator { - jsonPool_t PoolObject; - fextl::unique_ptr> json_objects; - }; - static_assert(offsetof(JsonAllocator, PoolObject) == 0, "This needs to be at offset zero"); +struct JsonAllocator { + jsonPool_t PoolObject; + fextl::unique_ptr> json_objects; +}; +static_assert(offsetof(JsonAllocator, PoolObject) == 0, "This needs to be at offset zero"); - json_t* 
PoolInit(jsonPool_t* Pool) { - JsonAllocator* alloc = reinterpret_cast(Pool); - alloc->json_objects = fextl::make_unique>(); - return &*alloc->json_objects->emplace(alloc->json_objects->end()); - } +json_t* PoolInit(jsonPool_t* Pool) { + JsonAllocator* alloc = reinterpret_cast(Pool); + alloc->json_objects = fextl::make_unique>(); + return &*alloc->json_objects->emplace(alloc->json_objects->end()); +} - json_t* PoolAlloc(jsonPool_t* Pool) { - JsonAllocator* alloc = reinterpret_cast(Pool); - return &*alloc->json_objects->emplace(alloc->json_objects->end()); - } +json_t* PoolAlloc(jsonPool_t* Pool) { + JsonAllocator* alloc = reinterpret_cast(Pool); + return &*alloc->json_objects->emplace(alloc->json_objects->end()); } +} // namespace JSON namespace FEX::HLE { bool FileManager::RootFSPathExists(const char* Filepath) { @@ -74,10 +74,9 @@ void FileManager::LoadThunkDatabase(fextl::unordered_map PathPrefixes{}; + fextl::vector PathPrefixes {}; if (RootFSIsMultiarch) { // Multi-arch debian distros have a fairly complex arrangement of filepaths. // These fractal out to the combination of library prefixes with arch suffixes. 
@@ -89,15 +88,12 @@ void FileManager::LoadThunkDatabase(fextl::unordered_mapsecond.LibraryName = json_getValue(LibraryItem); - } - else if (ItemName == "Depends") { + } else if (ItemName == "Depends") { jsonType_t PropertyType = json_getType(LibraryItem); if (PropertyType == JSON_TEXT) { DBObject->second.Depends.insert(json_getValue(LibraryItem)); - } - else if (PropertyType == JSON_ARRAY) { - for (json_t const* Depend = json_getChild(LibraryItem); Depend != nullptr; Depend = json_getSibling(Depend)) { + } else if (PropertyType == JSON_ARRAY) { + for (const json_t* Depend = json_getChild(LibraryItem); Depend != nullptr; Depend = json_getSibling(Depend)) { DBObject->second.Depends.insert(json_getValue(Depend)); } } - } - else if (ItemName == "Overlay") { + } else if (ItemName == "Overlay") { auto AddWithReplacement = [HomeDirectory, &PathPrefixes](ThunkDBObject& DBObject, fextl::string LibraryItem) { // Walk through template string and fill in prefixes from right to left using namespace std::string_view_literals; - const std::pair PrefixHome { "@HOME@"sv, LibraryItem.find("@HOME@") }; - const std::pair PrefixLib { "@PREFIX_LIB@"sv, LibraryItem.find("@PREFIX_LIB@") }; + const std::pair PrefixHome {"@HOME@"sv, LibraryItem.find("@HOME@")}; + const std::pair PrefixLib {"@PREFIX_LIB@"sv, LibraryItem.find("@PREFIX_LIB@")}; fextl::string::size_type PrefixPositions[] = { - PrefixHome.second, PrefixLib.second, + PrefixHome.second, + PrefixLib.second, }; // Sort offsets in descending order to enable safe in-place replacement - std::sort(std::begin(PrefixPositions), std::end(PrefixPositions), std::greater<>{}); + std::sort(std::begin(PrefixPositions), std::end(PrefixPositions), std::greater<> {}); for (auto& LibPrefix : PathPrefixes) { fextl::string Replacement = LibraryItem; @@ -195,9 +188,8 @@ void FileManager::LoadThunkDatabase(fextl::unordered_mapsecond, json_getValue(LibraryItem)); - } - else if (PropertyType == JSON_ARRAY) { - for (json_t const* Overlay = 
json_getChild(LibraryItem); Overlay != nullptr; Overlay = json_getSibling(Overlay)) { + } else if (PropertyType == JSON_ARRAY) { + for (const json_t* Overlay = json_getChild(LibraryItem); Overlay != nullptr; Overlay = json_getSibling(Overlay)) { AddWithReplacement(DBObject->second, json_getValue(Overlay)); } } @@ -207,7 +199,7 @@ void FileManager::LoadThunkDatabase(fextl::unordered_map FileData; if (FEXCore::FileLoading::LoadFile(FileData, Path)) { JSON::JsonAllocator Pool { - .PoolObject = { - .init = JSON::PoolInit, - .alloc = JSON::PoolAlloc, - }, + .PoolObject = + { + .init = JSON::PoolInit, + .alloc = JSON::PoolAlloc, + }, }; // If a thunks DB property exists then we pull in data from the thunks database - json_t const *json = json_createWithPool(&FileData.at(0), &Pool.PoolObject); - json_t const* ThunksDB = json_getProperty( json, "ThunksDB" ); + const json_t* json = json_createWithPool(&FileData.at(0), &Pool.PoolObject); + const json_t* ThunksDB = json_getProperty(json, "ThunksDB"); if (!ThunksDB) { continue; } - for (json_t const* Item = json_getChild(ThunksDB); Item != nullptr; Item = json_getSibling(Item)) { - const char *LibraryName = json_getName(Item); + for (const json_t* Item = json_getChild(ThunksDB); Item != nullptr; Item = json_getSibling(Item)) { + const char* LibraryName = json_getName(Item); bool LibraryEnabled = json_getInteger(Item) != 0; // If the library is enabled then find it in the DB auto DBObject = ThunkDB.find(LibraryName); @@ -286,8 +278,8 @@ FileManager::FileManager(FEXCore::Context::Context *ctx) } // Now that we loaded the thunks object, walk through and ensure dependencies are enabled as well - auto ThunkGuestPath = Is64BitMode() ? ThunkGuestLibs() : ThunkGuestLibs32() ; - for (auto const &DBObject : ThunkDB) { + auto ThunkGuestPath = Is64BitMode() ? 
ThunkGuestLibs() : ThunkGuestLibs32(); + for (const auto& DBObject : ThunkDB) { if (!DBObject.second.Enabled) { continue; } @@ -301,23 +293,23 @@ FileManager::FileManager(FEXCore::Context::Context *ctx) bool Is64BitMode; void SetupOverlay(const ThunkDBObject& DBDepend) { - auto ThunkPath = fextl::fmt::format("{}/{}", ThunkGuestPath, DBDepend.LibraryName); - if (!FHU::Filesystem::Exists(ThunkPath)) { - if (!Is64BitMode) { - // Guest libraries not existing is expected since not all libraries are thunked on 32-bit - return; - } - ERROR_AND_DIE_FMT("Requested thunking via guest library \"{}\" that does not exist", ThunkPath); + auto ThunkPath = fextl::fmt::format("{}/{}", ThunkGuestPath, DBDepend.LibraryName); + if (!FHU::Filesystem::Exists(ThunkPath)) { + if (!Is64BitMode) { + // Guest libraries not existing is expected since not all libraries are thunked on 32-bit + return; } + ERROR_AND_DIE_FMT("Requested thunking via guest library \"{}\" that does not exist", ThunkPath); + } - for (const auto& Overlay : DBDepend.Overlays) { - // Direct full path in guest RootFS to our overlay file - ThunkOverlays.emplace(Overlay, ThunkPath); - } + for (const auto& Overlay : DBDepend.Overlays) { + // Direct full path in guest RootFS to our overlay file + ThunkOverlays.emplace(Overlay, ThunkPath); + } }; - void InsertDependencies(const fextl::unordered_set &Depends) { - for (auto const &Depend : Depends) { + void InsertDependencies(const fextl::unordered_set& Depends) { + for (const auto& Depend : Depends) { auto& DBDepend = ThunkDB.at(Depend); if (DBDepend.Enabled) { continue; @@ -330,7 +322,7 @@ FileManager::FileManager(FEXCore::Context::Context *ctx) InsertDependencies(DBDepend.Depends); } }; - } DBObjectHandler { ThunkOverlays, ThunkDB, ThunkGuestPath, Is64BitMode() }; + } DBObjectHandler {ThunkOverlays, ThunkDB, ThunkGuestPath, Is64BitMode()}; DBObjectHandler.SetupOverlay(DBObject.second); DBObjectHandler.InsertDependencies(DBObject.second.Depends); @@ -379,9 +371,9 @@ 
FileManager::~FileManager() { close(RootFSFD); } -fextl::string FileManager::GetEmulatedPath(const char *pathname, bool FollowSymlink) { - if (!pathname || // If no pathname - pathname[0] != '/' || // If relative +fextl::string FileManager::GetEmulatedPath(const char* pathname, bool FollowSymlink) { + if (!pathname || // If no pathname + pathname[0] != '/' || // If relative strcmp(pathname, "/") == 0) { // If we are getting root return {}; } @@ -399,13 +391,12 @@ fextl::string FileManager::GetEmulatedPath(const char *pathname, bool FollowSyml fextl::string Path = RootFSPath + pathname; if (FollowSymlink) { char Filename[PATH_MAX]; - while(FEX::HLE::IsSymlink(AT_FDCWD, Path.c_str())) { + while (FEX::HLE::IsSymlink(AT_FDCWD, Path.c_str())) { auto SymlinkSize = FEX::HLE::GetSymlink(AT_FDCWD, Path.c_str(), Filename, PATH_MAX - 1); if (SymlinkSize > 0 && Filename[0] == '/') { Path = RootFSPath; Path += std::string_view(Filename, SymlinkSize); - } - else { + } else { break; } } @@ -413,7 +404,7 @@ fextl::string FileManager::GetEmulatedPath(const char *pathname, bool FollowSyml return Path; } -std::pair FileManager::GetEmulatedFDPath(int dirfd, const char *pathname, bool FollowSymlink, FDPathTmpData &TmpFilename) { +std::pair FileManager::GetEmulatedFDPath(int dirfd, const char* pathname, bool FollowSymlink, FDPathTmpData& TmpFilename) { constexpr auto NoEntry = std::make_pair(-1, nullptr); if (!pathname) { @@ -427,8 +418,8 @@ std::pair FileManager::GetEmulatedFDPath(int dirfd, const char } if (pathname[0] != '/' || // If relative - pathname[1] == 0 || // If we are getting root - dirfd != AT_FDCWD) { // If dirfd isn't special FDCWD + pathname[1] == 0 || // If we are getting root + dirfd != AT_FDCWD) { // If dirfd isn't special FDCWD return NoEntry; } @@ -443,22 +434,22 @@ std::pair FileManager::GetEmulatedFDPath(int dirfd, const char } // Starting subpath is the pathname passed in. 
- const char *SubPath = pathname; + const char* SubPath = pathname; // Current index for the temporary path to use. - uint32_t CurrentIndex{}; + uint32_t CurrentIndex {}; // The two temporary paths. - const std::array TmpPaths ={ + const std::array TmpPaths = { TmpFilename[0], TmpFilename[1], }; if (FollowSymlink) { // Check if the combination of RootFS FD and subpath with the front '/' stripped off is a symlink. - bool HadAtLeastOne{}; - struct stat Buffer{}; - for(;;) { + bool HadAtLeastOne {}; + struct stat Buffer {}; + for (;;) { // We need to check if the filepath exists and is a symlink. // If the initial filepath doesn't exist then early exit. // If it did exist at some state then trace it all all the way to the final link. @@ -488,15 +479,13 @@ std::pair FileManager::GetEmulatedFDPath(int dirfd, const char CurrentTmp[SymlinkSize] = 0; SubPath = CurrentTmp; CurrentIndex ^= 1; - } - else { + } else { // If the path wasn't a symlink or wasn't absolute. // 1) Break early, returning the previous found result. // 2) If first iteration then we return `pathname`. break; } - } - else { + } else { break; } } @@ -506,7 +495,7 @@ std::pair FileManager::GetEmulatedFDPath(int dirfd, const char return std::make_pair(RootFSFD, &SubPath[1]); } -std::optional FileManager::GetSelf(const char *Pathname) { +std::optional FileManager::GetSelf(const char* Pathname) { if (SupportsProcFSInterpreter) { // FEX doesn't need to track procfs/exe if this is supported. 
return Pathname; @@ -519,9 +508,7 @@ std::optional FileManager::GetSelf(const char *Pathname) { char PidSelfPath[50]; snprintf(PidSelfPath, 50, "/proc/%i/exe", CurrentPID); - if (strcmp(Pathname, "/proc/self/exe") == 0 || - strcmp(Pathname, "/proc/thread-self/exe") == 0 || - strcmp(Pathname, PidSelfPath) == 0) { + if (strcmp(Pathname, "/proc/self/exe") == 0 || strcmp(Pathname, "/proc/thread-self/exe") == 0 || strcmp(Pathname, PidSelfPath) == 0) { return Filename(); } @@ -547,9 +534,9 @@ static bool ShouldSkipOpenInEmu(int flags) { return false; } -uint64_t FileManager::Open(const char *pathname, int flags, uint32_t mode) { +uint64_t FileManager::Open(const char* pathname, int flags, uint32_t mode) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; int fd = -1; if (!ShouldSkipOpenInEmu(flags)) { @@ -581,92 +568,95 @@ uint64_t FileManager::CloseRange(unsigned int first, unsigned int last, unsigned return ::syscall(SYSCALL_DEF(close_range), first, last, flags); } -uint64_t FileManager::Stat(const char *pathname, void *buf) { +uint64_t FileManager::Stat(const char* pathname, void* buf) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; // Stat follows symlinks FDPathTmpData TmpFilename; auto Path = GetEmulatedFDPath(AT_FDCWD, SelfPath, true, TmpFilename); if (Path.first != -1) { uint64_t Result = ::fstatat(Path.first, Path.second, reinterpret_cast(buf), 0); - if (Result != -1) + if (Result != -1) { return Result; + } } return ::stat(SelfPath, reinterpret_cast(buf)); } -uint64_t FileManager::Lstat(const char *pathname, void *buf) { +uint64_t FileManager::Lstat(const char* pathname, void* buf) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? 
NewPath->data() : nullptr; // lstat does not follow symlinks FDPathTmpData TmpFilename; auto Path = GetEmulatedFDPath(AT_FDCWD, SelfPath, false, TmpFilename); if (Path.first != -1) { uint64_t Result = ::fstatat(Path.first, Path.second, reinterpret_cast(buf), AT_SYMLINK_NOFOLLOW); - if (Result != -1) + if (Result != -1) { return Result; + } } return ::lstat(pathname, reinterpret_cast(buf)); } -uint64_t FileManager::Access(const char *pathname, [[maybe_unused]] int mode) { +uint64_t FileManager::Access(const char* pathname, [[maybe_unused]] int mode) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; // Access follows symlinks FDPathTmpData TmpFilename; auto Path = GetEmulatedFDPath(AT_FDCWD, SelfPath, true, TmpFilename); if (Path.first != -1) { uint64_t Result = ::faccessat(Path.first, Path.second, mode, 0); - if (Result != -1) + if (Result != -1) { return Result; + } } return ::access(SelfPath, mode); } -uint64_t FileManager::FAccessat(int dirfd, const char *pathname, int mode) { +uint64_t FileManager::FAccessat(int dirfd, const char* pathname, int mode) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; FDPathTmpData TmpFilename; auto Path = GetEmulatedFDPath(dirfd, SelfPath, true, TmpFilename); if (Path.first != -1) { uint64_t Result = ::syscall(SYSCALL_DEF(faccessat), Path.first, Path.second, mode); - if (Result != -1) + if (Result != -1) { return Result; + } } return ::syscall(SYS_faccessat, dirfd, SelfPath, mode); } -uint64_t FileManager::FAccessat2(int dirfd, const char *pathname, int mode, int flags) { +uint64_t FileManager::FAccessat2(int dirfd, const char* pathname, int mode, int flags) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? 
NewPath->data() : nullptr; FDPathTmpData TmpFilename; auto Path = GetEmulatedFDPath(dirfd, SelfPath, (flags & AT_SYMLINK_NOFOLLOW) == 0, TmpFilename); if (Path.first != -1) { uint64_t Result = ::syscall(SYSCALL_DEF(faccessat2), Path.first, Path.second, mode, flags); - if (Result != -1) + if (Result != -1) { return Result; + } } return ::syscall(SYSCALL_DEF(faccessat2), dirfd, SelfPath, mode, flags); } -uint64_t FileManager::Readlink(const char *pathname, char *buf, size_t bufsiz) { +uint64_t FileManager::Readlink(const char* pathname, char* buf, size_t bufsiz) { if (!SupportsProcFSInterpreter) { // calculate the non-self link to exe // Some executables do getpid, stat("/proc/$pid/exe") char PidSelfPath[50]; snprintf(PidSelfPath, 50, "/proc/%i/exe", CurrentPID); - if (strcmp(pathname, "/proc/self/exe") == 0 || - strcmp(pathname, "/proc/thread-self/exe") == 0 || - strcmp(pathname, PidSelfPath) == 0) { + if (strcmp(pathname, "/proc/self/exe") == 0 || strcmp(pathname, "/proc/thread-self/exe") == 0 || strcmp(pathname, PidSelfPath) == 0) { auto App = Filename(); strncpy(buf, App.c_str(), bufsiz); return std::min(bufsiz, App.size()); @@ -677,11 +667,11 @@ uint64_t FileManager::Readlink(const char *pathname, char *buf, size_t bufsiz) { auto Path = GetEmulatedFDPath(AT_FDCWD, pathname, false, TmpFilename); if (Path.first != -1) { uint64_t Result = ::readlinkat(Path.first, Path.second, buf, bufsiz); - if (Result != -1) + if (Result != -1) { return Result; + } - if (Result == -1 && - errno == EINVAL) { + if (Result == -1 && errno == EINVAL) { // This means that the file wasn't a symlink // This is expected behaviour return -errno; @@ -691,30 +681,31 @@ uint64_t FileManager::Readlink(const char *pathname, char *buf, size_t bufsiz) { return ::readlink(pathname, buf, bufsiz); } -uint64_t FileManager::Chmod(const char *pathname, mode_t mode) { +uint64_t FileManager::Chmod(const char* pathname, mode_t mode) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? 
NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; FDPathTmpData TmpFilename; auto Path = GetEmulatedFDPath(AT_FDCWD, SelfPath, false, TmpFilename); if (Path.first != -1) { uint64_t Result = ::fchmodat(Path.first, Path.second, mode, 0); - if (Result != -1) + if (Result != -1) { return Result; + } } return ::chmod(SelfPath, mode); } -uint64_t FileManager::Readlinkat(int dirfd, const char *pathname, char *buf, size_t bufsiz) { +uint64_t FileManager::Readlinkat(int dirfd, const char* pathname, char* buf, size_t bufsiz) { // calculate the non-self link to exe // Some executables do getpid, stat("/proc/$pid/exe") // Can't use `GetSelf` directly here since readlink{at,} returns EINVAL if it isn't a symlink // Self is always a symlink and isn't expected to fail - fextl::string Path{}; + fextl::string Path {}; if (((pathname && pathname[0] != '/') || // If pathname exists then it must not be absolute - !pathname) && - dirfd != AT_FDCWD) { + !pathname) && + dirfd != AT_FDCWD) { // Passed in a dirfd that isn't magic FDCWD // We need to get the path from the fd now char Tmp[PATH_MAX] = ""; @@ -730,12 +721,10 @@ uint64_t FileManager::Readlinkat(int dirfd, const char *pathname, char *buf, siz } Path += pathname; } - } - else { + } else { if (!pathname || strlen(pathname) == 0) { return -1; - } - else if (pathname) { + } else if (pathname) { Path = pathname; } } @@ -744,9 +733,7 @@ uint64_t FileManager::Readlinkat(int dirfd, const char *pathname, char *buf, siz char PidSelfPath[50]; snprintf(PidSelfPath, 50, "/proc/%i/exe", CurrentPID); - if (Path == "/proc/self/exe" || - Path == "/proc/thread-self/exe" || - Path == PidSelfPath) { + if (Path == "/proc/self/exe" || Path == "/proc/thread-self/exe" || Path == PidSelfPath) { auto App = Filename(); strncpy(buf, App.c_str(), bufsiz); return std::min(bufsiz, App.size()); @@ -757,11 +744,11 @@ uint64_t FileManager::Readlinkat(int dirfd, const char *pathname, char *buf, siz auto NewPath = 
GetEmulatedFDPath(dirfd, pathname, false, TmpFilename); if (NewPath.first != -1) { uint64_t Result = ::readlinkat(NewPath.first, NewPath.second, buf, bufsiz); - if (Result != -1) + if (Result != -1) { return Result; + } - if (Result == -1 && - errno == EINVAL) { + if (Result == -1 && errno == EINVAL) { // This means that the file wasn't a symlink // This is expected behaviour return -errno; @@ -771,9 +758,9 @@ uint64_t FileManager::Readlinkat(int dirfd, const char *pathname, char *buf, siz return ::readlinkat(dirfd, pathname, buf, bufsiz); } -uint64_t FileManager::Openat([[maybe_unused]] int dirfs, const char *pathname, int flags, uint32_t mode) { +uint64_t FileManager::Openat([[maybe_unused]] int dirfs, const char* pathname, int flags, uint32_t mode) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; int32_t fd = -1; @@ -795,9 +782,9 @@ uint64_t FileManager::Openat([[maybe_unused]] int dirfs, const char *pathname, i return fd; } -uint64_t FileManager::Openat2(int dirfs, const char *pathname, FEX::HLE::open_how *how, size_t usize) { +uint64_t FileManager::Openat2(int dirfs, const char* pathname, FEX::HLE::open_how* how, size_t usize) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; int32_t fd = -1; @@ -817,50 +804,52 @@ uint64_t FileManager::Openat2(int dirfs, const char *pathname, FEX::HLE::open_ho } return fd; - } -uint64_t FileManager::Statx(int dirfd, const char *pathname, int flags, uint32_t mask, struct statx *statxbuf) { +uint64_t FileManager::Statx(int dirfd, const char* pathname, int flags, uint32_t mask, struct statx* statxbuf) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? 
NewPath->data() : nullptr; FDPathTmpData TmpFilename; auto Path = GetEmulatedFDPath(dirfd, SelfPath, (flags & AT_SYMLINK_NOFOLLOW) == 0, TmpFilename); if (Path.first != -1) { uint64_t Result = FHU::Syscalls::statx(Path.first, Path.second, flags, mask, statxbuf); - if (Result != -1) + if (Result != -1) { return Result; + } } return FHU::Syscalls::statx(dirfd, SelfPath, flags, mask, statxbuf); } -uint64_t FileManager::Mknod(const char *pathname, mode_t mode, dev_t dev) { +uint64_t FileManager::Mknod(const char* pathname, mode_t mode, dev_t dev) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; FDPathTmpData TmpFilename; auto Path = GetEmulatedFDPath(AT_FDCWD, SelfPath, false, TmpFilename); if (Path.first != -1) { uint64_t Result = ::mknodat(Path.first, Path.second, mode, dev); - if (Result != -1) + if (Result != -1) { return Result; + } } return ::mknod(SelfPath, mode, dev); } -uint64_t FileManager::Statfs(const char *path, void *buf) { +uint64_t FileManager::Statfs(const char* path, void* buf) { auto Path = GetEmulatedPath(path); if (!Path.empty()) { uint64_t Result = ::statfs(Path.c_str(), reinterpret_cast(buf)); - if (Result != -1) + if (Result != -1) { return Result; + } } return ::statfs(path, reinterpret_cast(buf)); } -uint64_t FileManager::NewFSStatAt(int dirfd, const char *pathname, struct stat *buf, int flag) { +uint64_t FileManager::NewFSStatAt(int dirfd, const char* pathname, struct stat* buf, int flag) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? 
NewPath->data() : nullptr; FDPathTmpData TmpFilename; auto Path = GetEmulatedFDPath(dirfd, SelfPath, (flag & AT_SYMLINK_NOFOLLOW) == 0, TmpFilename); @@ -873,9 +862,9 @@ uint64_t FileManager::NewFSStatAt(int dirfd, const char *pathname, struct stat * return ::fstatat(dirfd, SelfPath, buf, flag); } -uint64_t FileManager::NewFSStatAt64(int dirfd, const char *pathname, struct stat64 *buf, int flag) { +uint64_t FileManager::NewFSStatAt64(int dirfd, const char* pathname, struct stat64* buf, int flag) { auto NewPath = GetSelf(pathname); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; FDPathTmpData TmpFilename; auto Path = GetEmulatedFDPath(dirfd, SelfPath, (flag & AT_SYMLINK_NOFOLLOW) == 0, TmpFilename); @@ -888,9 +877,9 @@ uint64_t FileManager::NewFSStatAt64(int dirfd, const char *pathname, struct stat return ::fstatat64(dirfd, SelfPath, buf, flag); } -uint64_t FileManager::Setxattr(const char *path, const char *name, const void *value, size_t size, int flags) { +uint64_t FileManager::Setxattr(const char* path, const char* name, const void* value, size_t size, int flags) { auto NewPath = GetSelf(path); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; auto Path = GetEmulatedPath(SelfPath, true); if (!Path.empty()) { @@ -903,9 +892,9 @@ uint64_t FileManager::Setxattr(const char *path, const char *name, const void *v return ::setxattr(SelfPath, name, value, size, flags); } -uint64_t FileManager::LSetxattr(const char *path, const char *name, const void *value, size_t size, int flags) { +uint64_t FileManager::LSetxattr(const char* path, const char* name, const void* value, size_t size, int flags) { auto NewPath = GetSelf(path); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? 
NewPath->data() : nullptr; auto Path = GetEmulatedPath(SelfPath, false); if (!Path.empty()) { @@ -918,9 +907,9 @@ uint64_t FileManager::LSetxattr(const char *path, const char *name, const void * return ::lsetxattr(SelfPath, name, value, size, flags); } -uint64_t FileManager::Getxattr(const char *path, const char *name, void *value, size_t size) { +uint64_t FileManager::Getxattr(const char* path, const char* name, void* value, size_t size) { auto NewPath = GetSelf(path); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; auto Path = GetEmulatedPath(SelfPath, true); if (!Path.empty()) { @@ -933,9 +922,9 @@ uint64_t FileManager::Getxattr(const char *path, const char *name, void *value, return ::getxattr(SelfPath, name, value, size); } -uint64_t FileManager::LGetxattr(const char *path, const char *name, void *value, size_t size) { +uint64_t FileManager::LGetxattr(const char* path, const char* name, void* value, size_t size) { auto NewPath = GetSelf(path); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; auto Path = GetEmulatedPath(SelfPath, false); if (!Path.empty()) { @@ -948,9 +937,9 @@ uint64_t FileManager::LGetxattr(const char *path, const char *name, void *value, return ::lgetxattr(SelfPath, name, value, size); } -uint64_t FileManager::Listxattr(const char *path, char *list, size_t size) { +uint64_t FileManager::Listxattr(const char* path, char* list, size_t size) { auto NewPath = GetSelf(path); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? 
NewPath->data() : nullptr; auto Path = GetEmulatedPath(SelfPath, true); if (!Path.empty()) { @@ -963,9 +952,9 @@ uint64_t FileManager::Listxattr(const char *path, char *list, size_t size) { return ::listxattr(SelfPath, list, size); } -uint64_t FileManager::LListxattr(const char *path, char *list, size_t size) { +uint64_t FileManager::LListxattr(const char* path, char* list, size_t size) { auto NewPath = GetSelf(path); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; auto Path = GetEmulatedPath(SelfPath, false); if (!Path.empty()) { @@ -978,9 +967,9 @@ uint64_t FileManager::LListxattr(const char *path, char *list, size_t size) { return ::llistxattr(SelfPath, list, size); } -uint64_t FileManager::Removexattr(const char *path, const char *name) { +uint64_t FileManager::Removexattr(const char* path, const char* name) { auto NewPath = GetSelf(path); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? NewPath->data() : nullptr; auto Path = GetEmulatedPath(SelfPath, true); if (!Path.empty()) { @@ -993,9 +982,9 @@ uint64_t FileManager::Removexattr(const char *path, const char *name) { return ::removexattr(SelfPath, name); } -uint64_t FileManager::LRemovexattr(const char *path, const char *name) { +uint64_t FileManager::LRemovexattr(const char* path, const char* name) { auto NewPath = GetSelf(path); - const char *SelfPath = NewPath ? NewPath->data() : nullptr; + const char* SelfPath = NewPath ? 
NewPath->data() : nullptr; auto Path = GetEmulatedPath(SelfPath, false); if (!Path.empty()) { @@ -1008,4 +997,4 @@ uint64_t FileManager::LRemovexattr(const char *path, const char *name) { return ::lremovexattr(SelfPath, name); } -} +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/GdbServer.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/GdbServer.cpp index 521af25f41..e732bb0b99 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/GdbServer.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/GdbServer.cpp @@ -57,57 +57,35 @@ desc: Provides a gdb interface to the guest state #include "LinuxSyscalls/GdbServer.h" -namespace FEX -{ - -constexpr std::array FlagNames = { - "CF", - "", - "PF", - "", - "AF", - "", - "ZF", - "SF", - "TF", - "IF", - "DF", - "OF", - "IOPL", - "", - "NT", - "", - "RF", - "VM", - "AC", - "VIF", - "VIP", - "ID", +namespace FEX { + +constexpr std::array< const std::string_view, 22> FlagNames = { + "CF", "", "PF", "", "AF", "", "ZF", "SF", "TF", "IF", "DF", "OF", "IOPL", "", "NT", "", "RF", "VM", "AC", "VIF", "VIP", "ID", }; -static std::string_view const& GetFlagName(unsigned Flag) { +static const std::string_view& GetFlagName(unsigned Flag) { return FlagNames[Flag]; } -static std::string_view const GetGRegName(unsigned Reg) { +static const std::string_view GetGRegName(unsigned Reg) { switch (Reg) { - case FEXCore::X86State::REG_RAX: return "rax"; - case FEXCore::X86State::REG_RBX: return "rbx"; - case FEXCore::X86State::REG_RCX: return "rcx"; - case FEXCore::X86State::REG_RDX: return "rdx"; - case FEXCore::X86State::REG_RSP: return "rsp"; - case FEXCore::X86State::REG_RBP: return "rbp"; - case FEXCore::X86State::REG_RSI: return "rsi"; - case FEXCore::X86State::REG_RDI: return "rdi"; - case FEXCore::X86State::REG_R8: return "r8"; - case FEXCore::X86State::REG_R9: return "r9"; - case FEXCore::X86State::REG_R10: return "r10"; - case FEXCore::X86State::REG_R11: return "r11"; - case FEXCore::X86State::REG_R12: return 
"r12"; - case FEXCore::X86State::REG_R13: return "r13"; - case FEXCore::X86State::REG_R14: return "r14"; - case FEXCore::X86State::REG_R15: return "r15"; - default: FEX_UNREACHABLE; + case FEXCore::X86State::REG_RAX: return "rax"; + case FEXCore::X86State::REG_RBX: return "rbx"; + case FEXCore::X86State::REG_RCX: return "rcx"; + case FEXCore::X86State::REG_RDX: return "rdx"; + case FEXCore::X86State::REG_RSP: return "rsp"; + case FEXCore::X86State::REG_RBP: return "rbp"; + case FEXCore::X86State::REG_RSI: return "rsi"; + case FEXCore::X86State::REG_RDI: return "rdi"; + case FEXCore::X86State::REG_R8: return "r8"; + case FEXCore::X86State::REG_R9: return "r9"; + case FEXCore::X86State::REG_R10: return "r10"; + case FEXCore::X86State::REG_R11: return "r11"; + case FEXCore::X86State::REG_R12: return "r12"; + case FEXCore::X86State::REG_R13: return "r13"; + case FEXCore::X86State::REG_R14: return "r14"; + case FEXCore::X86State::REG_R15: return "r15"; + default: FEX_UNREACHABLE; } } @@ -136,7 +114,7 @@ GdbServer::~GdbServer() { } } -GdbServer::GdbServer(FEXCore::Context::Context *ctx, FEX::HLE::SignalDelegator *SignalDelegation, FEX::HLE::SyscallHandler *const SyscallHandler) +GdbServer::GdbServer(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* SignalDelegation, FEX::HLE::SyscallHandler* const SyscallHandler) : CTX(ctx) , SyscallHandler {SyscallHandler} { // Pass all signals by default @@ -155,7 +133,9 @@ GdbServer::GdbServer(FEXCore::Context::Context *ctx, FEX::HLE::SignalDelegator * // This is a total hack as there is currently no way to resume once hitting a segfault // But it's semi-useful for debugging. 
for (uint32_t Signal = 0; Signal <= FEX::HLE::SignalDelegator::MAX_SIGNALS; ++Signal) { - SignalDelegation->RegisterHostSignalHandler(Signal, [this] (FEXCore::Core::InternalThreadState *Thread, int Signal, void *info, void *ucontext) { + SignalDelegation->RegisterHostSignalHandler( + Signal, + [this](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) { if (PassSignals[Signal]) { // Pass signal to the guest return false; @@ -167,40 +147,42 @@ GdbServer::GdbServer(FEXCore::Context::Context *ctx, FEX::HLE::SignalDelegator * WaitForThreadWakeup(); return true; - }, true); + }, + true); } StartThread(); } -static int calculateChecksum(const fextl::string &packet) { +static int calculateChecksum(const fextl::string& packet) { unsigned char checksum = 0; - for (const char &c : packet) { + for (const char& c : packet) { checksum += c; } return checksum; } -static fextl::string hexstring(fextl::istringstream &ss, int delm) { +static fextl::string hexstring(fextl::istringstream& ss, int delm) { fextl::string ret; char hexString[3] = {0, 0, 0}; while (ss.peek() != delm) { ss.read(hexString, 2); int c = std::strtoul(hexString, nullptr, 16); - ret.push_back((char) c); + ret.push_back((char)c); } - if (delm != -1) + if (delm != -1) { ss.get(); + } return ret; } -static fextl::string encodeHex(const unsigned char *data, size_t length) { +static fextl::string encodeHex(const unsigned char* data, size_t length) { fextl::ostringstream ss; - for (size_t i=0; i < length; i++) { + for (size_t i = 0; i < length; i++) { ss << std::setfill('0') << std::setw(2) << std::hex << int(data[i]); } return ss.str(); @@ -221,8 +203,8 @@ static fextl::string getThreadName(uint32_t ThreadID) { // Takes a serial stream and reads a single packet // Un-escapes chars, checks the checksum and request a retransmit if it fails. 
// Once the checksum is validated, it acknowledges and returns the packet in a string -fextl::string GdbServer::ReadPacket(std::iostream &stream) { - fextl::string packet{}; +fextl::string GdbServer::ReadPacket(std::iostream& stream) { + fextl::string packet {}; // The GDB "Remote Serial Protocal" was originally 7bit clean for use on serial ports. // Binary data is useally hex encoded. However some later extentions just put @@ -234,38 +216,37 @@ fextl::string GdbServer::ReadPacket(std::iostream &stream) { // The checksum is a single unsigned byte sum of the data, hex encoded. int c; - while ((c = stream.get()) > 0 ) { - switch(c) { - case '$': // start of packet - if (packet.size() != 0) - LogMan::Msg::EFmt("Dropping unexpected data: \"{}\"", packet); - - // clear any existing data, must have been a mistake. - packet = fextl::string(); - break; - case '}': // escape char - { - char escaped; - stream >> escaped; - packet.push_back(escaped ^ 0x20); - break; + while ((c = stream.get()) > 0) { + switch (c) { + case '$': // start of packet + if (packet.size() != 0) { + LogMan::Msg::EFmt("Dropping unexpected data: \"{}\"", packet); } - case '#': // end of packet - { - char hexString[3] = {0, 0, 0}; - stream.read(hexString, 2); - int expected_checksum = std::strtoul(hexString, nullptr, 16); - - if (calculateChecksum(packet) == expected_checksum) { - return packet; - } else { - LogMan::Msg::EFmt("Received Invalid Packet: ${}#{:02x}", packet, expected_checksum); - } - break; + + // clear any existing data, must have been a mistake. 
+ packet = fextl::string(); + break; + case '}': // escape char + { + char escaped; + stream >> escaped; + packet.push_back(escaped ^ 0x20); + break; + } + case '#': // end of packet + { + char hexString[3] = {0, 0, 0}; + stream.read(hexString, 2); + int expected_checksum = std::strtoul(hexString, nullptr, 16); + + if (calculateChecksum(packet) == expected_checksum) { + return packet; + } else { + LogMan::Msg::EFmt("Received Invalid Packet: ${}#{:02x}", packet, expected_checksum); } - default: - packet.push_back((char) c); - break; + break; + } + default: packet.push_back((char)c); break; } } @@ -275,41 +256,38 @@ fextl::string GdbServer::ReadPacket(std::iostream &stream) { static fextl::string escapePacket(const fextl::string& packet) { fextl::ostringstream ss; - for(const auto &c : packet) { + for (const auto& c : packet) { switch (c) { - case '$': - case '#': - case '*': - case '}': { - char escaped = c ^ 0x20; - ss << '}' << (escaped); - break; - } - default: - ss << c; - break; + case '$': + case '#': + case '*': + case '}': { + char escaped = c ^ 0x20; + ss << '}' << (escaped); + break; + } + default: ss << c; break; } } return ss.str(); } -void GdbServer::SendPacket(std::ostream &stream, const fextl::string& packet) { +void GdbServer::SendPacket(std::ostream& stream, const fextl::string& packet) { const auto escaped = escapePacket(packet); const auto str = fextl::fmt::format("${}#{:02x}", escaped, calculateChecksum(escaped)); stream << str << std::flush; } -void GdbServer::SendACK(std::ostream &stream, bool NACK) { +void GdbServer::SendACK(std::ostream& stream, bool NACK) { if (NoAckMode) { return; } if (NACK) { stream << "-" << std::flush; - } - else { + } else { stream << "+" << std::flush; } @@ -337,14 +315,14 @@ struct FEX_PACKED GDBContextDefinition { }; fextl::string GdbServer::readRegs() { - GDBContextDefinition GDB{}; - FEXCore::Core::CPUState state{}; + GDBContextDefinition GDB {}; + FEXCore::Core::CPUState state {}; auto Threads = 
SyscallHandler->TM.GetThreads(); - FEXCore::Core::InternalThreadState *CurrentThread { Threads->at(0) }; + FEXCore::Core::InternalThreadState* CurrentThread {Threads->at(0)}; bool Found = false; - for (auto &Thread : *Threads) { + for (auto& Thread : *Threads) { if (Thread->ThreadManager.GetTID() != CurrentDebuggingThread) { continue; } @@ -372,7 +350,7 @@ fextl::string GdbServer::readRegs() { // Currently unsupported GDB.fctrl = 0x37F; - GDB.fstat = static_cast(state.flags[FEXCore::X86State::X87FLAG_TOP_LOC]) << 11; + GDB.fstat = static_cast(state.flags[FEXCore::X86State::X87FLAG_TOP_LOC]) << 11; GDB.fstat |= static_cast(state.flags[FEXCore::X86State::X87FLAG_C0_LOC]) << 8; GDB.fstat |= static_cast(state.flags[FEXCore::X86State::X87FLAG_C1_LOC]) << 9; GDB.fstat |= static_cast(state.flags[FEXCore::X86State::X87FLAG_C2_LOC]) << 10; @@ -380,7 +358,7 @@ fextl::string GdbServer::readRegs() { memcpy(&GDB.xmm[0], &state.xmm.avx.data[0], sizeof(GDB.xmm)); - return encodeHex((unsigned char *)&GDB, sizeof(GDBContextDefinition)); + return encodeHex((unsigned char*)&GDB, sizeof(GDBContextDefinition)); } GdbServer::HandledPacketType GdbServer::readReg(const fextl::string& packet) { @@ -389,13 +367,13 @@ GdbServer::HandledPacketType GdbServer::readReg(const fextl::string& packet) { ss.get(); // Drop first letter ss >> std::hex >> addr; - FEXCore::Core::CPUState state{}; + FEXCore::Core::CPUState state {}; auto Threads = SyscallHandler->TM.GetThreads(); - FEXCore::Core::InternalThreadState *CurrentThread { Threads->at(0) }; + FEXCore::Core::InternalThreadState* CurrentThread {Threads->at(0)}; bool Found = false; - for (auto &Thread : *Threads) { + for (auto& Thread : *Threads) { if (Thread->ThreadManager.GetTID() != CurrentDebuggingThread) { continue; } @@ -411,55 +389,42 @@ GdbServer::HandledPacketType GdbServer::readReg(const fextl::string& packet) { } - if (addr >= offsetof(GDBContextDefinition, gregs[0]) && - addr < offsetof(GDBContextDefinition, gregs[16])) { - return 
{encodeHex((unsigned char *)(&state.gregs[addr / sizeof(uint64_t)]), sizeof(uint64_t)), HandledPacketType::TYPE_ACK}; - } - else if (addr == offsetof(GDBContextDefinition, rip)) { - return {encodeHex((unsigned char *)(&state.rip), sizeof(uint64_t)), HandledPacketType::TYPE_ACK}; - } - else if (addr == offsetof(GDBContextDefinition, eflags)) { + if (addr >= offsetof(GDBContextDefinition, gregs[0]) && addr < offsetof(GDBContextDefinition, gregs[16])) { + return {encodeHex((unsigned char*)(&state.gregs[addr / sizeof(uint64_t)]), sizeof(uint64_t)), HandledPacketType::TYPE_ACK}; + } else if (addr == offsetof(GDBContextDefinition, rip)) { + return {encodeHex((unsigned char*)(&state.rip), sizeof(uint64_t)), HandledPacketType::TYPE_ACK}; + } else if (addr == offsetof(GDBContextDefinition, eflags)) { uint32_t eflags = CTX->ReconstructCompactedEFLAGS(CurrentThread, false, nullptr, 0); - return {encodeHex((unsigned char *)(&eflags), sizeof(uint32_t)), HandledPacketType::TYPE_ACK}; - } - else if (addr >= offsetof(GDBContextDefinition, cs) && - addr < offsetof(GDBContextDefinition, mm[0])) { - uint32_t Empty{}; - return {encodeHex((unsigned char *)(&Empty), sizeof(uint32_t)), HandledPacketType::TYPE_ACK}; - } - else if (addr >= offsetof(GDBContextDefinition, mm[0]) && - addr < offsetof(GDBContextDefinition, mm[8])) { - return {encodeHex((unsigned char *)(&state.mm[(addr - offsetof(GDBContextDefinition, mm[0])) / sizeof(X80Float)]), sizeof(X80Float)), HandledPacketType::TYPE_ACK}; - } - else if (addr == offsetof(GDBContextDefinition, fctrl)) { + return {encodeHex((unsigned char*)(&eflags), sizeof(uint32_t)), HandledPacketType::TYPE_ACK}; + } else if (addr >= offsetof(GDBContextDefinition, cs) && addr < offsetof(GDBContextDefinition, mm[0])) { + uint32_t Empty {}; + return {encodeHex((unsigned char*)(&Empty), sizeof(uint32_t)), HandledPacketType::TYPE_ACK}; + } else if (addr >= offsetof(GDBContextDefinition, mm[0]) && addr < offsetof(GDBContextDefinition, mm[8])) { + return 
{encodeHex((unsigned char*)(&state.mm[(addr - offsetof(GDBContextDefinition, mm[0])) / sizeof(X80Float)]), sizeof(X80Float)), + HandledPacketType::TYPE_ACK}; + } else if (addr == offsetof(GDBContextDefinition, fctrl)) { // XXX: We don't support this yet uint32_t FCW = 0x37F; - return {encodeHex((unsigned char *)(&FCW), sizeof(uint32_t)), HandledPacketType::TYPE_ACK}; - } - else if (addr == offsetof(GDBContextDefinition, fstat)) { - uint32_t FSW{}; + return {encodeHex((unsigned char*)(&FCW), sizeof(uint32_t)), HandledPacketType::TYPE_ACK}; + } else if (addr == offsetof(GDBContextDefinition, fstat)) { + uint32_t FSW {}; FSW = static_cast(state.flags[FEXCore::X86State::X87FLAG_TOP_LOC]) << 11; FSW |= static_cast(state.flags[FEXCore::X86State::X87FLAG_C0_LOC]) << 8; FSW |= static_cast(state.flags[FEXCore::X86State::X87FLAG_C1_LOC]) << 9; FSW |= static_cast(state.flags[FEXCore::X86State::X87FLAG_C2_LOC]) << 10; FSW |= static_cast(state.flags[FEXCore::X86State::X87FLAG_C3_LOC]) << 14; - return {encodeHex((unsigned char *)(&FSW), sizeof(uint32_t)), HandledPacketType::TYPE_ACK}; - } - else if (addr >= offsetof(GDBContextDefinition, dummies[0]) && - addr < offsetof(GDBContextDefinition, dummies[6])) { - uint32_t Empty{}; - return {encodeHex((unsigned char *)(&Empty), sizeof(uint32_t)), HandledPacketType::TYPE_ACK}; - } - else if (addr >= offsetof(GDBContextDefinition, xmm[0][0]) && - addr < offsetof(GDBContextDefinition, xmm[16][0])) { + return {encodeHex((unsigned char*)(&FSW), sizeof(uint32_t)), HandledPacketType::TYPE_ACK}; + } else if (addr >= offsetof(GDBContextDefinition, dummies[0]) && addr < offsetof(GDBContextDefinition, dummies[6])) { + uint32_t Empty {}; + return {encodeHex((unsigned char*)(&Empty), sizeof(uint32_t)), HandledPacketType::TYPE_ACK}; + } else if (addr >= offsetof(GDBContextDefinition, xmm[0][0]) && addr < offsetof(GDBContextDefinition, xmm[16][0])) { const auto XmmIndex = (addr - offsetof(GDBContextDefinition, xmm[0][0])) / 
FEXCore::Core::CPUState::XMM_AVX_REG_SIZE; - const auto *Data = (unsigned char *)&state.xmm.avx.data[XmmIndex][0]; + const auto* Data = (unsigned char*)&state.xmm.avx.data[XmmIndex][0]; return {encodeHex(Data, FEXCore::Core::CPUState::XMM_AVX_REG_SIZE), HandledPacketType::TYPE_ACK}; - } - else if (addr == offsetof(GDBContextDefinition, mxcsr)) { - uint32_t Empty{}; - return {encodeHex((unsigned char *)(&Empty), sizeof(uint32_t)), HandledPacketType::TYPE_ACK}; + } else if (addr == offsetof(GDBContextDefinition, mxcsr)) { + uint32_t Empty {}; + return {encodeHex((unsigned char*)(&Empty), sizeof(uint32_t)), HandledPacketType::TYPE_ACK}; } LogMan::Msg::EFmt("Unknown GDB register 0x{:x}", addr); @@ -474,69 +439,69 @@ fextl::string buildTargetXML() { xml << "\n"; xml << "i386:x86-64\n"; xml << "GNU/Linux\n"; - xml << "\n"; - - xml << "\n"; - // flags register - for(int i = 0; i < 22; i++) { - auto name = GetFlagName(i); - if (name.empty()) { - continue; - } - xml << "\t\n"; - } - xml << "\n"; + xml << "\n"; - int32_t TargetSize{}; - auto reg = [&](std::string_view name, std::string_view type, int size) { - TargetSize += size; - xml << "" << std::endl; - }; + xml << "\n"; + // flags register + for (int i = 0; i < 22; i++) { + auto name = GetFlagName(i); + if (name.empty()) { + continue; + } + xml << "\t\n"; + } + xml << "\n"; - // Register ordering. - // We want to just memcpy our x86 state to gdb, so we tell it the ordering. + int32_t TargetSize {}; + auto reg = [&](std::string_view name, std::string_view type, int size) { + TargetSize += size; + xml << "" << std::endl; + }; - // GPRs - for (uint32_t i = 0; i < FEXCore::Core::CPUState::NUM_GPRS; i++) { - reg(GetGRegName(i), "int64", 64); - } + // Register ordering. + // We want to just memcpy our x86 state to gdb, so we tell it the ordering. 
- reg("rip", "code_ptr", 64); + // GPRs + for (uint32_t i = 0; i < FEXCore::Core::CPUState::NUM_GPRS; i++) { + reg(GetGRegName(i), "int64", 64); + } - reg("eflags", "fex_eflags", 32); + reg("rip", "code_ptr", 64); - // Fake registers which GDB requires, but we don't support; - // We stick them past the end of our cpu state. + reg("eflags", "fex_eflags", 32); - // non-userspace segment registers - reg("cs", "int32", 32); - reg("ss", "int32", 32); - reg("ds", "int32", 32); - reg("es", "int32", 32); + // Fake registers which GDB requires, but we don't support; + // We stick them past the end of our cpu state. - reg("fs", "int32", 32); - reg("gs", "int32", 32); + // non-userspace segment registers + reg("cs", "int32", 32); + reg("ss", "int32", 32); + reg("ds", "int32", 32); + reg("es", "int32", 32); - // x87 stack - for (int i=0; i < 8; i++) { - reg(fextl::fmt::format("st{}", i), "i387_ext", 80); - } + reg("fs", "int32", 32); + reg("gs", "int32", 32); - // x87 control - reg("fctrl", "int32", 32); - reg("fstat", "int32", 32); - reg("ftag", "int32", 32); - reg("fiseg", "int32", 32); - reg("fioff", "int32", 32); - reg("foseg", "int32", 32); - reg("fooff", "int32", 32); - reg("fop", "int32", 32); - - - xml << "\n"; - xml << "\n"; - xml << - R"( + // x87 stack + for (int i = 0; i < 8; i++) { + reg(fextl::fmt::format("st{}", i), "i387_ext", 80); + } + + // x87 control + reg("fctrl", "int32", 32); + reg("fstat", "int32", 32); + reg("ftag", "int32", 32); + reg("fiseg", "int32", 32); + reg("fioff", "int32", 32); + reg("foseg", "int32", 32); + reg("fooff", "int32", 32); + reg("fop", "int32", 32); + + + xml << "\n"; + xml << "\n"; + xml << + R"( @@ -553,18 +518,18 @@ fextl::string buildTargetXML() { )"; - // SSE regs - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { - reg(fextl::fmt::format("xmm{}", i), "vec128", 128); - } + // SSE regs + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { + reg(fextl::fmt::format("xmm{}", i), "vec128", 128); + } - 
reg("mxcsr", "int", 32); + reg("mxcsr", "int", 32); - xml << "\n"; + xml << "\n"; - xml << ""; - xml << - R"( + xml << ""; + xml << + R"( @@ -580,10 +545,10 @@ fextl::string buildTargetXML() { )"; - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { - reg(fmt::format("ymm{}h", i), "vec128", 128); - } - xml << "\n"; + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { + reg(fmt::format("ymm{}h", i), "vec128", 128); + } + xml << "\n"; xml << ""; xml << std::flush; @@ -627,7 +592,7 @@ void GdbServer::buildLibraryMap() { fextl::map> SegmentMaps; // 7ff5dd6d2000-7ff5dd6d3000 rw-p 0000a000 103:0b 1881447 /usr/lib/x86_64-linux-gnu/libnss_compat.so.2 - fextl::string const &RuntimeExecutable = Filename(); + const fextl::string& RuntimeExecutable = Filename(); while (std::getline(MapsStream, Line)) { auto ss = fextl::istringstream(Line); fextl::string Tmp; @@ -649,7 +614,7 @@ void GdbServer::buildLibraryMap() { Name = FEXCore::StringUtils::Trim(Name); - struct stat sb{}; + struct stat sb {}; if (stat(Name.c_str(), &sb) != -1) { if (S_ISCHR(sb.st_mode)) { // Skip this special file type @@ -669,9 +634,9 @@ void GdbServer::buildLibraryMap() { } xml << "\n"; - for (auto &Array : SegmentMaps) { + for (auto& Array : SegmentMaps) { xml << "\t\n"; - for (auto &Data : Array.second) { + for (auto& Data : Array.second) { xml << "\t\t\n"; } xml << "\t\n"; @@ -683,7 +648,7 @@ void GdbServer::buildLibraryMap() { LibraryMapChanged = false; } -GdbServer::HandledPacketType GdbServer::handleXfer(const fextl::string &packet) { +GdbServer::HandledPacketType GdbServer::handleXfer(const fextl::string& packet) { fextl::string object; fextl::string rw; fextl::string annex; @@ -703,8 +668,7 @@ GdbServer::HandledPacketType GdbServer::handleXfer(const fextl::string &packet) std::getline(ss, annex, ':'); if (annex == "") { annex_pid = getpid(); - } - else { + } else { auto ss_pid = fextl::istringstream(annex); ss_pid >> std::hex >> annex_pid; } @@ -713,18 +677,22 @@ 
GdbServer::HandledPacketType GdbServer::handleXfer(const fextl::string &packet) ss >> std::hex >> length; // Bail on any errors - if (ss.fail() || !ss.eof() || expectXfer != "qXfer" || rw != "read" || expectComma != ',') + if (ss.fail() || !ss.eof() || expectXfer != "qXfer" || rw != "read" || expectComma != ',') { return {"E00", HandledPacketType::TYPE_ACK}; + } } // Lambda to correctly encode any reply auto encode = [&](fextl::string data) -> fextl::string { - if (offset == data.size()) + if (offset == data.size()) { return "l"; - if (offset >= data.size()) + } + if (offset >= data.size()) { return "E34"; // ERANGE - if ((data.size() - offset) > length) + } + if ((data.size() - offset) > length) { return "m" + data.substr(offset, length); + } return "l" + data.substr(offset); }; @@ -737,8 +705,9 @@ GdbServer::HandledPacketType GdbServer::handleXfer(const fextl::string &packet) } if (object == "features") { - if (annex == "target.xml") + if (annex == "target.xml") { return {encode(buildTargetXML()), HandledPacketType::TYPE_ACK}; + } return {"E00", HandledPacketType::TYPE_ACK}; } @@ -750,7 +719,7 @@ GdbServer::HandledPacketType GdbServer::handleXfer(const fextl::string &packet) ThreadString.clear(); fextl::ostringstream ss; ss << "\n"; - for (auto &Thread : *Threads) { + for (auto& Thread : *Threads) { // Thread id is in hex without 0x prefix const auto ThreadName = getThreadName(Thread->ThreadManager.GetTID()); ss << "ThreadManager.GetTID() << "\""; @@ -791,13 +760,12 @@ GdbServer::HandledPacketType GdbServer::handleXfer(const fextl::string &packet) if (Is64BitMode()) { data.resize(auxv_size); memcpy(data.data(), reinterpret_cast(auxv_ptr), data.size()); - } - else { + } else { // We need to transcode from 32-bit auxv_t to 64-bit data.resize(auxv_size / sizeof(Elf32_auxv_t) * sizeof(Elf64_auxv_t)); size_t NumAuxv = auxv_size / sizeof(Elf32_auxv_t); for (size_t i = 0; i < NumAuxv; ++i) { - Elf32_auxv_t *auxv = reinterpret_cast(auxv_ptr + i * sizeof(Elf32_auxv_t)); + 
Elf32_auxv_t* auxv = reinterpret_cast(auxv_ptr + i * sizeof(Elf32_auxv_t)); Elf64_auxv_t tmp; tmp.a_type = auxv->a_type; tmp.a_un.a_val = auxv->a_un.a_val; @@ -820,13 +788,14 @@ static size_t CheckMemMapping(uint64_t Address, size_t Size) { fextl::string Line; while (std::getline(MapsStream, Line)) { - if (MapsStream.eof()) break; + if (MapsStream.eof()) { + break; + } uint64_t Begin, End; - char r,w,x,p; + char r, w, x, p; if (sscanf(Line.c_str(), "%lx-%lx %c%c%c%c", &Begin, &End, &r, &w, &x, &p) == 6) { - if (Begin <= Address && - End > Address) { - ssize_t Overrun{}; + if (Begin <= Address && End > Address) { + ssize_t Overrun {}; if (AddressEnd > End) { Overrun = AddressEnd - End; } @@ -845,7 +814,7 @@ GdbServer::HandledPacketType GdbServer::handleProgramOffsets() { return {std::move(str), HandledPacketType::TYPE_ACK}; } -GdbServer::HandledPacketType GdbServer::handleMemory(const fextl::string &packet) { +GdbServer::HandledPacketType GdbServer::handleMemory(const fextl::string& packet) { bool write; size_t addr; size_t length; @@ -858,7 +827,7 @@ GdbServer::HandledPacketType GdbServer::handleMemory(const fextl::string &packet ss >> std::hex >> length; if (write) { - ss.get(); // discard colon + ss.get(); // discard colon data = hexstring(ss, -1); // grab data until end of file. 
} @@ -885,16 +854,19 @@ GdbServer::HandledPacketType GdbServer::handleMemory(const fextl::string &packet } } -GdbServer::HandledPacketType GdbServer::handleQuery(const fextl::string &packet) { - const auto match = [&](const char *str) -> bool { return packet.rfind(str, 0) == 0; }; - const auto MatchStr = [](const fextl::string &Str, const char *str) -> bool { return Str.rfind(str, 0) == 0; }; +GdbServer::HandledPacketType GdbServer::handleQuery(const fextl::string& packet) { + const auto match = [&](const char* str) -> bool { + return packet.rfind(str, 0) == 0; + }; + const auto MatchStr = [](const fextl::string& Str, const char* str) -> bool { + return Str.rfind(str, 0) == 0; + }; - const auto split = [](const fextl::string &Str, char deliminator) -> fextl::vector { + const auto split = [](const fextl::string& Str, char deliminator) -> fextl::vector { fextl::vector Elements; fextl::istringstream Input(Str); - for (fextl::string line; - std::getline(Input, line); - Elements.emplace_back(line)); + for (fextl::string line; std::getline(Input, line); Elements.emplace_back(line)) + ; return Elements; }; @@ -911,7 +883,7 @@ GdbServer::HandledPacketType GdbServer::handleQuery(const fextl::string &packet) // For feature documentation // https://sourceware.org/gdb/current/onlinedocs/gdb/General-Query-Packets.html#qSupported - fextl::string SupportedFeatures{}; + fextl::string SupportedFeatures {}; // Required features SupportedFeatures += "PacketSize=32768;"; @@ -946,7 +918,7 @@ GdbServer::HandledPacketType GdbServer::handleQuery(const fextl::string &packet) // TODO: If we want to support conditional breakpoints then we need to support single stepping. 
// SupportedFeatures += "ConditionalBreakpoints+;"; - for (auto &Feature : Features) { + for (auto& Feature : Features) { if (MatchStr(Feature, "swbreak+")) { SupportedFeatures += "swbreak+;"; } @@ -1012,7 +984,7 @@ GdbServer::HandledPacketType GdbServer::handleQuery(const fextl::string &packet) // Returns the current Thread ID auto Threads = SyscallHandler->TM.GetThreads(); fextl::ostringstream ss; - ss << "m" << std::hex << Threads->at(0)->ThreadManager.TID; + ss << "m" << std::hex << Threads->at(0)->ThreadManager.TID; return {ss.str(), HandledPacketType::TYPE_ACK}; } if (match("QStartNoAckMode")) { @@ -1029,8 +1001,7 @@ GdbServer::HandledPacketType GdbServer::handleQuery(const fextl::string &packet) if (Symbol_Val.empty() && Symbol_name.empty()) { return {"OK", HandledPacketType::TYPE_ACK}; - } - else { + } else { return {"", HandledPacketType::TYPE_UNKNOWN}; } } @@ -1045,7 +1016,7 @@ GdbServer::HandledPacketType GdbServer::handleQuery(const fextl::string &packet) ss.get(); // discard colon // We now have a semi-colon deliminated list of signals to pass to the guest process - for (fextl::string tmp; std::getline(ss, tmp, ';'); ) { + for (fextl::string tmp; std::getline(ss, tmp, ';');) { uint32_t Signal = std::stoi(tmp.c_str(), nullptr, 16); if (Signal < FEX::HLE::SignalDelegator::MAX_SIGNALS) { PassSignals[Signal] = true; @@ -1068,7 +1039,7 @@ GdbServer::HandledPacketType GdbServer::handleQuery(const fextl::string &packet) // os_build:362e332e332d3036303330332d67656e65726963; // os_kernel:2332303233303531373133333620534d5020505245454d50545f44594e414d494320576564204d61792031372031333a34353a3139205554432032303233; // hostname:7279616e682d545235303030; - fextl::string HostFeatures{}; + fextl::string HostFeatures {}; // 64-bit always returned for the host environment. // qProcessInfo will return i386 or not. @@ -1078,11 +1049,11 @@ GdbServer::HandledPacketType GdbServer::handleQuery(const fextl::string &packet) // Always little-endian. 
HostFeatures += "endian:little;"; - struct utsname buf{}; + struct utsname buf {}; if (uname(&buf) != -1) { - uint32_t Major{}; - uint32_t Minor{}; - uint32_t Patch{}; + uint32_t Major {}; + uint32_t Minor {}; + uint32_t Patch {}; // Parse kernel version in the form of `..[Optional Data]` const auto End = buf.release + sizeof(buf.release); @@ -1115,30 +1086,29 @@ GdbServer::HandledPacketType GdbServer::handleQuery(const fextl::string &packet) GdbServer::HandledPacketType GdbServer::ThreadAction(char action, uint32_t tid) { switch (action) { - case 'c': { - SyscallHandler->TM.Run(); - ThreadBreakEvent.NotifyAll(); - SyscallHandler->TM.WaitForThreadsToRun(); - return {"", HandledPacketType::TYPE_ONLYACK}; + case 'c': { + SyscallHandler->TM.Run(); + ThreadBreakEvent.NotifyAll(); + SyscallHandler->TM.WaitForThreadsToRun(); + return {"", HandledPacketType::TYPE_ONLYACK}; + } + case 's': { + SyscallHandler->TM.Step(); + SendPacketPair({"OK", HandledPacketType::TYPE_ACK}); + fextl::string str = fextl::fmt::format("T05thread:{:02x};", getpid()); + if (LibraryMapChanged) { + // If libraries have changed then let gdb know + str += "library:1;"; } - case 's': { - SyscallHandler->TM.Step(); - SendPacketPair({"OK", HandledPacketType::TYPE_ACK}); - fextl::string str = fextl::fmt::format("T05thread:{:02x};", getpid()); - if (LibraryMapChanged) { - // If libraries have changed then let gdb know - str += "library:1;"; - } - SendPacketPair({std::move(str), HandledPacketType::TYPE_ACK}); - return {"OK", HandledPacketType::TYPE_ACK}; - } - case 't': - // This thread isn't part of the thread pool - SyscallHandler->TM.Stop(); - return {"OK", HandledPacketType::TYPE_ACK}; - default: - return {"E00", HandledPacketType::TYPE_ACK}; + SendPacketPair({std::move(str), HandledPacketType::TYPE_ACK}); + return {"OK", HandledPacketType::TYPE_ACK}; + } + case 't': + // This thread isn't part of the thread pool + SyscallHandler->TM.Stop(); + return {"OK", HandledPacketType::TYPE_ACK}; + default: 
return {"E00", HandledPacketType::TYPE_ACK}; } } @@ -1152,15 +1122,19 @@ GdbServer::HandledPacketType GdbServer::handleV(const fextl::string& packet) { return std::nullopt; }; - const auto F = [](int result) -> fextl::string { return fextl::fmt::format("F{:x}", result); }; - const auto F_error = []() -> fextl::string { return fextl::fmt::format("F-1,{:x}", errno); }; - const auto F_data = [](int result, const fextl::string& data) -> fextl::string { + const auto F = [](int result) -> fextl::string { + return fextl::fmt::format("F{:x}", result); + }; + const auto F_error = []() -> fextl::string { + return fextl::fmt::format("F-1,{:x}", errno); + }; + const auto F_data = [](int result, const fextl::string& data) -> fextl::string { // Binary encoded data is raw appended to the end return fextl::fmt::format("F{:#x};", result) + data; }; std::optional ss; - if((ss = match("vFile:open:"))) { + if ((ss = match("vFile:open:"))) { fextl::string filename; int flags; int mode; @@ -1172,19 +1146,19 @@ GdbServer::HandledPacketType GdbServer::handleV(const fextl::string& packet) { return {F(open(filename.c_str(), flags, mode)), HandledPacketType::TYPE_ACK}; } - if((ss = match("vFile:setfs:"))) { + if ((ss = match("vFile:setfs:"))) { int pid; *ss >> pid; return {F(pid == 0 ? 0 : -1), HandledPacketType::TYPE_ACK}; // Only support the common filesystem } - if((ss = match("vFile:close:"))) { + if ((ss = match("vFile:close:"))) { int fd; *ss >> std::hex >> fd; close(fd); return {F(0), HandledPacketType::TYPE_ACK}; } - if((ss = match("vFile:pread:"))) { + if ((ss = match("vFile:pread:"))) { int fd, count, offset; *ss >> std::hex >> fd; @@ -1214,8 +1188,8 @@ GdbServer::HandledPacketType GdbServer::handleV(const fextl::string& packet) { // FIXME: We also claim to support continue with signal... 
because it's compulsory } if ((ss = match("vCont;"))) { - char action{}; - int thread{}; + char action {}; + int thread {}; action = ss->get(); @@ -1233,8 +1207,10 @@ GdbServer::HandledPacketType GdbServer::handleV(const fextl::string& packet) { return {"", HandledPacketType::TYPE_ACK}; } -GdbServer::HandledPacketType GdbServer::handleThreadOp(const fextl::string &packet) { - const auto match = [&](const char *str) -> bool { return packet.rfind(str, 0) == 0; }; +GdbServer::HandledPacketType GdbServer::handleThreadOp(const fextl::string& packet) { + const auto match = [&](const char* str) -> bool { + return packet.rfind(str, 0) == 0; + }; if (match("Hc")) { // Sets thread to this ID for stepping @@ -1261,11 +1237,11 @@ GdbServer::HandledPacketType GdbServer::handleThreadOp(const fextl::string &pack return {"", HandledPacketType::TYPE_UNKNOWN}; } -GdbServer::HandledPacketType GdbServer::handleBreakpoint(const fextl::string &packet) { +GdbServer::HandledPacketType GdbServer::handleBreakpoint(const fextl::string& packet) { auto ss = fextl::istringstream(packet); // Don't do anything with set breakpoints yet - [[maybe_unused]] bool Set{}; + [[maybe_unused]] bool Set {}; uint64_t Addr; uint64_t Type; Set = ss.get() == 'Z'; @@ -1278,75 +1254,66 @@ GdbServer::HandledPacketType GdbServer::handleBreakpoint(const fextl::string &pa return {"OK", HandledPacketType::TYPE_ACK}; } -GdbServer::HandledPacketType GdbServer::ProcessPacket(const fextl::string &packet) { +GdbServer::HandledPacketType GdbServer::ProcessPacket(const fextl::string& packet) { switch (packet[0]) { - case '?': { - // Indicates the reason that the thread has stopped - // Behaviour changes if the target is in non-stop mode - // Binja doesn't support S response here - fextl::string str = fextl::fmt::format("T00thread:{:x};", getpid()); - return {std::move(str), HandledPacketType::TYPE_ACK}; - } - case 'c': - // Continue - return ThreadAction('c', 0); - case 'D': - // Detach - // Ensure the threads are back in 
running state on detach - SyscallHandler->TM.Run(); - SyscallHandler->TM.WaitForThreadsToRun(); - return {"OK", HandledPacketType::TYPE_ACK}; - case 'g': - // We might be running while we try reading - // Pause up front - SyscallHandler->TM.Pause(); - return {readRegs(), HandledPacketType::TYPE_ACK}; - case 'p': - return readReg(packet); - case 'q': - case 'Q': - return handleQuery(packet); - case 'v': - return handleV(packet); - case 'm': // Memory read - case 'M': // Memory write - return handleMemory(packet); - case 'H': // Sets thread for subsequent operations - return handleThreadOp(packet); - case '!': // Enable extended mode - case 'T': // Is a thread alive? - return {"OK", HandledPacketType::TYPE_ACK}; - case 's': // Step - return ThreadAction('s', 0); - case 'z': // Remove breakpoint or watchpoint - case 'Z': // Inserts breakpoint or watchpoint - return handleBreakpoint(packet); - case 'k': // Kill the process - SyscallHandler->TM.Stop(); - SyscallHandler->TM.WaitForIdle(); // Block until exit - return {"", HandledPacketType::TYPE_NONE}; - default: - return {"", HandledPacketType::TYPE_UNKNOWN}; + case '?': { + // Indicates the reason that the thread has stopped + // Behaviour changes if the target is in non-stop mode + // Binja doesn't support S response here + fextl::string str = fextl::fmt::format("T00thread:{:x};", getpid()); + return {std::move(str), HandledPacketType::TYPE_ACK}; + } + case 'c': + // Continue + return ThreadAction('c', 0); + case 'D': + // Detach + // Ensure the threads are back in running state on detach + SyscallHandler->TM.Run(); + SyscallHandler->TM.WaitForThreadsToRun(); + return {"OK", HandledPacketType::TYPE_ACK}; + case 'g': + // We might be running while we try reading + // Pause up front + SyscallHandler->TM.Pause(); + return {readRegs(), HandledPacketType::TYPE_ACK}; + case 'p': return readReg(packet); + case 'q': + case 'Q': return handleQuery(packet); + case 'v': return handleV(packet); + case 'm': // Memory read + case 
'M': // Memory write + return handleMemory(packet); + case 'H': // Sets thread for subsequent operations + return handleThreadOp(packet); + case '!': // Enable extended mode + case 'T': // Is a thread alive? + return {"OK", HandledPacketType::TYPE_ACK}; + case 's': // Step + return ThreadAction('s', 0); + case 'z': // Remove breakpoint or watchpoint + case 'Z': // Inserts breakpoint or watchpoint + return handleBreakpoint(packet); + case 'k': // Kill the process + SyscallHandler->TM.Stop(); + SyscallHandler->TM.WaitForIdle(); // Block until exit + return {"", HandledPacketType::TYPE_NONE}; + default: return {"", HandledPacketType::TYPE_UNKNOWN}; } } void GdbServer::SendPacketPair(const HandledPacketType& response) { std::lock_guard lk(sendMutex); - if (response.TypeResponse == HandledPacketType::TYPE_ACK || - response.TypeResponse == HandledPacketType::TYPE_ONLYACK) { + if (response.TypeResponse == HandledPacketType::TYPE_ACK || response.TypeResponse == HandledPacketType::TYPE_ONLYACK) { SendACK(*CommsStream, false); - } - else if (response.TypeResponse == HandledPacketType::TYPE_NACK || - response.TypeResponse == HandledPacketType::TYPE_ONLYNACK) { + } else if (response.TypeResponse == HandledPacketType::TYPE_NACK || response.TypeResponse == HandledPacketType::TYPE_ONLYNACK) { SendACK(*CommsStream, true); } if (response.TypeResponse == HandledPacketType::TYPE_UNKNOWN) { SendPacket(*CommsStream, ""); - } - else if (response.TypeResponse != HandledPacketType::TYPE_ONLYNACK && - response.TypeResponse != HandledPacketType::TYPE_ONLYACK && - response.TypeResponse != HandledPacketType::TYPE_NONE) { + } else if (response.TypeResponse != HandledPacketType::TYPE_ONLYNACK && response.TypeResponse != HandledPacketType::TYPE_ONLYACK && + response.TypeResponse != HandledPacketType::TYPE_NONE) { SendPacket(*CommsStream, response.Response); } } @@ -1354,23 +1321,19 @@ void GdbServer::SendPacketPair(const HandledPacketType& response) { GdbServer::WaitForConnectionResult 
GdbServer::WaitForConnection() { while (!CoreShuttingDown.load()) { struct pollfd PollFD { - .fd = ListenSocket, - .events = POLLIN | POLLPRI | POLLRDHUP, - .revents = 0, + .fd = ListenSocket, .events = POLLIN | POLLPRI | POLLRDHUP, .revents = 0, }; int Result = ppoll(&PollFD, 1, nullptr, nullptr); if (Result > 0) { if (PollFD.revents & POLLIN) { CommsStream = OpenSocket(); return WaitForConnectionResult::CONNECTION; - } - else if (PollFD.revents & (POLLHUP | POLLERR | POLLNVAL)) { + } else if (PollFD.revents & (POLLHUP | POLLERR | POLLNVAL)) { // Listen socket error or shutting down LogMan::Msg::EFmt("[GdbServer] gdbserver shutting down: {}"); return WaitForConnectionResult::ERROR; } - } - else if (Result == -1) { + } else if (Result == -1) { LogMan::Msg::EFmt("[GdbServer] poll failure: {}", errno); } } @@ -1391,56 +1354,55 @@ void GdbServer::GdbServerLoop() { break; } - HandledPacketType response{}; + HandledPacketType response {}; // Outer server loop. Handles packet start, ACK/NAK and break int c; - while ((c = CommsStream->get()) >= 0 ) { - switch (c) { - case '$': { - auto packet = ReadPacket(*CommsStream); - response = ProcessPacket(packet); - SendPacketPair(response); - if (response.TypeResponse == HandledPacketType::TYPE_UNKNOWN) { - LogMan::Msg::DFmt("Unknown packet {}", packet); - } - break; + while ((c = CommsStream->get()) >= 0) { + switch (c) { + case '$': { + auto packet = ReadPacket(*CommsStream); + response = ProcessPacket(packet); + SendPacketPair(response); + if (response.TypeResponse == HandledPacketType::TYPE_UNKNOWN) { + LogMan::Msg::DFmt("Unknown packet {}", packet); } - case '+': - // ACK, do nothing. 
- break; - case '-': - // NAK, Resend requested - { - std::lock_guard lk(sendMutex); - SendPacket(*CommsStream, response.Response); - } - break; - case '\x03': { // ASCII EOT - SyscallHandler->TM.Pause(); - fextl::string str = fextl::fmt::format("T02thread:{:02x};", getpid()); - if (LibraryMapChanged) { - // If libraries have changed then let gdb know - str += "library:1;"; - } - SendPacketPair({std::move(str), HandledPacketType::TYPE_ACK}); - break; - } - default: - LogMan::Msg::DFmt("GdbServer: Unexpected byte {} ({:02x})", static_cast(c), c); + break; + } + case '+': + // ACK, do nothing. + break; + case '-': + // NAK, Resend requested + { + std::lock_guard lk(sendMutex); + SendPacket(*CommsStream, response.Response); } + break; + case '\x03': { // ASCII EOT + SyscallHandler->TM.Pause(); + fextl::string str = fextl::fmt::format("T02thread:{:02x};", getpid()); + if (LibraryMapChanged) { + // If libraries have changed then let gdb know + str += "library:1;"; + } + SendPacketPair({std::move(str), HandledPacketType::TYPE_ACK}); + break; + } + default: LogMan::Msg::DFmt("GdbServer: Unexpected byte {} ({:02x})", static_cast(c), c); + } } { - std::lock_guard lk(sendMutex); - CommsStream.reset(); + std::lock_guard lk(sendMutex); + CommsStream.reset(); } } CloseListenSocket(); } -static void* ThreadHandler(void *Arg) { +static void* ThreadHandler(void* Arg) { FEXCore::Threads::SetThreadName("FEX:gdbserver"); auto This = reinterpret_cast(Arg); This->GdbServerLoop(); @@ -1468,13 +1430,13 @@ void GdbServer::OpenListenSocket() { return; } - struct sockaddr_un addr{}; + struct sockaddr_un addr {}; addr.sun_family = AF_UNIX; strncpy(addr.sun_path, GdbUnixSocketPath.data(), sizeof(addr.sun_path)); size_t SizeOfAddr = offsetof(sockaddr_un, sun_path) + GdbUnixSocketPath.size(); // Bind the socket to the path - int Result{}; + int Result {}; for (int attempt = 0; attempt < 2; ++attempt) { Result = bind(ListenSocket, reinterpret_cast(&addr), SizeOfAddr); if (Result == 0) { @@ 
-1508,13 +1470,13 @@ void GdbServer::CloseListenSocket() { fextl::unique_ptr GdbServer::OpenSocket() { // Block until a connection arrives - struct sockaddr_storage their_addr{}; - socklen_t addr_size{}; + struct sockaddr_storage their_addr {}; + socklen_t addr_size {}; - int new_fd = accept(ListenSocket, (struct sockaddr *)&their_addr, &addr_size); + int new_fd = accept(ListenSocket, (struct sockaddr*)&their_addr, &addr_size); return fextl::make_unique(new_fd); } #endif -} // namespace FEXCore +} // namespace FEX diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/LinuxAllocator.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/LinuxAllocator.cpp index d78211dd3a..4c89ff2975 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/LinuxAllocator.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/LinuxAllocator.cpp @@ -39,18 +39,17 @@ class MemAllocator32Bit final : public FEX::HLE::MemAllocator { LastKeyLocation = TOP_KEY; LastKeyLocation32Bit = TOP_KEY32BIT; FindPageRangePtr = &MemAllocator32Bit::FindPageRange_TopDown; - } - else { + } else { LastScanLocation = BASE_KEY; LastKeyLocation = BASE_KEY; FindPageRangePtr = &MemAllocator32Bit::FindPageRange; } } - void *Mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) override; - int Munmap(void *addr, size_t length) override; - void *Mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) override; - uint64_t Shmat(int shmid, const void* shmaddr, int shmflg, uint32_t *ResultAddress) override; + void* Mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) override; + int Munmap(void* addr, size_t length) override; + void* Mremap(void* old_address, size_t old_size, size_t new_size, int flags, void* new_address) override; + uint64_t Shmat(int shmid, const void* shmaddr, int shmflg, uint32_t* ResultAddress) override; uint64_t Shmdt(const void* shmaddr) override; static constexpr bool SearchDown = true; @@ -76,15 +75,15 @@ class 
MemAllocator32Bit final : public FEX::HLE::MemAllocator { // Set that contains 4k mapped pages // This is the full 32bit memory range std::bitset<0x10'0000> MappedPages; - fextl::map PageToShm{}; - uint64_t LastScanLocation{}; - uint64_t LastKeyLocation{}; - uint64_t LastKeyLocation32Bit{}; - std::mutex AllocMutex{}; + fextl::map PageToShm {}; + uint64_t LastScanLocation {}; + uint64_t LastKeyLocation {}; + uint64_t LastKeyLocation32Bit {}; + std::mutex AllocMutex {}; uint64_t FindPageRange(uint64_t Start, size_t Pages) const; uint64_t FindPageRange_TopDown(uint64_t Start, size_t Pages) const; - using FindHandler = uint64_t(MemAllocator32Bit::*)(uint64_t Start, size_t Pages) const; - FindHandler FindPageRangePtr{}; + using FindHandler = uint64_t (MemAllocator32Bit::*)(uint64_t Start, size_t Pages) const; + FindHandler FindPageRangePtr {}; }; uint64_t MemAllocator32Bit::FindPageRange(uint64_t Start, size_t Pages) const { @@ -113,8 +112,7 @@ uint64_t MemAllocator32Bit::FindPageRange(uint64_t Start, size_t Pages) const { uint64_t MemAllocator32Bit::FindPageRange_TopDown(uint64_t Start, size_t Pages) const { // Linear range scan - while (Start >= BASE_KEY && - Start <= TOP_KEY) { + while (Start >= BASE_KEY && Start <= TOP_KEY) { bool Free = true; uint64_t Offset = 0; @@ -134,8 +132,8 @@ uint64_t MemAllocator32Bit::FindPageRange_TopDown(uint64_t Start, size_t Pages) return 0; } -void *MemAllocator32Bit::Mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) { - std::scoped_lock lk{AllocMutex}; +void* MemAllocator32Bit::Mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) { + std::scoped_lock lk {AllocMutex}; size_t PagesLength = FEXCore::AlignUp(length, FHU::FEX_PAGE_SIZE) >> FHU::FEX_PAGE_SHIFT; uintptr_t Addr = reinterpret_cast(addr); @@ -143,8 +141,7 @@ void *MemAllocator32Bit::Mmap(void *addr, size_t length, int prot, int flags, in // Define MAP_FIXED_NOREPLACE ourselves to ensure we always parse this flag constexpr int 
FEX_MAP_FIXED_NOREPLACE = 0x100000; - bool Fixed = ((flags & MAP_FIXED) || - (flags & FEX_MAP_FIXED_NOREPLACE)); + bool Fixed = ((flags & MAP_FIXED) || (flags & FEX_MAP_FIXED_NOREPLACE)); // Both Addr and length must be page aligned if (Addr & ~FHU::FEX_PAGE_MASK) { @@ -152,8 +149,7 @@ void *MemAllocator32Bit::Mmap(void *addr, size_t length, int prot, int flags, in } // If we do have an fd then offset must be page aligned - if (fd != -1 && - offset & ~FHU::FEX_PAGE_MASK) { + if (fd != -1 && offset & ~FHU::FEX_PAGE_MASK) { return reinterpret_cast(-EINVAL); } @@ -177,116 +173,90 @@ void *MemAllocator32Bit::Mmap(void *addr, size_t length, int prot, int flags, in // Remove the MAP_32BIT flag if it exists now flags &= ~FEX::HLE::X86_64_MAP_32BIT; - auto AllocateNoHint = [&]() -> void*{ + auto AllocateNoHint = [&]() -> void* { bool Wrapped = false; uint64_t BottomPage = Map32Bit && (LastScanLocation >= LastKeyLocation32Bit) ? LastKeyLocation32Bit : LastScanLocation; -restart: - { - // Linear range scan - uint64_t LowerPage = (this->*FindPageRangePtr)(BottomPage, PagesLength); - if (LowerPage == 0) { - // Try again but this time from the start - BottomPage = Map32Bit ? LastKeyLocation32Bit : LastKeyLocation; - LowerPage = (this->*FindPageRangePtr)(BottomPage, PagesLength); - } +restart: { + // Linear range scan + uint64_t LowerPage = (this->*FindPageRangePtr)(BottomPage, PagesLength); + if (LowerPage == 0) { + // Try again but this time from the start + BottomPage = Map32Bit ? 
LastKeyLocation32Bit : LastKeyLocation; + LowerPage = (this->*FindPageRangePtr)(BottomPage, PagesLength); + } + + uint64_t UpperPage = LowerPage + PagesLength; + if (LowerPage == 0) { + return reinterpret_cast(-ENOMEM); + } + { + // Try and map the range + void* MappedPtr = ::mmap(reinterpret_cast(LowerPage << FHU::FEX_PAGE_SHIFT), length, prot, flags | FEX_MAP_FIXED_NOREPLACE, fd, offset); - uint64_t UpperPage = LowerPage + PagesLength; - if (LowerPage == 0) { - return reinterpret_cast(-ENOMEM); + if (MappedPtr == MAP_FAILED && errno != EEXIST) { + return reinterpret_cast(-errno); + } else if (MappedPtr == MAP_FAILED || MappedPtr >= reinterpret_cast(TOP_KEY << FHU::FEX_PAGE_SHIFT)) { + // Handles the case where MAP_FIXED_NOREPLACE failed with MAP_FAILED + // or if the host system's kernel isn't new enough then it returns the wrong pointer + if (MappedPtr != MAP_FAILED && MappedPtr >= reinterpret_cast(TOP_KEY << FHU::FEX_PAGE_SHIFT)) { + // Make sure to munmap this so we don't leak memory + ::munmap(MappedPtr, length); } - { - // Try and map the range - void *MappedPtr = ::mmap( - reinterpret_cast(LowerPage << FHU::FEX_PAGE_SHIFT), - length, - prot, - flags | FEX_MAP_FIXED_NOREPLACE, - fd, - offset); - - if (MappedPtr == MAP_FAILED && - errno != EEXIST) { - return reinterpret_cast(-errno); - } - else if (MappedPtr == MAP_FAILED || - MappedPtr >= reinterpret_cast(TOP_KEY << FHU::FEX_PAGE_SHIFT)) { - // Handles the case where MAP_FIXED_NOREPLACE failed with MAP_FAILED - // or if the host system's kernel isn't new enough then it returns the wrong pointer - if (MappedPtr != MAP_FAILED && - MappedPtr >= reinterpret_cast(TOP_KEY << FHU::FEX_PAGE_SHIFT)) { - // Make sure to munmap this so we don't leak memory - ::munmap(MappedPtr, length); - } - if (UpperPage == TOP_KEY) { - BottomPage = BASE_KEY; - Wrapped = true; - goto restart; - } - else if (Wrapped && - LowerPage >= LastScanLocation) { - // We linear scanned the entire memory range. 
Give up - return (void*)(uintptr_t)-errno; - } - else { - // Try again - if (SearchDown) { - --BottomPage; - } - else { - ++BottomPage; - } - goto restart; - } - } - else { - if (SearchDown) { - LastScanLocation = LowerPage; - } - else { - LastScanLocation = UpperPage; - } - SetUsedPages(LowerPage, PagesLength); - return MappedPtr; + if (UpperPage == TOP_KEY) { + BottomPage = BASE_KEY; + Wrapped = true; + goto restart; + } else if (Wrapped && LowerPage >= LastScanLocation) { + // We linear scanned the entire memory range. Give up + return (void*)(uintptr_t)-errno; + } else { + // Try again + if (SearchDown) { + --BottomPage; + } else { + ++BottomPage; } + goto restart; } + } else { + if (SearchDown) { + LastScanLocation = LowerPage; + } else { + LastScanLocation = UpperPage; + } + SetUsedPages(LowerPage, PagesLength); + return MappedPtr; } + } +} }; // Find a region that fits our address if (Addr == 0) { return AllocateNoHint(); - } - else { - void *MappedPtr = ::mmap( - reinterpret_cast(PageAddr << FHU::FEX_PAGE_SHIFT), - PagesLength << FHU::FEX_PAGE_SHIFT, - prot, - flags, - fd, - offset); - - if (MappedPtr >= reinterpret_cast(TOP_KEY << FHU::FEX_PAGE_SHIFT) && - (flags & FEX_MAP_FIXED_NOREPLACE)) { + } else { + void* MappedPtr = + ::mmap(reinterpret_cast(PageAddr << FHU::FEX_PAGE_SHIFT), PagesLength << FHU::FEX_PAGE_SHIFT, prot, flags, fd, offset); + + if (MappedPtr >= reinterpret_cast(TOP_KEY << FHU::FEX_PAGE_SHIFT) && (flags & FEX_MAP_FIXED_NOREPLACE)) { // Handles the case where MAP_FIXED_NOREPLACE isn't handled by the host system's // kernel and returns the wrong pointer // Make sure to munmap this so we don't leak memory ::munmap(MappedPtr, length); return reinterpret_cast(-EEXIST); - } - else if (MappedPtr != MAP_FAILED) { + } else if (MappedPtr != MAP_FAILED) { SetUsedPages(PageAddr, PagesLength); return MappedPtr; - } - else { + } else { return reinterpret_cast(-errno); } } return 0; } -int MemAllocator32Bit::Munmap(void *addr, size_t length) { - 
std::scoped_lock lk{AllocMutex}; +int MemAllocator32Bit::Munmap(void* addr, size_t length) { + std::scoped_lock lk {AllocMutex}; size_t PagesLength = FEXCore::AlignUp(length, FHU::FEX_PAGE_SIZE) >> FHU::FEX_PAGE_SHIFT; uintptr_t Addr = reinterpret_cast(addr); @@ -330,14 +300,14 @@ int MemAllocator32Bit::Munmap(void *addr, size_t length) { return 0; } -void *MemAllocator32Bit::Mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) { +void* MemAllocator32Bit::Mremap(void* old_address, size_t old_size, size_t new_size, int flags, void* new_address) { size_t OldPagesLength = FEXCore::AlignUp(old_size, FHU::FEX_PAGE_SIZE) >> FHU::FEX_PAGE_SHIFT; size_t NewPagesLength = FEXCore::AlignUp(new_size, FHU::FEX_PAGE_SIZE) >> FHU::FEX_PAGE_SHIFT; { - std::scoped_lock lk{AllocMutex}; + std::scoped_lock lk {AllocMutex}; if (flags & MREMAP_FIXED) { - void *MappedPtr = ::mremap(old_address, old_size, new_size, flags, new_address); + void* MappedPtr = ::mremap(old_address, old_size, new_size, flags, new_address); if (MappedPtr != MAP_FAILED) { if (!(flags & MREMAP_DONTUNMAP)) { @@ -349,17 +319,15 @@ void *MemAllocator32Bit::Mremap(void *old_address, size_t old_size, size_t new_s // Map the new pages uintptr_t NewAddr = reinterpret_cast(MappedPtr); SetUsedPages(NewAddr >> FHU::FEX_PAGE_SHIFT, NewPagesLength); - } - else { + } else { return reinterpret_cast(-errno); } - } - else { + } else { uintptr_t OldAddr = reinterpret_cast(old_address); uintptr_t OldPageAddr = OldAddr >> FHU::FEX_PAGE_SHIFT; if (NewPagesLength < OldPagesLength) { - void *MappedPtr = ::mremap(old_address, old_size, new_size, flags & ~MREMAP_MAYMOVE); + void* MappedPtr = ::mremap(old_address, old_size, new_size, flags & ~MREMAP_MAYMOVE); if (MappedPtr != MAP_FAILED) { // Clear the pages that we just shrunk @@ -367,14 +335,12 @@ void *MemAllocator32Bit::Mremap(void *old_address, size_t old_size, size_t new_s uintptr_t NewPageAddr = reinterpret_cast(MappedPtr) >> 
FHU::FEX_PAGE_SHIFT; SetFreePages(NewPageAddr + NewPagesLength, OldPagesLength - NewPagesLength); return MappedPtr; - } - else { + } else { return reinterpret_cast(-errno); } - } - else { + } else { // Scan the region forward from our first region's endd to see if it can be extended - bool CanExtend{true}; + bool CanExtend {true}; for (size_t i = OldPagesLength; i < NewPagesLength; ++i) { if (MappedPages[OldPageAddr + i]) { @@ -384,7 +350,7 @@ void *MemAllocator32Bit::Mremap(void *old_address, size_t old_size, size_t new_s } if (CanExtend) { - void *MappedPtr = ::mremap(old_address, old_size, new_size, flags & ~MREMAP_MAYMOVE); + void* MappedPtr = ::mremap(old_address, old_size, new_size, flags & ~MREMAP_MAYMOVE); if (MappedPtr != MAP_FAILED) { // Map the new pages @@ -392,8 +358,7 @@ void *MemAllocator32Bit::Mremap(void *old_address, size_t old_size, size_t new_s uintptr_t NewAddr = reinterpret_cast(MappedPtr); SetUsedPages(NewAddr >> FHU::FEX_PAGE_SHIFT, NewPagesLength); return MappedPtr; - } - else if (!(flags & MREMAP_MAYMOVE)) { + } else if (!(flags & MREMAP_MAYMOVE)) { // We have one more chance if MAYMOVE is specified return reinterpret_cast(-errno); } @@ -407,8 +372,8 @@ void *MemAllocator32Bit::Mremap(void *old_address, size_t old_size, size_t new_s // New Size is >= old size // First, try and allocate a region the size of the new size - void *MappedPtr = this->Mmap(nullptr, new_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - std::scoped_lock lk{AllocMutex}; + void* MappedPtr = this->Mmap(nullptr, new_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + std::scoped_lock lk {AllocMutex}; if (FEX::HLE::HasSyscallError(MappedPtr)) { // Couldn't find a region that fit our space return MappedPtr; @@ -419,12 +384,11 @@ void *MemAllocator32Bit::Mremap(void *old_address, size_t old_size, size_t new_s MappedPtr = ::mremap(old_address, old_size, new_size, flags | MREMAP_FIXED | MREMAP_MAYMOVE, MappedPtr); if (MappedPtr != MAP_FAILED) { - if (!(flags & 
MREMAP_DONTUNMAP) && - MappedPtr != old_address) { + if (!(flags & MREMAP_DONTUNMAP) && MappedPtr != old_address) { // If we have both MREMAP_DONTUNMAP not set and the new pointer is at a new location // Make sure to clear the old mapping uintptr_t OldAddr = reinterpret_cast(old_address); - SetFreePages(OldAddr >> FHU::FEX_PAGE_SHIFT , OldPagesLength); + SetFreePages(OldAddr >> FHU::FEX_PAGE_SHIFT, OldPagesLength); } // Map the new pages @@ -438,16 +402,15 @@ void *MemAllocator32Bit::Mremap(void *old_address, size_t old_size, size_t new_s return reinterpret_cast(-errno); } -uint64_t MemAllocator32Bit::Shmat(int shmid, const void* shmaddr, int shmflg, uint32_t *ResultAddress) { - std::scoped_lock lk{AllocMutex}; +uint64_t MemAllocator32Bit::Shmat(int shmid, const void* shmaddr, int shmflg, uint32_t* ResultAddress) { + std::scoped_lock lk {AllocMutex}; if (shmaddr != nullptr) { // shmaddr must be valid uint64_t Result = reinterpret_cast(::shmat(shmid, shmaddr, shmflg)); if (Result != -1) { uint32_t SmallRet = Result >> 32; - if (!(SmallRet == 0 || - SmallRet == ~0U)) { + if (!(SmallRet == 0 || SmallRet == ~0U)) { LOGMAN_MSG_A_FMT("Syscall returning something with data in the upper 32bits! 
BUG!"); return -ENOMEM; } @@ -461,7 +424,7 @@ uint64_t MemAllocator32Bit::Shmat(int shmid, const void* shmaddr, int shmflg, ui *ResultAddress = Result; // We must get the shm size and track it - struct shmid_ds buf{}; + struct shmid_ds buf {}; if (shmctl(shmid, IPC_STAT, &buf) == 0) { // Map the new pages @@ -471,88 +434,76 @@ uint64_t MemAllocator32Bit::Shmat(int shmid, const void* shmaddr, int shmflg, ui // Zero on working result Result = 0; - } - else { + } else { Result = -errno; } return Result; - } - else { + } else { // We must get the shm size and track it - struct shmid_ds buf{}; - uint64_t PagesLength{}; + struct shmid_ds buf {}; + uint64_t PagesLength {}; if (shmctl(shmid, IPC_STAT, &buf) == 0) { PagesLength = FEXCore::AlignUp(buf.shm_segsz, FHU::FEX_PAGE_SIZE) >> FHU::FEX_PAGE_SHIFT; - } - else { + } else { return -EINVAL; } bool Wrapped = false; uint64_t BottomPage = LastScanLocation; -restart: - { - // Linear range scan - uint64_t LowerPage = (this->*FindPageRangePtr)(BottomPage, PagesLength); - if (LowerPage == 0) { - // Try again but this time from the start +restart: { + // Linear range scan + uint64_t LowerPage = (this->*FindPageRangePtr)(BottomPage, PagesLength); + if (LowerPage == 0) { + // Try again but this time from the start + BottomPage = LastKeyLocation; + LowerPage = (this->*FindPageRangePtr)(BottomPage, PagesLength); + } + + uint64_t UpperPage = LowerPage + PagesLength; + if (LowerPage == 0) { + return -ENOMEM; + } + { + // Try and map the range + void* MappedPtr = ::shmat(shmid, reinterpret_cast(LowerPage << FHU::FEX_PAGE_SHIFT), shmflg); + + if (MappedPtr == MAP_FAILED) { + if (UpperPage == TOP_KEY) { BottomPage = LastKeyLocation; - LowerPage = (this->*FindPageRangePtr)(BottomPage, PagesLength); + Wrapped = true; + goto restart; + } else if (Wrapped && LowerPage >= LastScanLocation) { + // We linear scanned the entire memory range. 
Give up + return -errno; + } else { + // Try again + BottomPage += PagesLength; + goto restart; } - - uint64_t UpperPage = LowerPage + PagesLength; - if (LowerPage == 0) { - return -ENOMEM; + } else { + if (SearchDown) { + LastScanLocation = LowerPage; + } else { + LastScanLocation = UpperPage; } - { - // Try and map the range - void *MappedPtr = ::shmat( - shmid, - reinterpret_cast(LowerPage << FHU::FEX_PAGE_SHIFT), - shmflg); - - if (MappedPtr == MAP_FAILED) { - if (UpperPage == TOP_KEY) { - BottomPage = LastKeyLocation; - Wrapped = true; - goto restart; - } - else if (Wrapped && - LowerPage >= LastScanLocation) { - // We linear scanned the entire memory range. Give up - return -errno; - } - else { - // Try again - BottomPage += PagesLength; - goto restart; - } - } - else { - if (SearchDown) { - LastScanLocation = LowerPage; - } - else { - LastScanLocation = UpperPage; - } - // Set the range as mapped - SetUsedPages(LowerPage, PagesLength); + // Set the range as mapped + SetUsedPages(LowerPage, PagesLength); - *ResultAddress = reinterpret_cast(MappedPtr); + *ResultAddress = reinterpret_cast(MappedPtr); - // Add to the map - PageToShm[LowerPage] = shmid; + // Add to the map + PageToShm[LowerPage] = shmid; - // Zero on working result - return 0; - } - } + // Zero on working result + return 0; } } +} + } } uint64_t MemAllocator32Bit::Shmdt(const void* shmaddr) { - std::scoped_lock lk{AllocMutex}; + std::scoped_lock lk {AllocMutex}; uint32_t AddrPage = reinterpret_cast(shmaddr) >> FHU::FEX_PAGE_SHIFT; auto it = PageToShm.find(AddrPage); @@ -570,7 +521,7 @@ uint64_t MemAllocator32Bit::Shmdt(const void* shmaddr) { class MemAllocatorPassThrough final : public FEX::HLE::MemAllocator { public: - void *Mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) override { + void* Mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) override { uint64_t Result = (uint64_t)::mmap(addr, length, prot, flags, fd, offset); if (Result == ~0ULL) 
{ return reinterpret_cast(-errno); @@ -578,12 +529,12 @@ class MemAllocatorPassThrough final : public FEX::HLE::MemAllocator { return reinterpret_cast(Result); } - int Munmap(void *addr, size_t length) override { + int Munmap(void* addr, size_t length) override { uint64_t Result = (uint64_t)::munmap(addr, length); SYSCALL_ERRNO(); } - void *Mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) override { + void* Mremap(void* old_address, size_t old_size, size_t new_size, int flags, void* new_address) override { uint64_t Result = (uint64_t)::mremap(old_address, old_size, new_size, flags, new_address); if (Result == ~0ULL) { return reinterpret_cast(-errno); @@ -591,7 +542,7 @@ class MemAllocatorPassThrough final : public FEX::HLE::MemAllocator { return reinterpret_cast(Result); } - uint64_t Shmat(int shmid, const void* shmaddr, int shmflg, uint32_t *ResultAddress) override { + uint64_t Shmat(int shmid, const void* shmaddr, int shmflg, uint32_t* ResultAddress) override { uint64_t Result = (uint64_t)::shmat(shmid, reinterpret_cast(shmaddr), shmflg); if (Result != ~0ULL) { *ResultAddress = Result; @@ -614,4 +565,4 @@ fextl::unique_ptr CreatePassthroughAllocator() { return fextl::make_unique(); } -} +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/NetStream.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/NetStream.cpp index 7e8d3735f2..31a9f52070 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/NetStream.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/NetStream.cpp @@ -14,16 +14,17 @@ namespace FEXCore::Utils { namespace { -class NetBuf final : public std::streambuf, public FEXCore::Allocator::FEXAllocOperators { -public: - explicit NetBuf(int socketfd) : socket{socketfd} { - reset_output_buffer(); + class NetBuf final : public std::streambuf, public FEXCore::Allocator::FEXAllocOperators { + public: + explicit NetBuf(int socketfd) + : socket {socketfd} { + reset_output_buffer(); } ~NetBuf() 
override { - close(socket); + close(socket); } -private: + private: std::streamsize xsputn(const char* buffer, std::streamsize size) override; std::streambuf::int_type underflow() override; @@ -31,84 +32,84 @@ class NetBuf final : public std::streambuf, public FEXCore::Allocator::FEXAllocO int sync() override; void reset_output_buffer() { - // we always leave room for one extra char - setp(std::begin(output_buffer), std::end(output_buffer) -1); + // we always leave room for one extra char + setp(std::begin(output_buffer), std::end(output_buffer) - 1); } - int flushBuffer(const char *buffer, size_t size); + int flushBuffer(const char* buffer, size_t size); int socket; std::array output_buffer; std::array input_buffer; // enough for a typical packet -}; + }; -int NetBuf::flushBuffer(const char *buffer, size_t size) { + int NetBuf::flushBuffer(const char* buffer, size_t size) { #ifndef _WIN32 size_t total = 0; // Send data while (total < size) { - size_t sent = send(socket, (const void*)(buffer + total), size - total, MSG_NOSIGNAL); - if (sent == -1) { - // lets just assume all errors are end of file. - return -1; - } - total += sent; + size_t sent = send(socket, (const void*)(buffer + total), size - total, MSG_NOSIGNAL); + if (sent == -1) { + // lets just assume all errors are end of file. 
+ return -1; + } + total += sent; } return 0; #else - ERROR_AND_DIE_FMT("Unsupported"); + ERROR_AND_DIE_FMT("Unsupported"); #endif -} + } -std::streamsize NetBuf::xsputn(const char* buffer, std::streamsize size) { + std::streamsize NetBuf::xsputn(const char* buffer, std::streamsize size) { size_t buf_remaining = epptr() - pptr(); // Check if the string fits neatly in our buffer if (size <= buf_remaining) { - ::memcpy(pptr(), buffer, size); - pbump(size); - return size; + ::memcpy(pptr(), buffer, size); + pbump(size); + return size; } // Otherwise, flush the buffer first if (sync() < 0) { - return traits_type::eof(); + return traits_type::eof(); } if (size > sizeof(output_buffer) / 2) { - // If we have a large string, bypass the buffer - flushBuffer(buffer, size); - return size; + // If we have a large string, bypass the buffer + flushBuffer(buffer, size); + return size; } else { - return xsputn(buffer, size); + return xsputn(buffer, size); } -} + } -std::streambuf::int_type NetBuf::overflow(std::streambuf::int_type ch) { + std::streambuf::int_type NetBuf::overflow(std::streambuf::int_type ch) { // we always leave room for one extra char - *pptr() = (char) ch; + *pptr() = (char)ch; pbump(1); return sync(); -} + } -int NetBuf::sync() { + int NetBuf::sync() { // Flush and reset output buffer to zero if (flushBuffer(pbase(), pptr() - pbase()) < 0) { - return -1; + return -1; } reset_output_buffer(); return 0; -} + } -std::streambuf::int_type NetBuf::underflow() { + std::streambuf::int_type NetBuf::underflow() { #ifndef _WIN32 - ssize_t size = recv(socket, (void *)std::begin(input_buffer), sizeof(input_buffer), 0); + ssize_t size = recv(socket, (void*)std::begin(input_buffer), sizeof(input_buffer), 0); if (size <= 0) { - setg(nullptr, nullptr, nullptr); - return traits_type::eof(); + setg(nullptr, nullptr, nullptr); + return traits_type::eof(); } setg(&input_buffer[0], &input_buffer[0], &input_buffer[size]); @@ -117,13 +118,14 @@ std::streambuf::int_type 
NetBuf::underflow() { #else ERROR_AND_DIE_FMT("Unsupported"); #endif -} + } } // Anonymous namespace -NetStream::NetStream(int socketfd) : std::iostream(new NetBuf(socketfd)) {} +NetStream::NetStream(int socketfd) + : std::iostream(new NetBuf(socketfd)) {} NetStream::~NetStream() { - delete rdbuf(); + delete rdbuf(); } } // namespace FEXCore::Utils diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp index 39f912c117..69c6f39452 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp @@ -43,696 +43,378 @@ desc: Handles host -> host and host -> guest signal routing, emulates procmask & namespace FEX::HLE { #ifdef _M_X86_64 - __attribute__((naked)) - static void sigrestore() { - __asm volatile("syscall;" - :: "a" (0xF) - : "memory"); - } +__attribute__((naked)) static void sigrestore() { + __asm volatile("syscall;" ::"a"(0xF) : "memory"); +} #endif - constexpr static uint32_t X86_MINSIGSTKSZ = 0x2000U; +constexpr static uint32_t X86_MINSIGSTKSZ = 0x2000U; - // We can only have one delegator per process - static SignalDelegator *GlobalDelegator{}; +// We can only have one delegator per process +static SignalDelegator* GlobalDelegator {}; - struct ThreadState { - FEXCore::Core::InternalThreadState *Thread{}; +struct ThreadState { + FEXCore::Core::InternalThreadState* Thread {}; - void *AltStackPtr{}; - stack_t GuestAltStack { - .ss_sp = nullptr, - .ss_flags = SS_DISABLE, // By default the guest alt stack is disabled - .ss_size = 0, - }; - // This is the thread's current signal mask - GuestSAMask CurrentSignalMask{}; - // The mask prior to a suspend - GuestSAMask PreviousSuspendMask{}; - - uint64_t PendingSignals{}; + void* AltStackPtr {}; + stack_t GuestAltStack { + .ss_sp = nullptr, + .ss_flags = SS_DISABLE, // By default the guest alt stack is disabled + .ss_size = 0, }; + // This is the thread's 
current signal mask + GuestSAMask CurrentSignalMask {}; + // The mask prior to a suspend + GuestSAMask PreviousSuspendMask {}; - thread_local ThreadState ThreadData{}; - - static void SignalHandlerThunk(int Signal, siginfo_t *Info, void *UContext) { - GlobalDelegator->HandleSignal(Signal, Info, UContext); - } + uint64_t PendingSignals {}; +}; - uint64_t SigIsMember(GuestSAMask *Set, int Signal) { - // Signal 0 isn't real, so everything is offset by one inside the set - Signal -= 1; - return (Set->Val >> Signal) & 1; - } +thread_local ThreadState ThreadData {}; - uint64_t SetSignal(GuestSAMask *Set, int Signal) { - // Signal 0 isn't real, so everything is offset by one inside the set - Signal -= 1; - return Set->Val | (1ULL << Signal); - } +static void SignalHandlerThunk(int Signal, siginfo_t* Info, void* UContext) { + GlobalDelegator->HandleSignal(Signal, Info, UContext); +} - /** - * @name Signal frame setup - * @{ */ +uint64_t SigIsMember(GuestSAMask* Set, int Signal) { + // Signal 0 isn't real, so everything is offset by one inside the set + Signal -= 1; + return (Set->Val >> Signal) & 1; +} - // Total number of layouts that siginfo supports. - enum class SigInfoLayout { - LAYOUT_KILL, - LAYOUT_TIMER, - LAYOUT_POLL, - LAYOUT_FAULT, - LAYOUT_FAULT_RIP, - LAYOUT_CHLD, - LAYOUT_RT, - LAYOUT_SYS, - }; +uint64_t SetSignal(GuestSAMask* Set, int Signal) { + // Signal 0 isn't real, so everything is offset by one inside the set + Signal -= 1; + return Set->Val | (1ULL << Signal); +} - // Calculate the siginfo layout based on Signal and si_code. - static SigInfoLayout CalculateSigInfoLayout(int Signal, int si_code) { - if (si_code > SI_USER && si_code < SI_KERNEL) { - // For signals that are not considered RT. - if (Signal == SIGSEGV || - Signal == SIGBUS || - Signal == SIGTRAP) { - // Regular FAULT layout. - return SigInfoLayout::LAYOUT_FAULT; - } - else if (Signal == SIGILL || - Signal == SIGFPE) { - // Fault layout but addr refers to RIP. 
- return SigInfoLayout::LAYOUT_FAULT_RIP; - } - else if (Signal == SIGCHLD) { - // Child layout - return SigInfoLayout::LAYOUT_CHLD; - } - else if (Signal == SIGPOLL) { - // Poll layout - return SigInfoLayout::LAYOUT_POLL; - } - else if (Signal == SIGSYS) { - // Sys layout - return SigInfoLayout::LAYOUT_SYS; - } +/** + * @name Signal frame setup + * @{ */ + +// Total number of layouts that siginfo supports. +enum class SigInfoLayout { + LAYOUT_KILL, + LAYOUT_TIMER, + LAYOUT_POLL, + LAYOUT_FAULT, + LAYOUT_FAULT_RIP, + LAYOUT_CHLD, + LAYOUT_RT, + LAYOUT_SYS, +}; + +// Calculate the siginfo layout based on Signal and si_code. +static SigInfoLayout CalculateSigInfoLayout(int Signal, int si_code) { + if (si_code > SI_USER && si_code < SI_KERNEL) { + // For signals that are not considered RT. + if (Signal == SIGSEGV || Signal == SIGBUS || Signal == SIGTRAP) { + // Regular FAULT layout. + return SigInfoLayout::LAYOUT_FAULT; + } else if (Signal == SIGILL || Signal == SIGFPE) { + // Fault layout but addr refers to RIP. + return SigInfoLayout::LAYOUT_FAULT_RIP; + } else if (Signal == SIGCHLD) { + // Child layout + return SigInfoLayout::LAYOUT_CHLD; + } else if (Signal == SIGPOLL) { + // Poll layout + return SigInfoLayout::LAYOUT_POLL; + } else if (Signal == SIGSYS) { + // Sys layout + return SigInfoLayout::LAYOUT_SYS; + } + } else { + // Negative si_codes are kernel specific things. + if (si_code == SI_TIMER) { + return SigInfoLayout::LAYOUT_TIMER; + } else if (si_code == SI_SIGIO) { + return SigInfoLayout::LAYOUT_POLL; + } else if (si_code < 0) { + return SigInfoLayout::LAYOUT_RT; } - else { - // Negative si_codes are kernel specific things. 
- if (si_code == SI_TIMER) { - return SigInfoLayout::LAYOUT_TIMER; - } - else if (si_code == SI_SIGIO) { - return SigInfoLayout::LAYOUT_POLL; - } - else if (si_code < 0) { - return SigInfoLayout::LAYOUT_RT; - } - } - - return SigInfoLayout::LAYOUT_KILL; } - void SignalDelegator::HandleSignal(int Signal, void *Info, void *UContext) { - // Let the host take first stab at handling the signal - auto Thread = GetTLSThread(); - - if (!Thread) { - LogMan::Msg::AFmt("[{}] Thread has received a signal and hasn't registered itself with the delegate! Programming error!", FHU::Syscalls::gettid()); - } - else { - SignalHandler &Handler = HostHandlers[Signal]; - for (auto &HandlerFunc : Handler.Handlers) { - if (HandlerFunc(Thread, Signal, Info, UContext)) { - // If the host handler handled the fault then we can continue now - return; - } - } + return SigInfoLayout::LAYOUT_KILL; +} - if (Handler.FrontendHandler && - Handler.FrontendHandler(Thread, Signal, Info, UContext)) { +void SignalDelegator::HandleSignal(int Signal, void* Info, void* UContext) { + // Let the host take first stab at handling the signal + auto Thread = GetTLSThread(); + + if (!Thread) { + LogMan::Msg::AFmt("[{}] Thread has received a signal and hasn't registered itself with the delegate! 
Programming error!", + FHU::Syscalls::gettid()); + } else { + SignalHandler& Handler = HostHandlers[Signal]; + for (auto& HandlerFunc : Handler.Handlers) { + if (HandlerFunc(Thread, Signal, Info, UContext)) { + // If the host handler handled the fault then we can continue now return; } + } - // Now let the frontend handle the signal - // It's clearly a guest signal and this ends up being an OS specific issue - HandleGuestSignal(Thread, Signal, Info, UContext); + if (Handler.FrontendHandler && Handler.FrontendHandler(Thread, Signal, Info, UContext)) { + return; } - } - void SignalDelegator::RegisterHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required) { - SetHostSignalHandler(Signal, Func, Required); - FrontendRegisterHostSignalHandler(Signal, Func, Required); + // Now let the frontend handle the signal + // It's clearly a guest signal and this ends up being an OS specific issue + HandleGuestSignal(Thread, Signal, Info, UContext); } +} - void SignalDelegator::SpillSRA(FEXCore::Core::InternalThreadState *Thread, void *ucontext, uint32_t IgnoreMask) { -#ifdef _M_ARM_64 - for (size_t i = 0; i < Config.SRAGPRCount; i++) { - const uint8_t SRAIdxMap = Config.SRAGPRMapping[i]; - if (IgnoreMask & (1U << SRAIdxMap)) { - // Skip this one, it's already spilled - continue; - } - Thread->CurrentFrame->State.gregs[i] = ArchHelpers::Context::GetArmReg(ucontext, SRAIdxMap); - } +void SignalDelegator::RegisterHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required) { + SetHostSignalHandler(Signal, Func, Required); + FrontendRegisterHostSignalHandler(Signal, Func, Required); +} - if (Config.SupportsAVX) { - // TODO: This doesn't save the upper 128-bits of the 256-bit registers. - // This needs to be implemented still. 
- for (size_t i = 0; i < Config.SRAFPRCount; i++) { - auto FPR = ArchHelpers::Context::GetArmFPR(ucontext, Config.SRAFPRMapping[i]); - memcpy(&Thread->CurrentFrame->State.xmm.avx.data[i][0], &FPR, sizeof(__uint128_t)); - } - } else { - for (size_t i = 0; i < Config.SRAFPRCount; i++) { - auto FPR = ArchHelpers::Context::GetArmFPR(ucontext, Config.SRAFPRMapping[i]); - memcpy(&Thread->CurrentFrame->State.xmm.sse.data[i][0], &FPR, sizeof(__uint128_t)); - } +void SignalDelegator::SpillSRA(FEXCore::Core::InternalThreadState* Thread, void* ucontext, uint32_t IgnoreMask) { +#ifdef _M_ARM_64 + for (size_t i = 0; i < Config.SRAGPRCount; i++) { + const uint8_t SRAIdxMap = Config.SRAGPRMapping[i]; + if (IgnoreMask & (1U << SRAIdxMap)) { + // Skip this one, it's already spilled + continue; } -#endif + Thread->CurrentFrame->State.gregs[i] = ArchHelpers::Context::GetArmReg(ucontext, SRAIdxMap); } - static uint32_t ConvertSignalToTrapNo(int Signal, siginfo_t *HostSigInfo) { - switch (Signal) { - case SIGSEGV: - if (HostSigInfo->si_code == SEGV_MAPERR || - HostSigInfo->si_code == SEGV_ACCERR) { - // Protection fault - return FEXCore::X86State::X86_TRAPNO_PF; - } - break; + if (Config.SupportsAVX) { + // TODO: This doesn't save the upper 128-bits of the 256-bit registers. + // This needs to be implemented still. 
+ for (size_t i = 0; i < Config.SRAFPRCount; i++) { + auto FPR = ArchHelpers::Context::GetArmFPR(ucontext, Config.SRAFPRMapping[i]); + memcpy(&Thread->CurrentFrame->State.xmm.avx.data[i][0], &FPR, sizeof(__uint128_t)); } - - // Unknown mapping, fall back to old behaviour and just pass signal - return Signal; - } - - static uint32_t ConvertSignalToError(void *ucontext, int Signal, siginfo_t *HostSigInfo) { - switch (Signal) { - case SIGSEGV: - if (HostSigInfo->si_code == SEGV_MAPERR || - HostSigInfo->si_code == SEGV_ACCERR) { - // Protection fault - // Always a user fault for us - return ArchHelpers::Context::GetProtectFlags(ucontext); - } - break; + } else { + for (size_t i = 0; i < Config.SRAFPRCount; i++) { + auto FPR = ArchHelpers::Context::GetArmFPR(ucontext, Config.SRAFPRMapping[i]); + memcpy(&Thread->CurrentFrame->State.xmm.sse.data[i][0], &FPR, sizeof(__uint128_t)); } - - // Not a page fault issue - return 0; } +#endif +} - template - static void SetXStateInfo(T* xstate, bool is_avx_enabled) { - auto* fpstate = &xstate->fpstate; - - fpstate->sw_reserved.magic1 = FEXCore::x86_64::fpx_sw_bytes::FP_XSTATE_MAGIC; - fpstate->sw_reserved.extended_size = is_avx_enabled ? 
sizeof(T) : 0; - - fpstate->sw_reserved.xfeatures |= FEXCore::x86_64::fpx_sw_bytes::FEATURE_FP | - FEXCore::x86_64::fpx_sw_bytes::FEATURE_SSE; - if (is_avx_enabled) { - fpstate->sw_reserved.xfeatures |= FEXCore::x86_64::fpx_sw_bytes::FEATURE_YMM; +static uint32_t ConvertSignalToTrapNo(int Signal, siginfo_t* HostSigInfo) { + switch (Signal) { + case SIGSEGV: + if (HostSigInfo->si_code == SEGV_MAPERR || HostSigInfo->si_code == SEGV_ACCERR) { + // Protection fault + return FEXCore::X86State::X86_TRAPNO_PF; } + break; + } - fpstate->sw_reserved.xstate_size = fpstate->sw_reserved.extended_size; + // Unknown mapping, fall back to old behaviour and just pass signal + return Signal; +} - if (is_avx_enabled) { - xstate->xstate_hdr.xfeatures = 0; +static uint32_t ConvertSignalToError(void* ucontext, int Signal, siginfo_t* HostSigInfo) { + switch (Signal) { + case SIGSEGV: + if (HostSigInfo->si_code == SEGV_MAPERR || HostSigInfo->si_code == SEGV_ACCERR) { + // Protection fault + // Always a user fault for us + return ArchHelpers::Context::GetProtectFlags(ucontext); } + break; } - ArchHelpers::Context::ContextBackup* SignalDelegator::StoreThreadState(FEXCore::Core::InternalThreadState *Thread, int Signal, void *ucontext) { - // We can end up getting a signal at any point in our host state - // Jump to a handler that saves all state so we can safely return - uint64_t OldSP = ArchHelpers::Context::GetSp(ucontext); - uintptr_t NewSP = OldSP; - - size_t StackOffset = sizeof(ArchHelpers::Context::ContextBackup); - - // We need to back up behind the host's red zone - // We do this on the guest side as well - // (does nothing on arm hosts) - NewSP -= ArchHelpers::Context::ContextBackup::RedZoneSize; - - NewSP -= StackOffset; - NewSP = FEXCore::AlignDown(NewSP, 16); - - auto Context = reinterpret_cast(NewSP); - ArchHelpers::Context::BackupContext(ucontext, Context); - - // Retain the action pointer so we can see it when we return - Context->Signal = Signal; + // Not a page fault issue 
+ return 0; +} - // Save guest state - // We can't guarantee if registers are in context or host GPRs - // So we need to save everything - memcpy(&Context->GuestState, &Thread->CurrentFrame->State, sizeof(FEXCore::Core::CPUState)); +template +static void SetXStateInfo(T* xstate, bool is_avx_enabled) { + auto* fpstate = &xstate->fpstate; - // Set the new SP - ArchHelpers::Context::SetSp(ucontext, NewSP); + fpstate->sw_reserved.magic1 = FEXCore::x86_64::fpx_sw_bytes::FP_XSTATE_MAGIC; + fpstate->sw_reserved.extended_size = is_avx_enabled ? sizeof(T) : 0; - Context->Flags = 0; - Context->FPStateLocation = 0; - Context->UContextLocation = 0; - Context->SigInfoLocation = 0; - Context->InSyscallInfo = 0; + fpstate->sw_reserved.xfeatures |= FEXCore::x86_64::fpx_sw_bytes::FEATURE_FP | FEXCore::x86_64::fpx_sw_bytes::FEATURE_SSE; + if (is_avx_enabled) { + fpstate->sw_reserved.xfeatures |= FEXCore::x86_64::fpx_sw_bytes::FEATURE_YMM; + } - // Store fault to top status and then reset it - Context->FaultToTopAndGeneratedException = Thread->CurrentFrame->SynchronousFaultData.FaultToTopAndGeneratedException; - Thread->CurrentFrame->SynchronousFaultData.FaultToTopAndGeneratedException = false; + fpstate->sw_reserved.xstate_size = fpstate->sw_reserved.extended_size; - return Context; + if (is_avx_enabled) { + xstate->xstate_hdr.xfeatures = 0; } +} - void SignalDelegator::RestoreThreadState(FEXCore::Core::InternalThreadState *Thread, void *ucontext, RestoreType Type) { - const bool IsAVXEnabled = Config.SupportsAVX; +ArchHelpers::Context::ContextBackup* SignalDelegator::StoreThreadState(FEXCore::Core::InternalThreadState* Thread, int Signal, void* ucontext) { + // We can end up getting a signal at any point in our host state + // Jump to a handler that saves all state so we can safely return + uint64_t OldSP = ArchHelpers::Context::GetSp(ucontext); + uintptr_t NewSP = OldSP; - uint64_t OldSP{}; - if (Type == RestoreType::TYPE_PAUSE) [[unlikely]] { - OldSP = 
ArchHelpers::Context::GetSp(ucontext); - } - else { - // Some fun introspection here. - // We store a pointer to our host-stack on the guest stack. - // We need to inspect the guest state coming in, so we can get our host stack back. - uint64_t GuestSP = Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP]; + size_t StackOffset = sizeof(ArchHelpers::Context::ContextBackup); - if (Is64BitMode) { - // Signal frame layout on stack needs to be as follows - // void* ReturnPointer - // ucontext_t - // siginfo_t - // FP state - // Host stack location + // We need to back up behind the host's red zone + // We do this on the guest side as well + // (does nothing on arm hosts) + NewSP -= ArchHelpers::Context::ContextBackup::RedZoneSize; - GuestSP += sizeof(FEXCore::x86_64::ucontext_t); - GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86_64::ucontext_t)); + NewSP -= StackOffset; + NewSP = FEXCore::AlignDown(NewSP, 16); - GuestSP += sizeof(siginfo_t); - GuestSP = FEXCore::AlignUp(GuestSP, alignof(siginfo_t)); + auto Context = reinterpret_cast(NewSP); + ArchHelpers::Context::BackupContext(ucontext, Context); - if (IsAVXEnabled) { - GuestSP += sizeof(FEXCore::x86_64::xstate); - GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86_64::xstate)); - } else { - GuestSP += sizeof(FEXCore::x86_64::_libc_fpstate); - GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86_64::_libc_fpstate)); - } - } - else { - if (Type == RestoreType::TYPE_NONREALTIME) { - // Signal frame layout on stack needs to be as follows - // SigFrame_i32 - // FPState - // Host stack location - - // Remove the 4-byte pretcode /AND/ a legacy argument that is ignored. 
- GuestSP += sizeof(SigFrame_i32) - 8; - GuestSP = FEXCore::AlignUp(GuestSP, alignof(SigFrame_i32)); - - if (IsAVXEnabled) { - GuestSP += sizeof(FEXCore::x86::xstate); - GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86::xstate)); - } else { - GuestSP += sizeof(FEXCore::x86::_libc_fpstate); - GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86::_libc_fpstate)); - } - } - else { - // Signal frame layout on stack needs to be as follows - // RTSigFrame_i32 - // FPState - // Host stack location - - // Remove the 4-byte pretcode. - GuestSP += sizeof(RTSigFrame_i32) - 4; - GuestSP = FEXCore::AlignUp(GuestSP, alignof(RTSigFrame_i32)); - - if (IsAVXEnabled) { - GuestSP += sizeof(FEXCore::x86::xstate); - GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86::xstate)); - } else { - GuestSP += sizeof(FEXCore::x86::_libc_fpstate); - GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86::_libc_fpstate)); - } - } - } + // Retain the action pointer so we can see it when we return + Context->Signal = Signal; - OldSP = *reinterpret_cast(GuestSP); - } + // Save guest state + // We can't guarantee if registers are in context or host GPRs + // So we need to save everything + memcpy(&Context->GuestState, &Thread->CurrentFrame->State, sizeof(FEXCore::Core::CPUState)); - uintptr_t NewSP = OldSP; - auto Context = reinterpret_cast(NewSP); + // Set the new SP + ArchHelpers::Context::SetSp(ucontext, NewSP); - // Restore host state - ArchHelpers::Context::RestoreContext(ucontext, Context); + Context->Flags = 0; + Context->FPStateLocation = 0; + Context->UContextLocation = 0; + Context->SigInfoLocation = 0; + Context->InSyscallInfo = 0; - // Reset the guest state - memcpy(&Thread->CurrentFrame->State, &Context->GuestState, sizeof(FEXCore::Core::CPUState)); + // Store fault to top status and then reset it + Context->FaultToTopAndGeneratedException = Thread->CurrentFrame->SynchronousFaultData.FaultToTopAndGeneratedException; + 
Thread->CurrentFrame->SynchronousFaultData.FaultToTopAndGeneratedException = false; - if (Context->UContextLocation) { - auto Frame = Thread->CurrentFrame; + return Context; +} - if (Context->Flags &ArchHelpers::Context::ContextFlags::CONTEXT_FLAG_INJIT) { - // XXX: Unsupported since it needs state reconstruction - // If we are in the JIT then SRA might need to be restored to values from the context - // We can't currently support this since it might result in tearing without real state reconstruction - } +void SignalDelegator::RestoreThreadState(FEXCore::Core::InternalThreadState* Thread, void* ucontext, RestoreType Type) { + const bool IsAVXEnabled = Config.SupportsAVX; - if (Is64BitMode) { - RestoreFrame_x64(Thread, Context, Frame, ucontext); - } - else { - if (Type == RestoreType::TYPE_NONREALTIME) { - RestoreFrame_ia32(Thread, Context, Frame, ucontext); - } - else { - RestoreRTFrame_ia32(Thread, Context, Frame, ucontext); - } - } - } - } + uint64_t OldSP {}; + if (Type == RestoreType::TYPE_PAUSE) [[unlikely]] { + OldSP = ArchHelpers::Context::GetSp(ucontext); + } else { + // Some fun introspection here. + // We store a pointer to our host-stack on the guest stack. + // We need to inspect the guest state coming in, so we can get our host stack back. 
+ uint64_t GuestSP = Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP]; - void SignalDelegator::RestoreFrame_x64(FEXCore::Core::InternalThreadState *Thread, ArchHelpers::Context::ContextBackup* Context, FEXCore::Core::CpuStateFrame *Frame, void *ucontext) { - const bool IsAVXEnabled = Config.SupportsAVX; - - auto *guest_uctx = reinterpret_cast(Context->UContextLocation); - [[maybe_unused]] auto *guest_siginfo = reinterpret_cast(Context->SigInfoLocation); - - // If the guest modified the RIP then we need to take special precautions here - if (Context->OriginalRIP != guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_RIP] || - Context->FaultToTopAndGeneratedException) { - - // Restore previous `InSyscallInfo` structure. - Frame->InSyscallInfo = Context->InSyscallInfo; - - // Hack! Go back to the top of the dispatcher top - // This is only safe inside the JIT rather than anything outside of it - ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA); - // Set our state register to point to our guest thread data - ArchHelpers::Context::SetState(ucontext, reinterpret_cast(Frame)); - - Frame->State.rip = guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_RIP]; - // XXX: Full context setting - CTX->SetFlagsFromCompactedEFLAGS(Thread, guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_EFL]); - -#define COPY_REG(x) \ - Frame->State.gregs[FEXCore::X86State::REG_##x] = guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_##x]; - COPY_REG(R8); - COPY_REG(R9); - COPY_REG(R10); - COPY_REG(R11); - COPY_REG(R12); - COPY_REG(R13); - COPY_REG(R14); - COPY_REG(R15); - COPY_REG(RDI); - COPY_REG(RSI); - COPY_REG(RBP); - COPY_REG(RBX); - COPY_REG(RDX); - COPY_REG(RAX); - COPY_REG(RCX); - COPY_REG(RSP); -#undef COPY_REG - auto *xstate = reinterpret_cast(guest_uctx->uc_mcontext.fpregs); - auto *fpstate = &xstate->fpstate; + if (Is64BitMode) { + // Signal frame layout on stack needs to be as follows + // void* ReturnPointer + // ucontext_t + 
// siginfo_t + // FP state + // Host stack location + + GuestSP += sizeof(FEXCore::x86_64::ucontext_t); + GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86_64::ucontext_t)); - // Copy float registers - memcpy(Frame->State.mm, fpstate->_st, sizeof(Frame->State.mm)); + GuestSP += sizeof(siginfo_t); + GuestSP = FEXCore::AlignUp(GuestSP, alignof(siginfo_t)); if (IsAVXEnabled) { - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { - memcpy(&Frame->State.xmm.avx.data[i][0], &fpstate->_xmm[i], sizeof(__uint128_t)); - } - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { - memcpy(&Frame->State.xmm.avx.data[i][2], &xstate->ymmh.ymmh_space[i], sizeof(__uint128_t)); - } + GuestSP += sizeof(FEXCore::x86_64::xstate); + GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86_64::xstate)); } else { - memcpy(Frame->State.xmm.sse.data, fpstate->_xmm, sizeof(Frame->State.xmm.sse.data)); + GuestSP += sizeof(FEXCore::x86_64::_libc_fpstate); + GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86_64::_libc_fpstate)); } + } else { + if (Type == RestoreType::TYPE_NONREALTIME) { + // Signal frame layout on stack needs to be as follows + // SigFrame_i32 + // FPState + // Host stack location - // FCW store default - Frame->State.FCW = fpstate->fcw; - Frame->State.AbridgedFTW = fpstate->ftw; - - // Deconstruct FSW - Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (fpstate->fsw >> 8) & 1; - Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (fpstate->fsw >> 9) & 1; - Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (fpstate->fsw >> 10) & 1; - Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (fpstate->fsw >> 14) & 1; - Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (fpstate->fsw >> 11) & 0b111; - } - } - - void SignalDelegator::RestoreFrame_ia32(FEXCore::Core::InternalThreadState *Thread, ArchHelpers::Context::ContextBackup* Context, FEXCore::Core::CpuStateFrame *Frame, void *ucontext) { - const bool IsAVXEnabled = 
Config.SupportsAVX; - - SigFrame_i32 *guest_uctx = reinterpret_cast(Context->UContextLocation); - // If the guest modified the RIP then we need to take special precautions here - if (Context->OriginalRIP != guest_uctx->sc.ip || - Context->FaultToTopAndGeneratedException) { - // Restore previous `InSyscallInfo` structure. - Frame->InSyscallInfo = Context->InSyscallInfo; - - // Hack! Go back to the top of the dispatcher top - // This is only safe inside the JIT rather than anything outside of it - ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA); - // Set our state register to point to our guest thread data - ArchHelpers::Context::SetState(ucontext, reinterpret_cast(Frame)); - - // XXX: Full context setting - CTX->SetFlagsFromCompactedEFLAGS(Thread, guest_uctx->sc.flags); - - Frame->State.rip = guest_uctx->sc.ip; - Frame->State.cs_idx = guest_uctx->sc.cs; - Frame->State.ds_idx = guest_uctx->sc.ds; - Frame->State.es_idx = guest_uctx->sc.es; - Frame->State.fs_idx = guest_uctx->sc.fs; - Frame->State.gs_idx = guest_uctx->sc.gs; - Frame->State.ss_idx = guest_uctx->sc.ss; - - Frame->State.cs_cached = Frame->State.gdt[Frame->State.cs_idx >> 3].base; - Frame->State.ds_cached = Frame->State.gdt[Frame->State.ds_idx >> 3].base; - Frame->State.es_cached = Frame->State.gdt[Frame->State.es_idx >> 3].base; - Frame->State.fs_cached = Frame->State.gdt[Frame->State.fs_idx >> 3].base; - Frame->State.gs_cached = Frame->State.gdt[Frame->State.gs_idx >> 3].base; - Frame->State.ss_cached = Frame->State.gdt[Frame->State.ss_idx >> 3].base; - -#define COPY_REG(x, y) \ - Frame->State.gregs[FEXCore::X86State::REG_##x] = guest_uctx->sc.y; - COPY_REG(RDI, di); - COPY_REG(RSI, si); - COPY_REG(RBP, bp); - COPY_REG(RBX, bx); - COPY_REG(RDX, dx); - COPY_REG(RAX, ax); - COPY_REG(RCX, cx); - COPY_REG(RSP, sp); -#undef COPY_REG - auto *xstate = reinterpret_cast(guest_uctx->sc.fpstate); - auto *fpstate = &xstate->fpstate; - - // Copy float registers - for (size_t i = 0; i < 
FEXCore::Core::CPUState::NUM_MMS; ++i) { - // 32-bit st register size is only 10 bytes. Not padded to 16byte like x86-64 - memcpy(&Frame->State.mm[i], &fpstate->_st[i], 10); - } + // Remove the 4-byte pretcode /AND/ a legacy argument that is ignored. + GuestSP += sizeof(SigFrame_i32) - 8; + GuestSP = FEXCore::AlignUp(GuestSP, alignof(SigFrame_i32)); - // Extended XMM state - if (IsAVXEnabled) { - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { - memcpy(&Frame->State.xmm.avx.data[i][0], &fpstate->_xmm[i], sizeof(__uint128_t)); - } - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { - memcpy(&Frame->State.xmm.avx.data[i][2], &xstate->ymmh.ymmh_space[i], sizeof(__uint128_t)); + if (IsAVXEnabled) { + GuestSP += sizeof(FEXCore::x86::xstate); + GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86::xstate)); + } else { + GuestSP += sizeof(FEXCore::x86::_libc_fpstate); + GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86::_libc_fpstate)); } } else { - memcpy(Frame->State.xmm.sse.data, fpstate->_xmm, sizeof(Frame->State.xmm.sse.data)); - } - - // FCW store default - Frame->State.FCW = fpstate->fcw; - Frame->State.AbridgedFTW = FEXCore::FPState::ConvertToAbridgedFTW(fpstate->ftw); - - // Deconstruct FSW - Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (fpstate->fsw >> 8) & 1; - Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (fpstate->fsw >> 9) & 1; - Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (fpstate->fsw >> 10) & 1; - Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (fpstate->fsw >> 14) & 1; - Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (fpstate->fsw >> 11) & 0b111; - } - } - - void SignalDelegator::RestoreRTFrame_ia32(FEXCore::Core::InternalThreadState *Thread, ArchHelpers::Context::ContextBackup* Context, FEXCore::Core::CpuStateFrame *Frame, void *ucontext) { - const bool IsAVXEnabled = Config.SupportsAVX; - - RTSigFrame_i32 *guest_uctx = 
reinterpret_cast(Context->UContextLocation); - // If the guest modified the RIP then we need to take special precautions here - if (Context->OriginalRIP != guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EIP] || - Context->FaultToTopAndGeneratedException) { - - // Restore previous `InSyscallInfo` structure. - Frame->InSyscallInfo = Context->InSyscallInfo; - - // Hack! Go back to the top of the dispatcher top - // This is only safe inside the JIT rather than anything outside of it - ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA); - // Set our state register to point to our guest thread data - ArchHelpers::Context::SetState(ucontext, reinterpret_cast(Frame)); - - // XXX: Full context setting - CTX->SetFlagsFromCompactedEFLAGS(Thread, guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EFL]); - - Frame->State.rip = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EIP]; - Frame->State.cs_idx = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_CS]; - Frame->State.ds_idx = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_DS]; - Frame->State.es_idx = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ES]; - Frame->State.fs_idx = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_FS]; - Frame->State.gs_idx = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_GS]; - Frame->State.ss_idx = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_SS]; - - Frame->State.cs_cached = Frame->State.gdt[Frame->State.cs_idx >> 3].base; - Frame->State.ds_cached = Frame->State.gdt[Frame->State.ds_idx >> 3].base; - Frame->State.es_cached = Frame->State.gdt[Frame->State.es_idx >> 3].base; - Frame->State.fs_cached = Frame->State.gdt[Frame->State.fs_idx >> 3].base; - Frame->State.gs_cached = Frame->State.gdt[Frame->State.gs_idx >> 3].base; - Frame->State.ss_cached = Frame->State.gdt[Frame->State.ss_idx >> 3].base; - -#define COPY_REG(x) \ - Frame->State.gregs[FEXCore::X86State::REG_##x] = 
guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_##x]; - COPY_REG(RDI); - COPY_REG(RSI); - COPY_REG(RBP); - COPY_REG(RBX); - COPY_REG(RDX); - COPY_REG(RAX); - COPY_REG(RCX); - COPY_REG(RSP); -#undef COPY_REG - auto *xstate = reinterpret_cast(guest_uctx->uc.uc_mcontext.fpregs); - auto *fpstate = &xstate->fpstate; + // Signal frame layout on stack needs to be as follows + // RTSigFrame_i32 + // FPState + // Host stack location - // Copy float registers - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) { - // 32-bit st register size is only 10 bytes. Not padded to 16byte like x86-64 - memcpy(&Frame->State.mm[i], &fpstate->_st[i], 10); - } + // Remove the 4-byte pretcode. + GuestSP += sizeof(RTSigFrame_i32) - 4; + GuestSP = FEXCore::AlignUp(GuestSP, alignof(RTSigFrame_i32)); - // Extended XMM state - if (IsAVXEnabled) { - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { - memcpy(&Frame->State.xmm.avx.data[i][0], &fpstate->_xmm[i], sizeof(__uint128_t)); - } - for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { - memcpy(&Frame->State.xmm.avx.data[i][2], &xstate->ymmh.ymmh_space[i], sizeof(__uint128_t)); + if (IsAVXEnabled) { + GuestSP += sizeof(FEXCore::x86::xstate); + GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86::xstate)); + } else { + GuestSP += sizeof(FEXCore::x86::_libc_fpstate); + GuestSP = FEXCore::AlignUp(GuestSP, alignof(FEXCore::x86::_libc_fpstate)); } - } else { - memcpy(Frame->State.xmm.sse.data, fpstate->_xmm, sizeof(Frame->State.xmm.sse.data)); } - - // FCW store default - Frame->State.FCW = fpstate->fcw; - Frame->State.AbridgedFTW = FEXCore::FPState::ConvertToAbridgedFTW(fpstate->ftw); - - // Deconstruct FSW - Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (fpstate->fsw >> 8) & 1; - Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (fpstate->fsw >> 9) & 1; - Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (fpstate->fsw >> 10) & 1; - 
Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (fpstate->fsw >> 14) & 1; - Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (fpstate->fsw >> 11) & 0b111; } + + OldSP = *reinterpret_cast(GuestSP); } - uint64_t SignalDelegator::SetupFrame_x64( - FEXCore::Core::InternalThreadState *Thread, ArchHelpers::Context::ContextBackup* ContextBackup, FEXCore::Core::CpuStateFrame *Frame, - int Signal, siginfo_t *HostSigInfo, void *ucontext, - GuestSigAction *GuestAction, stack_t *GuestStack, - uint64_t NewGuestSP, const uint32_t eflags) { + uintptr_t NewSP = OldSP; + auto Context = reinterpret_cast(NewSP); - // Back up past the redzone, which is 128bytes - // 32-bit doesn't have a redzone - NewGuestSP -= 128; + // Restore host state + ArchHelpers::Context::RestoreContext(ucontext, Context); - const bool IsAVXEnabled = Config.SupportsAVX; + // Reset the guest state + memcpy(&Thread->CurrentFrame->State, &Context->GuestState, sizeof(FEXCore::Core::CPUState)); - // On 64-bit the kernel sets up the siginfo_t and ucontext_t regardless of SA_SIGINFO set. - // This allows the application to /always/ get the siginfo and ucontext even if it didn't set this flag. 
- // - // Signal frame layout on stack needs to be as follows - // void* ReturnPointer - // ucontext_t - // siginfo_t - // FP state - // Host stack location - NewGuestSP -= sizeof(uint64_t); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(uint64_t)); + if (Context->UContextLocation) { + auto Frame = Thread->CurrentFrame; - uint64_t HostStackLocation = NewGuestSP; + if (Context->Flags & ArchHelpers::Context::ContextFlags::CONTEXT_FLAG_INJIT) { + // XXX: Unsupported since it needs state reconstruction + // If we are in the JIT then SRA might need to be restored to values from the context + // We can't currently support this since it might result in tearing without real state reconstruction + } - if (IsAVXEnabled) { - NewGuestSP -= sizeof(FEXCore::x86_64::xstate); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86_64::xstate)); + if (Is64BitMode) { + RestoreFrame_x64(Thread, Context, Frame, ucontext); } else { - NewGuestSP -= sizeof(FEXCore::x86_64::_libc_fpstate); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86_64::_libc_fpstate)); + if (Type == RestoreType::TYPE_NONREALTIME) { + RestoreFrame_ia32(Thread, Context, Frame, ucontext); + } else { + RestoreRTFrame_ia32(Thread, Context, Frame, ucontext); + } } + } +} - uint64_t FPStateLocation = NewGuestSP; - - NewGuestSP -= sizeof(siginfo_t); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(siginfo_t)); - uint64_t SigInfoLocation = NewGuestSP; - - NewGuestSP -= sizeof(FEXCore::x86_64::ucontext_t); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86_64::ucontext_t)); - uint64_t UContextLocation = NewGuestSP; - - ContextBackup->FPStateLocation = FPStateLocation; - ContextBackup->UContextLocation = UContextLocation; - ContextBackup->SigInfoLocation = SigInfoLocation; - - FEXCore::x86_64::ucontext_t *guest_uctx = reinterpret_cast(UContextLocation); - siginfo_t *guest_siginfo = reinterpret_cast(SigInfoLocation); - // Store where the host context lives in the guest 
stack. - *(uint64_t*)HostStackLocation = (uint64_t)ContextBackup; - - // We have extended float information - guest_uctx->uc_flags = FEXCore::x86_64::UC_FP_XSTATE | - FEXCore::x86_64::UC_SIGCONTEXT_SS | - FEXCore::x86_64::UC_STRICT_RESTORE_SS; +void SignalDelegator::RestoreFrame_x64(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* Context, + FEXCore::Core::CpuStateFrame* Frame, void* ucontext) { + const bool IsAVXEnabled = Config.SupportsAVX; - // Pointer to where the fpreg memory is - guest_uctx->uc_mcontext.fpregs = reinterpret_cast(FPStateLocation); - auto *xstate = reinterpret_cast(FPStateLocation); - SetXStateInfo(xstate, IsAVXEnabled); + auto* guest_uctx = reinterpret_cast(Context->UContextLocation); + [[maybe_unused]] auto* guest_siginfo = reinterpret_cast(Context->SigInfoLocation); - guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_RIP] = ContextBackup->OriginalRIP; - guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_EFL] = eflags; - guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_CSGSFS] = 0; + // If the guest modified the RIP then we need to take special precautions here + if (Context->OriginalRIP != guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_RIP] || Context->FaultToTopAndGeneratedException) { - // aarch64 and x86_64 siginfo_t matches. We can just copy this over - // SI_USER could also potentially have random data in it, needs to be bit perfect - // For guest faults we don't have a real way to reconstruct state to a real guest RIP - *guest_siginfo = *HostSigInfo; + // Restore previous `InSyscallInfo` structure. + Frame->InSyscallInfo = Context->InSyscallInfo; - if (ContextBackup->FaultToTopAndGeneratedException) { - guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_TRAPNO] = Frame->SynchronousFaultData.TrapNo; - guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_ERR] = Frame->SynchronousFaultData.err_code; + // Hack! 
Go back to the top of the dispatcher top + // This is only safe inside the JIT rather than anything outside of it + ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA); + // Set our state register to point to our guest thread data + ArchHelpers::Context::SetState(ucontext, reinterpret_cast(Frame)); - // Overwrite si_code - guest_siginfo->si_code = Thread->CurrentFrame->SynchronousFaultData.si_code; - Signal = Frame->SynchronousFaultData.Signal; - } - else { - guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_TRAPNO] = ConvertSignalToTrapNo(Signal, HostSigInfo); - guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_ERR] = ConvertSignalToError(ucontext, Signal, HostSigInfo); - } - guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_OLDMASK] = 0; - guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_CR2] = 0; + Frame->State.rip = guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_RIP]; + // XXX: Full context setting + CTX->SetFlagsFromCompactedEFLAGS(Thread, guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_EFL]); -#define COPY_REG(x) \ - guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_##x] = Frame->State.gregs[FEXCore::X86State::REG_##x]; +#define COPY_REG(x) Frame->State.gregs[FEXCore::X86State::REG_##x] = guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_##x]; COPY_REG(R8); COPY_REG(R9); COPY_REG(R10); @@ -750,273 +432,150 @@ namespace FEX::HLE { COPY_REG(RCX); COPY_REG(RSP); #undef COPY_REG - + auto* xstate = reinterpret_cast(guest_uctx->uc_mcontext.fpregs); auto* fpstate = &xstate->fpstate; // Copy float registers - memcpy(fpstate->_st, Frame->State.mm, sizeof(Frame->State.mm)); + memcpy(Frame->State.mm, fpstate->_st, sizeof(Frame->State.mm)); if (IsAVXEnabled) { for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { - memcpy(&fpstate->_xmm[i], &Frame->State.xmm.avx.data[i][0], sizeof(__uint128_t)); + memcpy(&Frame->State.xmm.avx.data[i][0], &fpstate->_xmm[i], sizeof(__uint128_t)); } for (size_t 
i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { - memcpy(&xstate->ymmh.ymmh_space[i], &Frame->State.xmm.avx.data[i][2], sizeof(__uint128_t)); + memcpy(&Frame->State.xmm.avx.data[i][2], &xstate->ymmh.ymmh_space[i], sizeof(__uint128_t)); } } else { - memcpy(fpstate->_xmm, Frame->State.xmm.sse.data, sizeof(Frame->State.xmm.sse.data)); + memcpy(Frame->State.xmm.sse.data, fpstate->_xmm, sizeof(Frame->State.xmm.sse.data)); } // FCW store default - fpstate->fcw = Frame->State.FCW; - fpstate->ftw = Frame->State.AbridgedFTW; - - // Reconstruct FSW - fpstate->fsw = - (Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] << 11) | - (Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] << 8) | - (Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] << 9) | - (Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] << 10) | - (Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] << 14); - - // Copy over signal stack information - guest_uctx->uc_stack.ss_flags = GuestStack->ss_flags; - guest_uctx->uc_stack.ss_sp = GuestStack->ss_sp; - guest_uctx->uc_stack.ss_size = GuestStack->ss_size; - - // Apparently RAX is always set to zero in case of badly misbehaving C applications and variadics. - Frame->State.gregs[FEXCore::X86State::REG_RAX] = 0; - Frame->State.gregs[FEXCore::X86State::REG_RDI] = Signal; - Frame->State.gregs[FEXCore::X86State::REG_RSI] = SigInfoLocation; - Frame->State.gregs[FEXCore::X86State::REG_RDX] = UContextLocation; - - // Set up the new SP for stack handling - // The host is required to provide us a restorer. - // If the guest didn't provide a restorer then the application should fail with a SIGSEGV. - // TODO: Emulate SIGSEGV when the guest doesn't provide a restorer. 
- NewGuestSP -= 8; - if (GuestAction->restorer) { - *(uint64_t*)NewGuestSP = (uint64_t)GuestAction->restorer; - } - else { - // XXX: Emulate SIGSEGV here - // *(uint64_t*)NewGuestSP = SignalReturn; - } - - return NewGuestSP; + Frame->State.FCW = fpstate->fcw; + Frame->State.AbridgedFTW = fpstate->ftw; + + // Deconstruct FSW + Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (fpstate->fsw >> 8) & 1; + Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (fpstate->fsw >> 9) & 1; + Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (fpstate->fsw >> 10) & 1; + Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (fpstate->fsw >> 14) & 1; + Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (fpstate->fsw >> 11) & 0b111; } +} - uint64_t SignalDelegator::SetupFrame_ia32( - ArchHelpers::Context::ContextBackup* ContextBackup, FEXCore::Core::CpuStateFrame *Frame, - int Signal, siginfo_t *HostSigInfo, void *ucontext, - GuestSigAction *GuestAction, stack_t *GuestStack, - uint64_t NewGuestSP, const uint32_t eflags) { - - const bool IsAVXEnabled = Config.SupportsAVX; - const uint64_t SignalReturn = reinterpret_cast(VDSOPointers.VDSO_kernel_sigreturn); - - NewGuestSP -= sizeof(uint64_t); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(uint64_t)); - - uint64_t HostStackLocation = NewGuestSP; - - if (IsAVXEnabled) { - NewGuestSP -= sizeof(FEXCore::x86::xstate); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86::xstate)); - } else { - NewGuestSP -= sizeof(FEXCore::x86::_libc_fpstate); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86::_libc_fpstate)); - } - - uint64_t FPStateLocation = NewGuestSP; - - NewGuestSP -= sizeof(SigFrame_i32); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(SigFrame_i32)); - uint64_t SigFrameLocation = NewGuestSP; - - ContextBackup->FPStateLocation = FPStateLocation; - ContextBackup->UContextLocation = SigFrameLocation; - ContextBackup->SigInfoLocation = 0; - - SigFrame_i32 *guest_uctx = 
reinterpret_cast(SigFrameLocation); - // Store where the host context lives in the guest stack. - *(uint64_t*)HostStackLocation = (uint64_t)ContextBackup; - - // Pointer to where the fpreg memory is - guest_uctx->sc.fpstate = static_cast(FPStateLocation); - auto *xstate = reinterpret_cast(FPStateLocation); - SetXStateInfo(xstate, IsAVXEnabled); +void SignalDelegator::RestoreFrame_ia32(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* Context, + FEXCore::Core::CpuStateFrame* Frame, void* ucontext) { + const bool IsAVXEnabled = Config.SupportsAVX; - guest_uctx->sc.cs = Frame->State.cs_idx; - guest_uctx->sc.ds = Frame->State.ds_idx; - guest_uctx->sc.es = Frame->State.es_idx; - guest_uctx->sc.fs = Frame->State.fs_idx; - guest_uctx->sc.gs = Frame->State.gs_idx; - guest_uctx->sc.ss = Frame->State.ss_idx; + SigFrame_i32* guest_uctx = reinterpret_cast(Context->UContextLocation); + // If the guest modified the RIP then we need to take special precautions here + if (Context->OriginalRIP != guest_uctx->sc.ip || Context->FaultToTopAndGeneratedException) { + // Restore previous `InSyscallInfo` structure. + Frame->InSyscallInfo = Context->InSyscallInfo; - if (ContextBackup->FaultToTopAndGeneratedException) { - guest_uctx->sc.trapno = Frame->SynchronousFaultData.TrapNo; - guest_uctx->sc.err = Frame->SynchronousFaultData.err_code; - Signal = Frame->SynchronousFaultData.Signal; - } - else { - guest_uctx->sc.trapno = ConvertSignalToTrapNo(Signal, HostSigInfo); - guest_uctx->sc.err = ConvertSignalToError(ucontext, Signal, HostSigInfo); - } + // Hack! 
Go back to the top of the dispatcher top + // This is only safe inside the JIT rather than anything outside of it + ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA); + // Set our state register to point to our guest thread data + ArchHelpers::Context::SetState(ucontext, reinterpret_cast(Frame)); - guest_uctx->sc.ip = ContextBackup->OriginalRIP; - guest_uctx->sc.flags = eflags; - guest_uctx->sc.sp_at_signal = 0; - -#define COPY_REG(x, y) \ - guest_uctx->sc.x = Frame->State.gregs[FEXCore::X86State::REG_##y]; - COPY_REG(di, RDI); - COPY_REG(si, RSI); - COPY_REG(bp, RBP); - COPY_REG(bx, RBX); - COPY_REG(dx, RDX); - COPY_REG(ax, RAX); - COPY_REG(cx, RCX); - COPY_REG(sp, RSP); + // XXX: Full context setting + CTX->SetFlagsFromCompactedEFLAGS(Thread, guest_uctx->sc.flags); + + Frame->State.rip = guest_uctx->sc.ip; + Frame->State.cs_idx = guest_uctx->sc.cs; + Frame->State.ds_idx = guest_uctx->sc.ds; + Frame->State.es_idx = guest_uctx->sc.es; + Frame->State.fs_idx = guest_uctx->sc.fs; + Frame->State.gs_idx = guest_uctx->sc.gs; + Frame->State.ss_idx = guest_uctx->sc.ss; + + Frame->State.cs_cached = Frame->State.gdt[Frame->State.cs_idx >> 3].base; + Frame->State.ds_cached = Frame->State.gdt[Frame->State.ds_idx >> 3].base; + Frame->State.es_cached = Frame->State.gdt[Frame->State.es_idx >> 3].base; + Frame->State.fs_cached = Frame->State.gdt[Frame->State.fs_idx >> 3].base; + Frame->State.gs_cached = Frame->State.gdt[Frame->State.gs_idx >> 3].base; + Frame->State.ss_cached = Frame->State.gdt[Frame->State.ss_idx >> 3].base; + +#define COPY_REG(x, y) Frame->State.gregs[FEXCore::X86State::REG_##x] = guest_uctx->sc.y; + COPY_REG(RDI, di); + COPY_REG(RSI, si); + COPY_REG(RBP, bp); + COPY_REG(RBX, bx); + COPY_REG(RDX, dx); + COPY_REG(RAX, ax); + COPY_REG(RCX, cx); + COPY_REG(RSP, sp); #undef COPY_REG - - auto *fpstate = &xstate->fpstate; + auto* xstate = reinterpret_cast(guest_uctx->sc.fpstate); + auto* fpstate = &xstate->fpstate; // Copy float registers for 
(size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) { // 32-bit st register size is only 10 bytes. Not padded to 16byte like x86-64 - memcpy(&fpstate->_st[i], &Frame->State.mm[i], 10); + memcpy(&Frame->State.mm[i], &fpstate->_st[i], 10); } // Extended XMM state - fpstate->status = FEXCore::x86::fpstate_magic::MAGIC_XFPSTATE; if (IsAVXEnabled) { - for (size_t i = 0; i < std::size(Frame->State.xmm.avx.data); i++) { - memcpy(&fpstate->_xmm[i], &Frame->State.xmm.avx.data[i][0], sizeof(__uint128_t)); + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { + memcpy(&Frame->State.xmm.avx.data[i][0], &fpstate->_xmm[i], sizeof(__uint128_t)); } - for (size_t i = 0; i < std::size(Frame->State.xmm.avx.data); i++) { - memcpy(&xstate->ymmh.ymmh_space[i], &Frame->State.xmm.avx.data[i][2], sizeof(__uint128_t)); + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { + memcpy(&Frame->State.xmm.avx.data[i][2], &xstate->ymmh.ymmh_space[i], sizeof(__uint128_t)); } } else { - memcpy(fpstate->_xmm, Frame->State.xmm.sse.data, sizeof(Frame->State.xmm.sse.data)); + memcpy(Frame->State.xmm.sse.data, fpstate->_xmm, sizeof(Frame->State.xmm.sse.data)); } // FCW store default - fpstate->fcw = Frame->State.FCW; - // Reconstruct FSW - fpstate->fsw = - (Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] << 11) | - (Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] << 8) | - (Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] << 9) | - (Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] << 10) | - (Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] << 14); - fpstate->ftw = FEXCore::FPState::ConvertFromAbridgedFTW(fpstate->fsw, Frame->State.mm, Frame->State.AbridgedFTW); - - // Curiously non-rt signals don't support altstack. So that state doesn't exist here. - - // Copy over the signal information. 
- guest_uctx->Signal = Signal; - - // Retcode needs to be bit-exact for debuggers - constexpr static uint8_t retcode[] = { - 0x58, // pop eax - 0xb8, // mov - 0x77, 0x00, 0x00, 0x00, // 32-bit sigreturn - 0xcd, 0x80, // int 0x80 - }; - - memcpy(guest_uctx->retcode, &retcode, sizeof(retcode)); - - // 32-bit Guest can provide its own restorer or we need to provide our own. - // On a real host this restorer will live in VDSO. - constexpr uint32_t SA_RESTORER = 0x04000000; - const bool HasRestorer = (GuestAction->sa_flags & SA_RESTORER) == SA_RESTORER; - if (HasRestorer) { - guest_uctx->pretcode = (uint32_t)(uint64_t)GuestAction->restorer; - } - else { - guest_uctx->pretcode = SignalReturn; - LOGMAN_THROW_AA_FMT(SignalReturn < 0x1'0000'0000ULL, "This needs to be below 4GB"); - } - - // Support regparm=3 - Frame->State.gregs[FEXCore::X86State::REG_RAX] = Signal; - Frame->State.gregs[FEXCore::X86State::REG_RDX] = 0; - Frame->State.gregs[FEXCore::X86State::REG_RCX] = 0; - - return NewGuestSP; + Frame->State.FCW = fpstate->fcw; + Frame->State.AbridgedFTW = FEXCore::FPState::ConvertToAbridgedFTW(fpstate->ftw); + + // Deconstruct FSW + Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (fpstate->fsw >> 8) & 1; + Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (fpstate->fsw >> 9) & 1; + Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (fpstate->fsw >> 10) & 1; + Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (fpstate->fsw >> 14) & 1; + Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (fpstate->fsw >> 11) & 0b111; } +} - uint64_t SignalDelegator::SetupRTFrame_ia32( - ArchHelpers::Context::ContextBackup* ContextBackup, FEXCore::Core::CpuStateFrame *Frame, - int Signal, siginfo_t *HostSigInfo, void *ucontext, - GuestSigAction *GuestAction, stack_t *GuestStack, - uint64_t NewGuestSP, const uint32_t eflags) { - - const bool IsAVXEnabled = Config.SupportsAVX; - const uint64_t SignalReturn = 
reinterpret_cast(VDSOPointers.VDSO_kernel_rt_sigreturn); - - NewGuestSP -= sizeof(uint64_t); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(uint64_t)); - - uint64_t HostStackLocation = NewGuestSP; - - if (IsAVXEnabled) { - NewGuestSP -= sizeof(FEXCore::x86::xstate); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86::xstate)); - } else { - NewGuestSP -= sizeof(FEXCore::x86::_libc_fpstate); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86::_libc_fpstate)); - } - - uint64_t FPStateLocation = NewGuestSP; - - NewGuestSP -= sizeof(RTSigFrame_i32); - NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(RTSigFrame_i32)); - - uint64_t SigFrameLocation = NewGuestSP; - RTSigFrame_i32 *guest_uctx = reinterpret_cast(SigFrameLocation); - // Store where the host context lives in the guest stack. - *(uint64_t*)HostStackLocation = (uint64_t)ContextBackup; - - ContextBackup->FPStateLocation = FPStateLocation; - ContextBackup->UContextLocation = SigFrameLocation; - ContextBackup->SigInfoLocation = 0; // Part of frame. 
- - // We have extended float information - guest_uctx->uc.uc_flags = FEXCore::x86::UC_FP_XSTATE; - guest_uctx->uc.uc_link = 0; - - // Pointer to where the fpreg memory is - guest_uctx->uc.uc_mcontext.fpregs = static_cast(FPStateLocation); - auto *xstate = reinterpret_cast(FPStateLocation); - SetXStateInfo(xstate, IsAVXEnabled); +void SignalDelegator::RestoreRTFrame_ia32(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* Context, + FEXCore::Core::CpuStateFrame* Frame, void* ucontext) { + const bool IsAVXEnabled = Config.SupportsAVX; - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_CS] = Frame->State.cs_idx; - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_DS] = Frame->State.ds_idx; - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ES] = Frame->State.es_idx; - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_FS] = Frame->State.fs_idx; - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_GS] = Frame->State.gs_idx; - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_SS] = Frame->State.ss_idx; + RTSigFrame_i32* guest_uctx = reinterpret_cast(Context->UContextLocation); + // If the guest modified the RIP then we need to take special precautions here + if (Context->OriginalRIP != guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EIP] || Context->FaultToTopAndGeneratedException) { - if (ContextBackup->FaultToTopAndGeneratedException) { - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_TRAPNO] = Frame->SynchronousFaultData.TrapNo; - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ERR] = Frame->SynchronousFaultData.err_code; - Signal = Frame->SynchronousFaultData.Signal; - } - else { - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_TRAPNO] = ConvertSignalToTrapNo(Signal, HostSigInfo); - guest_uctx->info.si_code = HostSigInfo->si_code; - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ERR] = ConvertSignalToError(ucontext, Signal, HostSigInfo); - } + // Restore previous 
`InSyscallInfo` structure. + Frame->InSyscallInfo = Context->InSyscallInfo; - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EIP] = ContextBackup->OriginalRIP; - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EFL] = eflags; - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_UESP] = Frame->State.gregs[FEXCore::X86State::REG_RSP]; - guest_uctx->uc.uc_mcontext.cr2 = 0; + // Hack! Go back to the top of the dispatcher top + // This is only safe inside the JIT rather than anything outside of it + ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA); + // Set our state register to point to our guest thread data + ArchHelpers::Context::SetState(ucontext, reinterpret_cast(Frame)); -#define COPY_REG(x) \ - guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_##x] = Frame->State.gregs[FEXCore::X86State::REG_##x]; + // XXX: Full context setting + CTX->SetFlagsFromCompactedEFLAGS(Thread, guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EFL]); + + Frame->State.rip = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EIP]; + Frame->State.cs_idx = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_CS]; + Frame->State.ds_idx = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_DS]; + Frame->State.es_idx = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ES]; + Frame->State.fs_idx = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_FS]; + Frame->State.gs_idx = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_GS]; + Frame->State.ss_idx = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_SS]; + + Frame->State.cs_cached = Frame->State.gdt[Frame->State.cs_idx >> 3].base; + Frame->State.ds_cached = Frame->State.gdt[Frame->State.ds_idx >> 3].base; + Frame->State.es_cached = Frame->State.gdt[Frame->State.es_idx >> 3].base; + Frame->State.fs_cached = Frame->State.gdt[Frame->State.fs_idx >> 3].base; + Frame->State.gs_cached = Frame->State.gdt[Frame->State.gs_idx >> 3].base; + Frame->State.ss_cached = 
Frame->State.gdt[Frame->State.ss_idx >> 3].base; + +#define COPY_REG(x) Frame->State.gregs[FEXCore::X86State::REG_##x] = guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_##x]; COPY_REG(RDI); COPY_REG(RSI); COPY_REG(RBP); @@ -1026,1135 +585,1492 @@ namespace FEX::HLE { COPY_REG(RCX); COPY_REG(RSP); #undef COPY_REG - - auto *fpstate = &xstate->fpstate; + auto* xstate = reinterpret_cast(guest_uctx->uc.uc_mcontext.fpregs); + auto* fpstate = &xstate->fpstate; // Copy float registers for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) { // 32-bit st register size is only 10 bytes. Not padded to 16byte like x86-64 - memcpy(&fpstate->_st[i], &Frame->State.mm[i], 10); + memcpy(&Frame->State.mm[i], &fpstate->_st[i], 10); } // Extended XMM state - fpstate->status = FEXCore::x86::fpstate_magic::MAGIC_XFPSTATE; if (IsAVXEnabled) { - for (size_t i = 0; i < std::size(Frame->State.xmm.avx.data); i++) { - memcpy(&fpstate->_xmm[i], &Frame->State.xmm.avx.data[i][0], sizeof(__uint128_t)); + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { + memcpy(&Frame->State.xmm.avx.data[i][0], &fpstate->_xmm[i], sizeof(__uint128_t)); } - for (size_t i = 0; i < std::size(Frame->State.xmm.avx.data); i++) { - memcpy(&xstate->ymmh.ymmh_space[i], &Frame->State.xmm.avx.data[i][2], sizeof(__uint128_t)); + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { + memcpy(&Frame->State.xmm.avx.data[i][2], &xstate->ymmh.ymmh_space[i], sizeof(__uint128_t)); } } else { - memcpy(fpstate->_xmm, Frame->State.xmm.sse.data, sizeof(Frame->State.xmm.sse.data)); + memcpy(Frame->State.xmm.sse.data, fpstate->_xmm, sizeof(Frame->State.xmm.sse.data)); } // FCW store default - fpstate->fcw = Frame->State.FCW; - // Reconstruct FSW - fpstate->fsw = - (Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] << 11) | - (Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] << 8) | - (Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] << 9) | - 
(Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] << 10) | - (Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] << 14); - fpstate->ftw = FEXCore::FPState::ConvertFromAbridgedFTW(fpstate->fsw, Frame->State.mm, Frame->State.AbridgedFTW); - - // Copy over signal stack information - guest_uctx->uc.uc_stack.ss_flags = GuestStack->ss_flags; - guest_uctx->uc.uc_stack.ss_sp = static_cast(reinterpret_cast(GuestStack->ss_sp)); - guest_uctx->uc.uc_stack.ss_size = GuestStack->ss_size; - - // Setup siginfo - if (ContextBackup->FaultToTopAndGeneratedException) { - guest_uctx->info.si_code = Frame->SynchronousFaultData.si_code; - } - else { - guest_uctx->info.si_code = HostSigInfo->si_code; - } - - // These three elements are in every siginfo - guest_uctx->info.si_signo = HostSigInfo->si_signo; - guest_uctx->info.si_errno = HostSigInfo->si_errno; - - const SigInfoLayout Layout = CalculateSigInfoLayout(Signal, guest_uctx->info.si_code); - - switch (Layout) { - case SigInfoLayout::LAYOUT_KILL: - guest_uctx->info._sifields._kill.pid = HostSigInfo->si_pid; - guest_uctx->info._sifields._kill.uid = HostSigInfo->si_uid; - break; - case SigInfoLayout::LAYOUT_TIMER: - guest_uctx->info._sifields._timer.tid = HostSigInfo->si_timerid; - guest_uctx->info._sifields._timer.overrun = HostSigInfo->si_overrun; - guest_uctx->info._sifields._timer.sigval.sival_int = HostSigInfo->si_int; - break; - case SigInfoLayout::LAYOUT_POLL: - guest_uctx->info._sifields._poll.band= HostSigInfo->si_band; - guest_uctx->info._sifields._poll.fd= HostSigInfo->si_fd; - break; - case SigInfoLayout::LAYOUT_FAULT: - // Macro expansion to get the si_addr - // This is the address trying to be accessed, not the RIP - guest_uctx->info._sifields._sigfault.addr = static_cast(reinterpret_cast(HostSigInfo->si_addr)); - break; - case SigInfoLayout::LAYOUT_FAULT_RIP: - // Macro expansion to get the si_addr - // Can't really give a real result here. 
Pull from the context for now - guest_uctx->info._sifields._sigfault.addr = ContextBackup->OriginalRIP; - break; - case SigInfoLayout::LAYOUT_CHLD: - guest_uctx->info._sifields._sigchld.pid = HostSigInfo->si_pid; - guest_uctx->info._sifields._sigchld.uid = HostSigInfo->si_uid; - guest_uctx->info._sifields._sigchld.status = HostSigInfo->si_status; - guest_uctx->info._sifields._sigchld.utime = HostSigInfo->si_utime; - guest_uctx->info._sifields._sigchld.stime = HostSigInfo->si_stime; - break; - case SigInfoLayout::LAYOUT_RT: - guest_uctx->info._sifields._rt.pid = HostSigInfo->si_pid; - guest_uctx->info._sifields._rt.uid = HostSigInfo->si_uid; - guest_uctx->info._sifields._rt.sigval.sival_int = HostSigInfo->si_int; - break; - case SigInfoLayout::LAYOUT_SYS: - guest_uctx->info._sifields._sigsys.call_addr = static_cast(reinterpret_cast(HostSigInfo->si_call_addr)); - guest_uctx->info._sifields._sigsys.syscall = HostSigInfo->si_syscall; - // We need to lie about the architecture here. - // Otherwise we would expose incorrect information to the guest. - constexpr uint32_t AUDIT_LE = 0x4000'0000U; - constexpr uint32_t MACHINE_I386 = 3; // This matches the ELF definition. - guest_uctx->info._sifields._sigsys.arch = AUDIT_LE | MACHINE_I386; - break; - } + Frame->State.FCW = fpstate->fcw; + Frame->State.AbridgedFTW = FEXCore::FPState::ConvertToAbridgedFTW(fpstate->ftw); + + // Deconstruct FSW + Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (fpstate->fsw >> 8) & 1; + Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (fpstate->fsw >> 9) & 1; + Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (fpstate->fsw >> 10) & 1; + Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (fpstate->fsw >> 14) & 1; + Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (fpstate->fsw >> 11) & 0b111; + } +} - // Setup the guest stack context. 
- guest_uctx->Signal = Signal; - guest_uctx->pinfo = (uint32_t)(uint64_t)&guest_uctx->info; - guest_uctx->puc = (uint32_t)(uint64_t)&guest_uctx->uc; - - // Retcode needs to be bit-exact for debuggers - constexpr static uint8_t rt_retcode[] = { - 0xb8, // mov - 0xad, 0x00, 0x00, 0x00, // 32-bit rt_sigreturn - 0xcd, 0x80, // int 0x80 - 0x0, // Pad - }; - - memcpy(guest_uctx->retcode, &rt_retcode, sizeof(rt_retcode)); - - // 32-bit Guest can provide its own restorer or we need to provide our own. - // On a real host this restorer will live in VDSO. - constexpr uint32_t SA_RESTORER = 0x04000000; - const bool HasRestorer = (GuestAction->sa_flags & SA_RESTORER) == SA_RESTORER; - if (HasRestorer) { - guest_uctx->pretcode = (uint32_t)(uint64_t)GuestAction->restorer; - } - else { - guest_uctx->pretcode = SignalReturn; - LOGMAN_THROW_AA_FMT(SignalReturn < 0x1'0000'0000ULL, "This needs to be below 4GB"); - } +uint64_t SignalDelegator::SetupFrame_x64(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* ContextBackup, + FEXCore::Core::CpuStateFrame* Frame, int Signal, siginfo_t* HostSigInfo, void* ucontext, + GuestSigAction* GuestAction, stack_t* GuestStack, uint64_t NewGuestSP, const uint32_t eflags) { + + // Back up past the redzone, which is 128bytes + // 32-bit doesn't have a redzone + NewGuestSP -= 128; + + const bool IsAVXEnabled = Config.SupportsAVX; + + // On 64-bit the kernel sets up the siginfo_t and ucontext_t regardless of SA_SIGINFO set. + // This allows the application to /always/ get the siginfo and ucontext even if it didn't set this flag. 
+ // + // Signal frame layout on stack needs to be as follows + // void* ReturnPointer + // ucontext_t + // siginfo_t + // FP state + // Host stack location + NewGuestSP -= sizeof(uint64_t); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(uint64_t)); + + uint64_t HostStackLocation = NewGuestSP; + + if (IsAVXEnabled) { + NewGuestSP -= sizeof(FEXCore::x86_64::xstate); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86_64::xstate)); + } else { + NewGuestSP -= sizeof(FEXCore::x86_64::_libc_fpstate); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86_64::_libc_fpstate)); + } - // Support regparm=3 - Frame->State.gregs[FEXCore::X86State::REG_RAX] = Signal; - Frame->State.gregs[FEXCore::X86State::REG_RDX] = guest_uctx->pinfo; - Frame->State.gregs[FEXCore::X86State::REG_RCX] = guest_uctx->puc; + uint64_t FPStateLocation = NewGuestSP; - return NewGuestSP; - } + NewGuestSP -= sizeof(siginfo_t); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(siginfo_t)); + uint64_t SigInfoLocation = NewGuestSP; - bool SignalDelegator::HandleDispatcherGuestSignal(FEXCore::Core::InternalThreadState *Thread, int Signal, void *info, void *ucontext, GuestSigAction *GuestAction, stack_t *GuestStack) { - auto ContextBackup = StoreThreadState(Thread, Signal, ucontext); + NewGuestSP -= sizeof(FEXCore::x86_64::ucontext_t); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86_64::ucontext_t)); + uint64_t UContextLocation = NewGuestSP; - auto Frame = Thread->CurrentFrame; + ContextBackup->FPStateLocation = FPStateLocation; + ContextBackup->UContextLocation = UContextLocation; + ContextBackup->SigInfoLocation = SigInfoLocation; - // Ref count our faults - // We use this to track if it is safe to clear cache - ++Thread->CurrentFrame->SignalHandlerRefCounter; + FEXCore::x86_64::ucontext_t* guest_uctx = reinterpret_cast(UContextLocation); + siginfo_t* guest_siginfo = reinterpret_cast(SigInfoLocation); + // Store where the host context lives in the 
guest stack. + *(uint64_t*)HostStackLocation = (uint64_t)ContextBackup; - uint64_t OldPC = ArchHelpers::Context::GetPc(ucontext); - const bool WasInJIT = Thread->CPUBackend->IsAddressInCodeBuffer(OldPC); + // We have extended float information + guest_uctx->uc_flags = FEXCore::x86_64::UC_FP_XSTATE | FEXCore::x86_64::UC_SIGCONTEXT_SS | FEXCore::x86_64::UC_STRICT_RESTORE_SS; - // Spill the SRA regardless of signal handler type - // We are going to be returning to the top of the dispatcher which will fill again - // Otherwise we might load garbage - if (WasInJIT) { - uint32_t IgnoreMask{}; -#ifdef _M_ARM_64 - if (Frame->InSyscallInfo != 0) { - // We are in a syscall, this means we are in a weird register state - // We need to spill SRA but only some of it, since some values have already been spilled - // Lower 16 bits tells us which registers are already spilled to the context - // So we ignore spilling those ones - IgnoreMask = Frame->InSyscallInfo & 0xFFFF; - } - else { - // We must spill everything - IgnoreMask = 0; - } -#endif + // Pointer to where the fpreg memory is + guest_uctx->uc_mcontext.fpregs = reinterpret_cast(FPStateLocation); + auto* xstate = reinterpret_cast(FPStateLocation); + SetXStateInfo(xstate, IsAVXEnabled); - // We are in jit, SRA must be spilled - SpillSRA(Thread, ucontext, IgnoreMask); + guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_RIP] = ContextBackup->OriginalRIP; + guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_EFL] = eflags; + guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_CSGSFS] = 0; - ContextBackup->Flags |= ArchHelpers::Context::ContextFlags::CONTEXT_FLAG_INJIT; + // aarch64 and x86_64 siginfo_t matches. We can just copy this over + // SI_USER could also potentially have random data in it, needs to be bit perfect + // For guest faults we don't have a real way to reconstruct state to a real guest RIP + *guest_siginfo = *HostSigInfo; - // We are leaving the syscall information behind. 
Make sure to store the previous state. - ContextBackup->InSyscallInfo = Thread->CurrentFrame->InSyscallInfo; - Thread->CurrentFrame->InSyscallInfo = 0; - } else { - if (!IsAddressInDispatcher(OldPC)) { - // This is likely to cause issues but in some cases it isn't fatal - // This can also happen if we have put a signal on hold, then we just reenabled the signal - // So we are in the syscall handler - // Only throw a log message in this case - if constexpr (false) { - // XXX: Messages in the signal handler can cause us to crash - LogMan::Msg::EFmt("Signals in dispatcher have unsynchronized context"); - } - } - } + if (ContextBackup->FaultToTopAndGeneratedException) { + guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_TRAPNO] = Frame->SynchronousFaultData.TrapNo; + guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_ERR] = Frame->SynchronousFaultData.err_code; - uint64_t OldGuestSP = Frame->State.gregs[FEXCore::X86State::REG_RSP]; - uint64_t NewGuestSP = OldGuestSP; - - // altstack is only used if the signal handler was setup with SA_ONSTACK - if (GuestAction->sa_flags & SA_ONSTACK) { - // Additionally the altstack is only used if the enabled (SS_DISABLE flag is not set) - if (!(GuestStack->ss_flags & SS_DISABLE)) { - // If our guest is already inside of the alternative stack - // Then that means we are hitting recursive signals and we need to walk back the stack correctly - uint64_t AltStackBase = reinterpret_cast(GuestStack->ss_sp); - uint64_t AltStackEnd = AltStackBase + GuestStack->ss_size; - if (OldGuestSP >= AltStackBase && - OldGuestSP <= AltStackEnd) { - // We are already in the alt stack, the rest of the code will handle adjusting this - } - else { - NewGuestSP = AltStackEnd; - } - } - } + // Overwrite si_code + guest_siginfo->si_code = Thread->CurrentFrame->SynchronousFaultData.si_code; + Signal = Frame->SynchronousFaultData.Signal; + } else { + guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_TRAPNO] = ConvertSignalToTrapNo(Signal, 
HostSigInfo); + guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_ERR] = ConvertSignalToError(ucontext, Signal, HostSigInfo); + } + guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_OLDMASK] = 0; + guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_CR2] = 0; + +#define COPY_REG(x) guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_##x] = Frame->State.gregs[FEXCore::X86State::REG_##x]; + COPY_REG(R8); + COPY_REG(R9); + COPY_REG(R10); + COPY_REG(R11); + COPY_REG(R12); + COPY_REG(R13); + COPY_REG(R14); + COPY_REG(R15); + COPY_REG(RDI); + COPY_REG(RSI); + COPY_REG(RBP); + COPY_REG(RBX); + COPY_REG(RDX); + COPY_REG(RAX); + COPY_REG(RCX); + COPY_REG(RSP); +#undef COPY_REG - // siginfo_t - siginfo_t *HostSigInfo = reinterpret_cast(info); + auto* fpstate = &xstate->fpstate; - // Backup where we think the RIP currently is - ContextBackup->OriginalRIP = CTX->RestoreRIPFromHostPC(Thread, ArchHelpers::Context::GetPc(ucontext)); - // Calculate eflags upfront. - uint32_t eflags = CTX->ReconstructCompactedEFLAGS(Thread, WasInJIT, ArchHelpers::Context::GetArmGPRs(ucontext), ArchHelpers::Context::GetArmPState(ucontext)); + // Copy float registers + memcpy(fpstate->_st, Frame->State.mm, sizeof(Frame->State.mm)); - if (Is64BitMode) { - NewGuestSP = SetupFrame_x64(Thread, ContextBackup, Frame, Signal, HostSigInfo, ucontext, GuestAction, GuestStack, NewGuestSP, eflags); + if (IsAVXEnabled) { + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { + memcpy(&fpstate->_xmm[i], &Frame->State.xmm.avx.data[i][0], sizeof(__uint128_t)); } - else { - const bool SigInfoFrame = (GuestAction->sa_flags & SA_SIGINFO) == SA_SIGINFO; - if (SigInfoFrame) { - NewGuestSP = SetupRTFrame_ia32(ContextBackup, Frame, Signal, HostSigInfo, ucontext, GuestAction, GuestStack, NewGuestSP, eflags); - } - else { - NewGuestSP = SetupFrame_ia32(ContextBackup, Frame, Signal, HostSigInfo, ucontext, GuestAction, GuestStack, NewGuestSP, eflags); - } + for (size_t i = 0; i < 
FEXCore::Core::CPUState::NUM_XMMS; i++) { + memcpy(&xstate->ymmh.ymmh_space[i], &Frame->State.xmm.avx.data[i][2], sizeof(__uint128_t)); } + } else { + memcpy(fpstate->_xmm, Frame->State.xmm.sse.data, sizeof(Frame->State.xmm.sse.data)); + } - Frame->State.rip = reinterpret_cast(GuestAction->sigaction_handler.sigaction); - Frame->State.gregs[FEXCore::X86State::REG_RSP] = NewGuestSP; + // FCW store default + fpstate->fcw = Frame->State.FCW; + fpstate->ftw = Frame->State.AbridgedFTW; + + // Reconstruct FSW + fpstate->fsw = (Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] << 11) | + (Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] << 8) | (Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] << 9) | + (Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] << 10) | (Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] << 14); + + // Copy over signal stack information + guest_uctx->uc_stack.ss_flags = GuestStack->ss_flags; + guest_uctx->uc_stack.ss_sp = GuestStack->ss_sp; + guest_uctx->uc_stack.ss_size = GuestStack->ss_size; + + // Apparently RAX is always set to zero in case of badly misbehaving C applications and variadics. + Frame->State.gregs[FEXCore::X86State::REG_RAX] = 0; + Frame->State.gregs[FEXCore::X86State::REG_RDI] = Signal; + Frame->State.gregs[FEXCore::X86State::REG_RSI] = SigInfoLocation; + Frame->State.gregs[FEXCore::X86State::REG_RDX] = UContextLocation; + + // Set up the new SP for stack handling + // The host is required to provide us a restorer. + // If the guest didn't provide a restorer then the application should fail with a SIGSEGV. + // TODO: Emulate SIGSEGV when the guest doesn't provide a restorer. + NewGuestSP -= 8; + if (GuestAction->restorer) { + *(uint64_t*)NewGuestSP = (uint64_t)GuestAction->restorer; + } else { + // XXX: Emulate SIGSEGV here + // *(uint64_t*)NewGuestSP = SignalReturn; + } - // The guest starts its signal frame with a zero initialized FPU - // Set that up now. 
Little bit costly but it's a requirement - // This state will be restored on rt_sigreturn - memset(Frame->State.xmm.avx.data, 0, sizeof(Frame->State.xmm)); - memset(Frame->State.mm, 0, sizeof(Frame->State.mm)); - Frame->State.FCW = 0x37F; - Frame->State.AbridgedFTW = 0; + return NewGuestSP; +} - // Set the new PC - ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA); - // Set our state register to point to our guest thread data - ArchHelpers::Context::SetState(ucontext, reinterpret_cast(Frame)); +uint64_t SignalDelegator::SetupFrame_ia32(ArchHelpers::Context::ContextBackup* ContextBackup, FEXCore::Core::CpuStateFrame* Frame, + int Signal, siginfo_t* HostSigInfo, void* ucontext, GuestSigAction* GuestAction, + stack_t* GuestStack, uint64_t NewGuestSP, const uint32_t eflags) { - return true; + const bool IsAVXEnabled = Config.SupportsAVX; + const uint64_t SignalReturn = reinterpret_cast(VDSOPointers.VDSO_kernel_sigreturn); + + NewGuestSP -= sizeof(uint64_t); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(uint64_t)); + + uint64_t HostStackLocation = NewGuestSP; + + if (IsAVXEnabled) { + NewGuestSP -= sizeof(FEXCore::x86::xstate); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86::xstate)); + } else { + NewGuestSP -= sizeof(FEXCore::x86::_libc_fpstate); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86::_libc_fpstate)); } - bool SignalDelegator::HandleSIGILL(FEXCore::Core::InternalThreadState *Thread, int Signal, void *info, void *ucontext) { - if (ArchHelpers::Context::GetPc(ucontext) == Config.SignalHandlerReturnAddress || - ArchHelpers::Context::GetPc(ucontext) == Config.SignalHandlerReturnAddressRT) { - RestoreThreadState(Thread, ucontext, - ArchHelpers::Context::GetPc(ucontext) == Config.SignalHandlerReturnAddressRT ? 
RestoreType::TYPE_REALTIME : RestoreType::TYPE_NONREALTIME); + uint64_t FPStateLocation = NewGuestSP; + + NewGuestSP -= sizeof(SigFrame_i32); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(SigFrame_i32)); + uint64_t SigFrameLocation = NewGuestSP; + + ContextBackup->FPStateLocation = FPStateLocation; + ContextBackup->UContextLocation = SigFrameLocation; + ContextBackup->SigInfoLocation = 0; + + SigFrame_i32* guest_uctx = reinterpret_cast(SigFrameLocation); + // Store where the host context lives in the guest stack. + *(uint64_t*)HostStackLocation = (uint64_t)ContextBackup; + + // Pointer to where the fpreg memory is + guest_uctx->sc.fpstate = static_cast(FPStateLocation); + auto* xstate = reinterpret_cast(FPStateLocation); + SetXStateInfo(xstate, IsAVXEnabled); + + guest_uctx->sc.cs = Frame->State.cs_idx; + guest_uctx->sc.ds = Frame->State.ds_idx; + guest_uctx->sc.es = Frame->State.es_idx; + guest_uctx->sc.fs = Frame->State.fs_idx; + guest_uctx->sc.gs = Frame->State.gs_idx; + guest_uctx->sc.ss = Frame->State.ss_idx; + + if (ContextBackup->FaultToTopAndGeneratedException) { + guest_uctx->sc.trapno = Frame->SynchronousFaultData.TrapNo; + guest_uctx->sc.err = Frame->SynchronousFaultData.err_code; + Signal = Frame->SynchronousFaultData.Signal; + } else { + guest_uctx->sc.trapno = ConvertSignalToTrapNo(Signal, HostSigInfo); + guest_uctx->sc.err = ConvertSignalToError(ucontext, Signal, HostSigInfo); + } - // Ref count our faults - // We use this to track if it is safe to clear cache - --Thread->CurrentFrame->SignalHandlerRefCounter; + guest_uctx->sc.ip = ContextBackup->OriginalRIP; + guest_uctx->sc.flags = eflags; + guest_uctx->sc.sp_at_signal = 0; + +#define COPY_REG(x, y) guest_uctx->sc.x = Frame->State.gregs[FEXCore::X86State::REG_##y]; + COPY_REG(di, RDI); + COPY_REG(si, RSI); + COPY_REG(bp, RBP); + COPY_REG(bx, RBX); + COPY_REG(dx, RDX); + COPY_REG(ax, RAX); + COPY_REG(cx, RCX); + COPY_REG(sp, RSP); +#undef COPY_REG - if (Thread->DeferredSignalFrames.size() != 
0) { - // If we have more deferred frames to process then mprotect back to PROT_NONE. - // It will have been RW coming in to this sigreturn and now we need to remove permissions - // to ensure FEX trampolines back to the SIGSEGV deferred handler. - mprotect(reinterpret_cast(Thread->CurrentFrame->State.DeferredSignalFaultAddress), 4096, PROT_NONE); - } - return true; - } + auto* fpstate = &xstate->fpstate; - if (ArchHelpers::Context::GetPc(ucontext) == Config.PauseReturnInstruction) { - RestoreThreadState(Thread, ucontext, RestoreType::TYPE_PAUSE); + // Copy float registers + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) { + // 32-bit st register size is only 10 bytes. Not padded to 16byte like x86-64 + memcpy(&fpstate->_st[i], &Frame->State.mm[i], 10); + } - // Ref count our faults - // We use this to track if it is safe to clear cache - --Thread->CurrentFrame->SignalHandlerRefCounter; - return true; + // Extended XMM state + fpstate->status = FEXCore::x86::fpstate_magic::MAGIC_XFPSTATE; + if (IsAVXEnabled) { + for (size_t i = 0; i < std::size(Frame->State.xmm.avx.data); i++) { + memcpy(&fpstate->_xmm[i], &Frame->State.xmm.avx.data[i][0], sizeof(__uint128_t)); } - - return false; + for (size_t i = 0; i < std::size(Frame->State.xmm.avx.data); i++) { + memcpy(&xstate->ymmh.ymmh_space[i], &Frame->State.xmm.avx.data[i][2], sizeof(__uint128_t)); + } + } else { + memcpy(fpstate->_xmm, Frame->State.xmm.sse.data, sizeof(Frame->State.xmm.sse.data)); } - bool SignalDelegator::HandleSignalPause(FEXCore::Core::InternalThreadState *Thread, int Signal, void *info, void *ucontext) { - FEXCore::Core::SignalEvent SignalReason = Thread->SignalReason.load(); - auto Frame = Thread->CurrentFrame; + // FCW store default + fpstate->fcw = Frame->State.FCW; + // Reconstruct FSW + fpstate->fsw = (Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] << 11) | + (Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] << 8) | 
(Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] << 9) | + (Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] << 10) | (Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] << 14); + fpstate->ftw = FEXCore::FPState::ConvertFromAbridgedFTW(fpstate->fsw, Frame->State.mm, Frame->State.AbridgedFTW); + + // Curiously non-rt signals don't support altstack. So that state doesn't exist here. + + // Copy over the signal information. + guest_uctx->Signal = Signal; + + // Retcode needs to be bit-exact for debuggers + constexpr static uint8_t retcode[] = { + 0x58, // pop eax + 0xb8, // mov + 0x77, 0x00, 0x00, 0x00, // 32-bit sigreturn + 0xcd, 0x80, // int 0x80 + }; - if (SignalReason == FEXCore::Core::SignalEvent::Pause) { - // Store our thread state so we can come back to this - StoreThreadState(Thread, Signal, ucontext); + memcpy(guest_uctx->retcode, &retcode, sizeof(retcode)); + + // 32-bit Guest can provide its own restorer or we need to provide our own. + // On a real host this restorer will live in VDSO. 
+ constexpr uint32_t SA_RESTORER = 0x04000000; + const bool HasRestorer = (GuestAction->sa_flags & SA_RESTORER) == SA_RESTORER; + if (HasRestorer) { + guest_uctx->pretcode = (uint32_t)(uint64_t)GuestAction->restorer; + } else { + guest_uctx->pretcode = SignalReturn; + LOGMAN_THROW_AA_FMT(SignalReturn < 0x1'0000'0000ULL, "This needs to be below 4GB"); + } - if (Thread->CPUBackend->IsAddressInCodeBuffer(ArchHelpers::Context::GetPc(ucontext))) { - // We are in jit, SRA must be spilled - ArchHelpers::Context::SetPc(ucontext, Config.ThreadPauseHandlerAddressSpillSRA); - } else { - // We are in non-jit, SRA is already spilled - LOGMAN_THROW_A_FMT(!IsAddressInDispatcher(ArchHelpers::Context::GetPc(ucontext)), - "Signals in dispatcher have unsynchronized context"); - ArchHelpers::Context::SetPc(ucontext, Config.ThreadPauseHandlerAddress); - } + // Support regparm=3 + Frame->State.gregs[FEXCore::X86State::REG_RAX] = Signal; + Frame->State.gregs[FEXCore::X86State::REG_RDX] = 0; + Frame->State.gregs[FEXCore::X86State::REG_RCX] = 0; - // Set our state register to point to our guest thread data - ArchHelpers::Context::SetState(ucontext, reinterpret_cast(Frame)); + return NewGuestSP; +} - // Ref count our faults - // We use this to track if it is safe to clear cache - ++Thread->CurrentFrame->SignalHandlerRefCounter; +uint64_t SignalDelegator::SetupRTFrame_ia32(ArchHelpers::Context::ContextBackup* ContextBackup, FEXCore::Core::CpuStateFrame* Frame, + int Signal, siginfo_t* HostSigInfo, void* ucontext, GuestSigAction* GuestAction, + stack_t* GuestStack, uint64_t NewGuestSP, const uint32_t eflags) { - Thread->SignalReason.store(FEXCore::Core::SignalEvent::Nothing); - return true; - } + const bool IsAVXEnabled = Config.SupportsAVX; + const uint64_t SignalReturn = reinterpret_cast(VDSOPointers.VDSO_kernel_rt_sigreturn); - if (SignalReason == FEXCore::Core::SignalEvent::Stop) { - // Our thread is stopping - // We don't care about anything at this point - // Set the stack to our 
starting location when we entered the core and get out safely - ArchHelpers::Context::SetSp(ucontext, Frame->ReturningStackLocation); + NewGuestSP -= sizeof(uint64_t); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(uint64_t)); - // Our ref counting doesn't matter anymore - Thread->CurrentFrame->SignalHandlerRefCounter = 0; + uint64_t HostStackLocation = NewGuestSP; - // Set the new PC - if (Thread->CPUBackend->IsAddressInCodeBuffer(ArchHelpers::Context::GetPc(ucontext))) { - // We are in jit, SRA must be spilled - ArchHelpers::Context::SetPc(ucontext, Config.ThreadStopHandlerAddressSpillSRA); - } else { - // We are in non-jit, SRA is already spilled - LOGMAN_THROW_A_FMT(!IsAddressInDispatcher(ArchHelpers::Context::GetPc(ucontext)), - "Signals in dispatcher have unsynchronized context"); - ArchHelpers::Context::SetPc(ucontext, Config.ThreadStopHandlerAddress); - } + if (IsAVXEnabled) { + NewGuestSP -= sizeof(FEXCore::x86::xstate); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86::xstate)); + } else { + NewGuestSP -= sizeof(FEXCore::x86::_libc_fpstate); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86::_libc_fpstate)); + } - // We need to be a little bit careful here - // If we were already paused (due to GDB) and we are immediately stopping (due to gdb kill) - // Then we need to ensure we don't double decrement our idle thread counter - if (Thread->RunningEvents.ThreadSleeping) { - // If the thread was sleeping then its idle counter was decremented - // Reincrement it here to not break logic - FEX::HLE::_SyscallHandler->TM.IncrementIdleRefCount(); + uint64_t FPStateLocation = NewGuestSP; + + NewGuestSP -= sizeof(RTSigFrame_i32); + NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(RTSigFrame_i32)); + + uint64_t SigFrameLocation = NewGuestSP; + RTSigFrame_i32* guest_uctx = reinterpret_cast(SigFrameLocation); + // Store where the host context lives in the guest stack. 
+ *(uint64_t*)HostStackLocation = (uint64_t)ContextBackup; + + ContextBackup->FPStateLocation = FPStateLocation; + ContextBackup->UContextLocation = SigFrameLocation; + ContextBackup->SigInfoLocation = 0; // Part of frame. + + // We have extended float information + guest_uctx->uc.uc_flags = FEXCore::x86::UC_FP_XSTATE; + guest_uctx->uc.uc_link = 0; + + // Pointer to where the fpreg memory is + guest_uctx->uc.uc_mcontext.fpregs = static_cast(FPStateLocation); + auto* xstate = reinterpret_cast(FPStateLocation); + SetXStateInfo(xstate, IsAVXEnabled); + + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_CS] = Frame->State.cs_idx; + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_DS] = Frame->State.ds_idx; + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ES] = Frame->State.es_idx; + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_FS] = Frame->State.fs_idx; + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_GS] = Frame->State.gs_idx; + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_SS] = Frame->State.ss_idx; + + if (ContextBackup->FaultToTopAndGeneratedException) { + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_TRAPNO] = Frame->SynchronousFaultData.TrapNo; + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ERR] = Frame->SynchronousFaultData.err_code; + Signal = Frame->SynchronousFaultData.Signal; + } else { + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_TRAPNO] = ConvertSignalToTrapNo(Signal, HostSigInfo); + guest_uctx->info.si_code = HostSigInfo->si_code; + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ERR] = ConvertSignalToError(ucontext, Signal, HostSigInfo); + } + + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EIP] = ContextBackup->OriginalRIP; + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EFL] = eflags; + guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_UESP] = Frame->State.gregs[FEXCore::X86State::REG_RSP]; + guest_uctx->uc.uc_mcontext.cr2 = 0; + +#define 
COPY_REG(x) guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_##x] = Frame->State.gregs[FEXCore::X86State::REG_##x]; + COPY_REG(RDI); + COPY_REG(RSI); + COPY_REG(RBP); + COPY_REG(RBX); + COPY_REG(RDX); + COPY_REG(RAX); + COPY_REG(RCX); + COPY_REG(RSP); +#undef COPY_REG + + auto* fpstate = &xstate->fpstate; + + // Copy float registers + for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) { + // 32-bit st register size is only 10 bytes. Not padded to 16byte like x86-64 + memcpy(&fpstate->_st[i], &Frame->State.mm[i], 10); + } + + // Extended XMM state + fpstate->status = FEXCore::x86::fpstate_magic::MAGIC_XFPSTATE; + if (IsAVXEnabled) { + for (size_t i = 0; i < std::size(Frame->State.xmm.avx.data); i++) { + memcpy(&fpstate->_xmm[i], &Frame->State.xmm.avx.data[i][0], sizeof(__uint128_t)); + } + for (size_t i = 0; i < std::size(Frame->State.xmm.avx.data); i++) { + memcpy(&xstate->ymmh.ymmh_space[i], &Frame->State.xmm.avx.data[i][2], sizeof(__uint128_t)); + } + } else { + memcpy(fpstate->_xmm, Frame->State.xmm.sse.data, sizeof(Frame->State.xmm.sse.data)); + } + + // FCW store default + fpstate->fcw = Frame->State.FCW; + // Reconstruct FSW + fpstate->fsw = (Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] << 11) | + (Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] << 8) | (Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] << 9) | + (Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] << 10) | (Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] << 14); + fpstate->ftw = FEXCore::FPState::ConvertFromAbridgedFTW(fpstate->fsw, Frame->State.mm, Frame->State.AbridgedFTW); + + // Copy over signal stack information + guest_uctx->uc.uc_stack.ss_flags = GuestStack->ss_flags; + guest_uctx->uc.uc_stack.ss_sp = static_cast(reinterpret_cast(GuestStack->ss_sp)); + guest_uctx->uc.uc_stack.ss_size = GuestStack->ss_size; + + // Setup siginfo + if (ContextBackup->FaultToTopAndGeneratedException) { + guest_uctx->info.si_code = 
Frame->SynchronousFaultData.si_code; + } else { + guest_uctx->info.si_code = HostSigInfo->si_code; + } + + // These three elements are in every siginfo + guest_uctx->info.si_signo = HostSigInfo->si_signo; + guest_uctx->info.si_errno = HostSigInfo->si_errno; + + const SigInfoLayout Layout = CalculateSigInfoLayout(Signal, guest_uctx->info.si_code); + + switch (Layout) { + case SigInfoLayout::LAYOUT_KILL: + guest_uctx->info._sifields._kill.pid = HostSigInfo->si_pid; + guest_uctx->info._sifields._kill.uid = HostSigInfo->si_uid; + break; + case SigInfoLayout::LAYOUT_TIMER: + guest_uctx->info._sifields._timer.tid = HostSigInfo->si_timerid; + guest_uctx->info._sifields._timer.overrun = HostSigInfo->si_overrun; + guest_uctx->info._sifields._timer.sigval.sival_int = HostSigInfo->si_int; + break; + case SigInfoLayout::LAYOUT_POLL: + guest_uctx->info._sifields._poll.band = HostSigInfo->si_band; + guest_uctx->info._sifields._poll.fd = HostSigInfo->si_fd; + break; + case SigInfoLayout::LAYOUT_FAULT: + // Macro expansion to get the si_addr + // This is the address trying to be accessed, not the RIP + guest_uctx->info._sifields._sigfault.addr = static_cast(reinterpret_cast(HostSigInfo->si_addr)); + break; + case SigInfoLayout::LAYOUT_FAULT_RIP: + // Macro expansion to get the si_addr + // Can't really give a real result here. 
Pull from the context for now + guest_uctx->info._sifields._sigfault.addr = ContextBackup->OriginalRIP; + break; + case SigInfoLayout::LAYOUT_CHLD: + guest_uctx->info._sifields._sigchld.pid = HostSigInfo->si_pid; + guest_uctx->info._sifields._sigchld.uid = HostSigInfo->si_uid; + guest_uctx->info._sifields._sigchld.status = HostSigInfo->si_status; + guest_uctx->info._sifields._sigchld.utime = HostSigInfo->si_utime; + guest_uctx->info._sifields._sigchld.stime = HostSigInfo->si_stime; + break; + case SigInfoLayout::LAYOUT_RT: + guest_uctx->info._sifields._rt.pid = HostSigInfo->si_pid; + guest_uctx->info._sifields._rt.uid = HostSigInfo->si_uid; + guest_uctx->info._sifields._rt.sigval.sival_int = HostSigInfo->si_int; + break; + case SigInfoLayout::LAYOUT_SYS: + guest_uctx->info._sifields._sigsys.call_addr = static_cast(reinterpret_cast(HostSigInfo->si_call_addr)); + guest_uctx->info._sifields._sigsys.syscall = HostSigInfo->si_syscall; + // We need to lie about the architecture here. + // Otherwise we would expose incorrect information to the guest. + constexpr uint32_t AUDIT_LE = 0x4000'0000U; + constexpr uint32_t MACHINE_I386 = 3; // This matches the ELF definition. + guest_uctx->info._sifields._sigsys.arch = AUDIT_LE | MACHINE_I386; + break; + } + + // Setup the guest stack context. + guest_uctx->Signal = Signal; + guest_uctx->pinfo = (uint32_t)(uint64_t)&guest_uctx->info; + guest_uctx->puc = (uint32_t)(uint64_t)&guest_uctx->uc; + + // Retcode needs to be bit-exact for debuggers + constexpr static uint8_t rt_retcode[] = { + 0xb8, // mov + 0xad, 0x00, 0x00, 0x00, // 32-bit rt_sigreturn + 0xcd, 0x80, // int 0x80 + 0x0, // Pad + }; + + memcpy(guest_uctx->retcode, &rt_retcode, sizeof(rt_retcode)); + + // 32-bit Guest can provide its own restorer or we need to provide our own. + // On a real host this restorer will live in VDSO. 
+ constexpr uint32_t SA_RESTORER = 0x04000000; + const bool HasRestorer = (GuestAction->sa_flags & SA_RESTORER) == SA_RESTORER; + if (HasRestorer) { + guest_uctx->pretcode = (uint32_t)(uint64_t)GuestAction->restorer; + } else { + guest_uctx->pretcode = SignalReturn; + LOGMAN_THROW_AA_FMT(SignalReturn < 0x1'0000'0000ULL, "This needs to be below 4GB"); + } + + // Support regparm=3 + Frame->State.gregs[FEXCore::X86State::REG_RAX] = Signal; + Frame->State.gregs[FEXCore::X86State::REG_RDX] = guest_uctx->pinfo; + Frame->State.gregs[FEXCore::X86State::REG_RCX] = guest_uctx->puc; + + return NewGuestSP; +} + +bool SignalDelegator::HandleDispatcherGuestSignal(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext, + GuestSigAction* GuestAction, stack_t* GuestStack) { + auto ContextBackup = StoreThreadState(Thread, Signal, ucontext); + + auto Frame = Thread->CurrentFrame; + + // Ref count our faults + // We use this to track if it is safe to clear cache + ++Thread->CurrentFrame->SignalHandlerRefCounter; + + uint64_t OldPC = ArchHelpers::Context::GetPc(ucontext); + const bool WasInJIT = Thread->CPUBackend->IsAddressInCodeBuffer(OldPC); + + // Spill the SRA regardless of signal handler type + // We are going to be returning to the top of the dispatcher which will fill again + // Otherwise we might load garbage + if (WasInJIT) { + uint32_t IgnoreMask {}; +#ifdef _M_ARM_64 + if (Frame->InSyscallInfo != 0) { + // We are in a syscall, this means we are in a weird register state + // We need to spill SRA but only some of it, since some values have already been spilled + // Lower 16 bits tells us which registers are already spilled to the context + // So we ignore spilling those ones + IgnoreMask = Frame->InSyscallInfo & 0xFFFF; + } else { + // We must spill everything + IgnoreMask = 0; + } +#endif + + // We are in jit, SRA must be spilled + SpillSRA(Thread, ucontext, IgnoreMask); + + ContextBackup->Flags |= 
ArchHelpers::Context::ContextFlags::CONTEXT_FLAG_INJIT; + + // We are leaving the syscall information behind. Make sure to store the previous state. + ContextBackup->InSyscallInfo = Thread->CurrentFrame->InSyscallInfo; + Thread->CurrentFrame->InSyscallInfo = 0; + } else { + if (!IsAddressInDispatcher(OldPC)) { + // This is likely to cause issues but in some cases it isn't fatal + // This can also happen if we have put a signal on hold, then we just reenabled the signal + // So we are in the syscall handler + // Only throw a log message in this case + if constexpr (false) { + // XXX: Messages in the signal handler can cause us to crash + LogMan::Msg::EFmt("Signals in dispatcher have unsynchronized context"); } + } + } - Thread->SignalReason.store(FEXCore::Core::SignalEvent::Nothing); - return true; + uint64_t OldGuestSP = Frame->State.gregs[FEXCore::X86State::REG_RSP]; + uint64_t NewGuestSP = OldGuestSP; + + // altstack is only used if the signal handler was setup with SA_ONSTACK + if (GuestAction->sa_flags & SA_ONSTACK) { + // Additionally the altstack is only used if the enabled (SS_DISABLE flag is not set) + if (!(GuestStack->ss_flags & SS_DISABLE)) { + // If our guest is already inside of the alternative stack + // Then that means we are hitting recursive signals and we need to walk back the stack correctly + uint64_t AltStackBase = reinterpret_cast(GuestStack->ss_sp); + uint64_t AltStackEnd = AltStackBase + GuestStack->ss_size; + if (OldGuestSP >= AltStackBase && OldGuestSP <= AltStackEnd) { + // We are already in the alt stack, the rest of the code will handle adjusting this + } else { + NewGuestSP = AltStackEnd; + } } + } - if (SignalReason == FEXCore::Core::SignalEvent::Return || - SignalReason == FEXCore::Core::SignalEvent::ReturnRT) { - RestoreThreadState(Thread, ucontext, SignalReason == FEXCore::Core::SignalEvent::ReturnRT ? 
RestoreType::TYPE_REALTIME : RestoreType::TYPE_NONREALTIME); + // siginfo_t + siginfo_t* HostSigInfo = reinterpret_cast(info); + + // Backup where we think the RIP currently is + ContextBackup->OriginalRIP = CTX->RestoreRIPFromHostPC(Thread, ArchHelpers::Context::GetPc(ucontext)); + // Calculate eflags upfront. + uint32_t eflags = CTX->ReconstructCompactedEFLAGS(Thread, WasInJIT, ArchHelpers::Context::GetArmGPRs(ucontext), + ArchHelpers::Context::GetArmPState(ucontext)); + + if (Is64BitMode) { + NewGuestSP = SetupFrame_x64(Thread, ContextBackup, Frame, Signal, HostSigInfo, ucontext, GuestAction, GuestStack, NewGuestSP, eflags); + } else { + const bool SigInfoFrame = (GuestAction->sa_flags & SA_SIGINFO) == SA_SIGINFO; + if (SigInfoFrame) { + NewGuestSP = SetupRTFrame_ia32(ContextBackup, Frame, Signal, HostSigInfo, ucontext, GuestAction, GuestStack, NewGuestSP, eflags); + } else { + NewGuestSP = SetupFrame_ia32(ContextBackup, Frame, Signal, HostSigInfo, ucontext, GuestAction, GuestStack, NewGuestSP, eflags); + } + } - // Ref count our faults - // We use this to track if it is safe to clear cache - --Thread->CurrentFrame->SignalHandlerRefCounter; + Frame->State.rip = reinterpret_cast(GuestAction->sigaction_handler.sigaction); + Frame->State.gregs[FEXCore::X86State::REG_RSP] = NewGuestSP; - Thread->SignalReason.store(FEXCore::Core::SignalEvent::Nothing); - return true; + // The guest starts its signal frame with a zero initialized FPU + // Set that up now. 
Little bit costly but it's a requirement + // This state will be restored on rt_sigreturn + memset(Frame->State.xmm.avx.data, 0, sizeof(Frame->State.xmm)); + memset(Frame->State.mm, 0, sizeof(Frame->State.mm)); + Frame->State.FCW = 0x37F; + Frame->State.AbridgedFTW = 0; + + // Set the new PC + ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA); + // Set our state register to point to our guest thread data + ArchHelpers::Context::SetState(ucontext, reinterpret_cast(Frame)); + + return true; +} + +bool SignalDelegator::HandleSIGILL(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) { + if (ArchHelpers::Context::GetPc(ucontext) == Config.SignalHandlerReturnAddress || + ArchHelpers::Context::GetPc(ucontext) == Config.SignalHandlerReturnAddressRT) { + RestoreThreadState(Thread, ucontext, + ArchHelpers::Context::GetPc(ucontext) == Config.SignalHandlerReturnAddressRT ? RestoreType::TYPE_REALTIME : + RestoreType::TYPE_NONREALTIME); + + // Ref count our faults + // We use this to track if it is safe to clear cache + --Thread->CurrentFrame->SignalHandlerRefCounter; + + if (Thread->DeferredSignalFrames.size() != 0) { + // If we have more deferred frames to process then mprotect back to PROT_NONE. + // It will have been RW coming in to this sigreturn and now we need to remove permissions + // to ensure FEX trampolines back to the SIGSEGV deferred handler. + mprotect(reinterpret_cast(Thread->CurrentFrame->State.DeferredSignalFaultAddress), 4096, PROT_NONE); } - return false; + return true; } - void SignalDelegator::SignalThread(FEXCore::Core::InternalThreadState *Thread, FEXCore::Core::SignalEvent Event) { - if (Event == FEXCore::Core::SignalEvent::Pause && - Thread->RunningEvents.Running.load() == false) { - // Skip signaling a thread if it is already paused. 
- return; + if (ArchHelpers::Context::GetPc(ucontext) == Config.PauseReturnInstruction) { + RestoreThreadState(Thread, ucontext, RestoreType::TYPE_PAUSE); + + // Ref count our faults + // We use this to track if it is safe to clear cache + --Thread->CurrentFrame->SignalHandlerRefCounter; + return true; + } + + return false; +} + +bool SignalDelegator::HandleSignalPause(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) { + FEXCore::Core::SignalEvent SignalReason = Thread->SignalReason.load(); + auto Frame = Thread->CurrentFrame; + + if (SignalReason == FEXCore::Core::SignalEvent::Pause) { + // Store our thread state so we can come back to this + StoreThreadState(Thread, Signal, ucontext); + + if (Thread->CPUBackend->IsAddressInCodeBuffer(ArchHelpers::Context::GetPc(ucontext))) { + // We are in jit, SRA must be spilled + ArchHelpers::Context::SetPc(ucontext, Config.ThreadPauseHandlerAddressSpillSRA); + } else { + // We are in non-jit, SRA is already spilled + LOGMAN_THROW_A_FMT(!IsAddressInDispatcher(ArchHelpers::Context::GetPc(ucontext)), "Signals in dispatcher have unsynchronized " + "context"); + ArchHelpers::Context::SetPc(ucontext, Config.ThreadPauseHandlerAddress); } - Thread->SignalReason.store(Event); - FHU::Syscalls::tgkill(Thread->ThreadManager.PID, Thread->ThreadManager.TID, SignalDelegator::SIGNAL_FOR_PAUSE); + + // Set our state register to point to our guest thread data + ArchHelpers::Context::SetState(ucontext, reinterpret_cast(Frame)); + + // Ref count our faults + // We use this to track if it is safe to clear cache + ++Thread->CurrentFrame->SignalHandlerRefCounter; + + Thread->SignalReason.store(FEXCore::Core::SignalEvent::Nothing); + return true; } - /** @} */ + if (SignalReason == FEXCore::Core::SignalEvent::Stop) { + // Our thread is stopping + // We don't care about anything at this point + // Set the stack to our starting location when we entered the core and get out safely + 
ArchHelpers::Context::SetSp(ucontext, Frame->ReturningStackLocation); - static bool IsAsyncSignal(const siginfo_t* Info, int Signal) { - if (Info->si_code <= SI_USER) { - // If the signal is not from the kernel then it is always async. - // This is because synchronous signals can be sent through tgkill,sigqueue and other methods. - // SI_USER == 0 and all negative si_code values come from the user. - return true; + // Our ref counting doesn't matter anymore + Thread->CurrentFrame->SignalHandlerRefCounter = 0; + + // Set the new PC + if (Thread->CPUBackend->IsAddressInCodeBuffer(ArchHelpers::Context::GetPc(ucontext))) { + // We are in jit, SRA must be spilled + ArchHelpers::Context::SetPc(ucontext, Config.ThreadStopHandlerAddressSpillSRA); + } else { + // We are in non-jit, SRA is already spilled + LOGMAN_THROW_A_FMT(!IsAddressInDispatcher(ArchHelpers::Context::GetPc(ucontext)), "Signals in dispatcher have unsynchronized " + "context"); + ArchHelpers::Context::SetPc(ucontext, Config.ThreadStopHandlerAddress); } - else { - // If the signal is from the kernel then it is async only if it isn't an explicit synchronous signal. - switch (Signal) { - // These are all synchronous signals. - case SIGBUS: - case SIGFPE: - case SIGILL: - case SIGSEGV: - case SIGTRAP: - return false; - default: break; - } + + // We need to be a little bit careful here + // If we were already paused (due to GDB) and we are immediately stopping (due to gdb kill) + // Then we need to ensure we don't double decrement our idle thread counter + if (Thread->RunningEvents.ThreadSleeping) { + // If the thread was sleeping then its idle counter was decremented + // Reincrement it here to not break logic + FEX::HLE::_SyscallHandler->TM.IncrementIdleRefCount(); } - // Everything else is async and can be deferred. 
+ Thread->SignalReason.store(FEXCore::Core::SignalEvent::Nothing); + return true; + } + + if (SignalReason == FEXCore::Core::SignalEvent::Return || SignalReason == FEXCore::Core::SignalEvent::ReturnRT) { + RestoreThreadState(Thread, ucontext, + SignalReason == FEXCore::Core::SignalEvent::ReturnRT ? RestoreType::TYPE_REALTIME : RestoreType::TYPE_NONREALTIME); + + // Ref count our faults + // We use this to track if it is safe to clear cache + --Thread->CurrentFrame->SignalHandlerRefCounter; + + Thread->SignalReason.store(FEXCore::Core::SignalEvent::Nothing); + return true; + } + return false; +} + +void SignalDelegator::SignalThread(FEXCore::Core::InternalThreadState* Thread, FEXCore::Core::SignalEvent Event) { + if (Event == FEXCore::Core::SignalEvent::Pause && Thread->RunningEvents.Running.load() == false) { + // Skip signaling a thread if it is already paused. + return; + } + Thread->SignalReason.store(Event); + FHU::Syscalls::tgkill(Thread->ThreadManager.PID, Thread->ThreadManager.TID, SignalDelegator::SIGNAL_FOR_PAUSE); +} + +/** @} */ + +static bool IsAsyncSignal(const siginfo_t* Info, int Signal) { + if (Info->si_code <= SI_USER) { + // If the signal is not from the kernel then it is always async. + // This is because synchronous signals can be sent through tgkill,sigqueue and other methods. + // SI_USER == 0 and all negative si_code values come from the user. return true; + } else { + // If the signal is from the kernel then it is async only if it isn't an explicit synchronous signal. + switch (Signal) { + // These are all synchronous signals. 
+ case SIGBUS: + case SIGFPE: + case SIGILL: + case SIGSEGV: + case SIGTRAP: return false; + default: break; + } } - void SignalDelegator::HandleGuestSignal(FEXCore::Core::InternalThreadState *Thread, int Signal, void *Info, void *UContext) { - ucontext_t* _context = (ucontext_t*)UContext; - auto SigInfo = *static_cast(Info); - - constexpr bool SupportDeferredSignals = true; - if (SupportDeferredSignals) { - auto MustDeferSignal = (Thread->CurrentFrame->State.DeferredSignalRefCount.Load() != 0); - - if (Signal == SIGSEGV && - SigInfo.si_code == SEGV_ACCERR && - SigInfo.si_addr == reinterpret_cast(Thread->CurrentFrame->State.DeferredSignalFaultAddress)) { - if (!MustDeferSignal) { - // We just reached the end of the outermost signal-deferring section and faulted to check for pending signals. - // Pull a signal frame off the stack. - - mprotect(reinterpret_cast(Thread->CurrentFrame->State.DeferredSignalFaultAddress), 4096, PROT_READ | PROT_WRITE); - - if (Thread->DeferredSignalFrames.empty()) { - // No signals to defer. Just set the fault page back to RW and continue execution. - // This occurs as a minor race condition between the refcount decrement and the access to the fault page. - return; - } - - auto Top = Thread->DeferredSignalFrames.back(); - Signal = Top.Signal; - SigInfo = Top.Info; - Thread->DeferredSignalFrames.pop_back(); - - // Until we re-protect the page to PROT_NONE, FEX will now *permanently* defer signals and /not/ check them. - // - // In order to return /back/ to a sane state, we wait for the rt_sigreturn to happen. - // rt_sigreturn will check if there are any more deferred signals to handle - // - If there are deferred signals - // - mprotect back to PROT_NONE - // - sigreturn will trampoline out to the previous fault address check, SIGSEGV and restart - // - If there are *no* deferred signals - // - No need to mprotect, it is already RW + // Everything else is async and can be deferred. 
+ return true; +} + +void SignalDelegator::HandleGuestSignal(FEXCore::Core::InternalThreadState* Thread, int Signal, void* Info, void* UContext) { + ucontext_t* _context = (ucontext_t*)UContext; + auto SigInfo = *static_cast(Info); + + constexpr bool SupportDeferredSignals = true; + if (SupportDeferredSignals) { + auto MustDeferSignal = (Thread->CurrentFrame->State.DeferredSignalRefCount.Load() != 0); + + if (Signal == SIGSEGV && SigInfo.si_code == SEGV_ACCERR && + SigInfo.si_addr == reinterpret_cast(Thread->CurrentFrame->State.DeferredSignalFaultAddress)) { + if (!MustDeferSignal) { + // We just reached the end of the outermost signal-deferring section and faulted to check for pending signals. + // Pull a signal frame off the stack. + + mprotect(reinterpret_cast(Thread->CurrentFrame->State.DeferredSignalFaultAddress), 4096, PROT_READ | PROT_WRITE); + + if (Thread->DeferredSignalFrames.empty()) { + // No signals to defer. Just set the fault page back to RW and continue execution. + // This occurs as a minor race condition between the refcount decrement and the access to the fault page. + return; } - else { + + auto Top = Thread->DeferredSignalFrames.back(); + Signal = Top.Signal; + SigInfo = Top.Info; + Thread->DeferredSignalFrames.pop_back(); + + // Until we re-protect the page to PROT_NONE, FEX will now *permanently* defer signals and /not/ check them. + // + // In order to return /back/ to a sane state, we wait for the rt_sigreturn to happen. + // rt_sigreturn will check if there are any more deferred signals to handle + // - If there are deferred signals + // - mprotect back to PROT_NONE + // - sigreturn will trampoline out to the previous fault address check, SIGSEGV and restart + // - If there are *no* deferred signals + // - No need to mprotect, it is already RW + } else { #ifdef _M_ARM_64 - // If RefCount != 0 then that means we hit an access with nested signal-deferring sections. 
- // Increment the PC past the `str zr, [x1]` to continue code execution until we reach the outermost section. - ArchHelpers::Context::SetPc(UContext, ArchHelpers::Context::GetPc(UContext) + 4); - return; + // If RefCount != 0 then that means we hit an access with nested signal-deferring sections. + // Increment the PC past the `str zr, [x1]` to continue code execution until we reach the outermost section. + ArchHelpers::Context::SetPc(UContext, ArchHelpers::Context::GetPc(UContext) + 4); + return; #else - // X86 should always be doing a refcount compare and branch since we can't guarantee instruction size. - // ARM64 just always does the access to reduce branching overhead. - ERROR_AND_DIE_FMT("X86 shouldn't hit this DeferredSignalFaultAddress"); + // X86 should always be doing a refcount compare and branch since we can't guarantee instruction size. + // ARM64 just always does the access to reduce branching overhead. + ERROR_AND_DIE_FMT("X86 shouldn't hit this DeferredSignalFaultAddress"); #endif - } } - else if (Signal == SIGSEGV && - SigInfo.si_code == SEGV_ACCERR && - FaultSafeMemcpy::IsFaultLocation(ArchHelpers::Context::GetPc(UContext))) { - // If you want to emulate EFAULT behaviour then enable this if-statement. - // Do this once we find an application that depends on this. - if constexpr (false) { - // Return from the subroutine, returning EFAULT. - ArchHelpers::Context::SetArmReg(UContext, 0, EFAULT); - ArchHelpers::Context::SetPc(UContext, ArchHelpers::Context::GetArmReg(UContext, 30)); - return; - } - else { - LogMan::Msg::AFmt("Received invalid data to syscall. Crashing now!"); - } - } - else { - if (IsAsyncSignal(&SigInfo, Signal) && MustDeferSignal) { - // If the signal is asynchronous (as determined by si_code) and FEX is in a state of needing - // to defer the signal, then add the signal to the thread's signal queue. 
- LOGMAN_THROW_A_FMT(Thread->DeferredSignalFrames.size() != Thread->DeferredSignalFrames.capacity(), - "Deferred signals vector hit capacity size. This will likely crash! Asserting now!"); - Thread->DeferredSignalFrames.emplace_back(FEXCore::Core::InternalThreadState::DeferredSignalState { - .Info = SigInfo, - .Signal = Signal, - }); - - // Now update the faulting page permissions so it will fault on write. - mprotect(reinterpret_cast(Thread->CurrentFrame->State.DeferredSignalFaultAddress), 4096, PROT_NONE); - - // Postpone the remainder of signal handling logic until we process the SIGSEGV triggered by writing to DeferredSignalFaultAddress. - return; - } + } else if (Signal == SIGSEGV && SigInfo.si_code == SEGV_ACCERR && FaultSafeMemcpy::IsFaultLocation(ArchHelpers::Context::GetPc(UContext))) { + // If you want to emulate EFAULT behaviour then enable this if-statement. + // Do this once we find an application that depends on this. + if constexpr (false) { + // Return from the subroutine, returning EFAULT. + ArchHelpers::Context::SetArmReg(UContext, 0, EFAULT); + ArchHelpers::Context::SetPc(UContext, ArchHelpers::Context::GetArmReg(UContext, 30)); + return; + } else { + LogMan::Msg::AFmt("Received invalid data to syscall. 
Crashing now!"); } - } - // Let the host take first stab at handling the signal - SignalHandler &Handler = HostHandlers[Signal]; - - // Remove the pending signal - ThreadData.PendingSignals &= ~(1ULL << (Signal - 1)); - - // We have an emulation thread pointer, we can now modify its state - if (Handler.GuestAction.sigaction_handler.handler == SIG_DFL) { - if (Handler.DefaultBehaviour == DEFAULT_TERM || - Handler.DefaultBehaviour == DEFAULT_COREDUMP) { - // Let the signal fall through to the unhandled path - // This way the parent process can know it died correctly + } else { + if (IsAsyncSignal(&SigInfo, Signal) && MustDeferSignal) { + // If the signal is asynchronous (as determined by si_code) and FEX is in a state of needing + // to defer the signal, then add the signal to the thread's signal queue. + LOGMAN_THROW_A_FMT(Thread->DeferredSignalFrames.size() != Thread->DeferredSignalFrames.capacity(), "Deferred signals vector hit " + "capacity size. This will " + "likely crash! Asserting now!"); + Thread->DeferredSignalFrames.emplace_back(FEXCore::Core::InternalThreadState::DeferredSignalState { + .Info = SigInfo, + .Signal = Signal, + }); + + // Now update the faulting page permissions so it will fault on write. + mprotect(reinterpret_cast(Thread->CurrentFrame->State.DeferredSignalFaultAddress), 4096, PROT_NONE); + + // Postpone the remainder of signal handling logic until we process the SIGSEGV triggered by writing to DeferredSignalFaultAddress. 
+ return; } } - else if (Handler.GuestAction.sigaction_handler.handler == SIG_IGN) { - return; + } + // Let the host take first stab at handling the signal + SignalHandler& Handler = HostHandlers[Signal]; + + // Remove the pending signal + ThreadData.PendingSignals &= ~(1ULL << (Signal - 1)); + + // We have an emulation thread pointer, we can now modify its state + if (Handler.GuestAction.sigaction_handler.handler == SIG_DFL) { + if (Handler.DefaultBehaviour == DEFAULT_TERM || Handler.DefaultBehaviour == DEFAULT_COREDUMP) { + // Let the signal fall through to the unhandled path + // This way the parent process can know it died correctly } - else { - if (Handler.GuestHandler && - Handler.GuestHandler(Thread, Signal, &SigInfo, UContext, &Handler.GuestAction, &ThreadData.GuestAltStack)) { - // Set up a new mask based on this signals signal mask - uint64_t NewMask = Handler.GuestAction.sa_mask.Val; - - // If NODEFER then the new signal mask includes this signal - if (!(Handler.GuestAction.sa_flags & SA_NODEFER)) { - NewMask |= (1ULL << (Signal - 1)); - } + } else if (Handler.GuestAction.sigaction_handler.handler == SIG_IGN) { + return; + } else { + if (Handler.GuestHandler && Handler.GuestHandler(Thread, Signal, &SigInfo, UContext, &Handler.GuestAction, &ThreadData.GuestAltStack)) { + // Set up a new mask based on this signals signal mask + uint64_t NewMask = Handler.GuestAction.sa_mask.Val; + + // If NODEFER then the new signal mask includes this signal + if (!(Handler.GuestAction.sa_flags & SA_NODEFER)) { + NewMask |= (1ULL << (Signal - 1)); + } - // Walk our required signals and stop masking them if requested - for (size_t i = 0; i < MAX_SIGNALS; ++i) { - if (HostHandlers[i + 1].Required.load(std::memory_order_relaxed)) { - // Never mask our required signals - NewMask &= ~(1ULL << i); - } + // Walk our required signals and stop masking them if requested + for (size_t i = 0; i < MAX_SIGNALS; ++i) { + if (HostHandlers[i + 1].Required.load(std::memory_order_relaxed)) { 
+ // Never mask our required signals + NewMask &= ~(1ULL << i); } + } - // Update our host signal mask so we don't hit race conditions with signals - // This allows us to maintain the expected signal mask through the guest signal handling and then all the way back again - memcpy(&_context->uc_sigmask, &NewMask, sizeof(uint64_t)); + // Update our host signal mask so we don't hit race conditions with signals + // This allows us to maintain the expected signal mask through the guest signal handling and then all the way back again + memcpy(&_context->uc_sigmask, &NewMask, sizeof(uint64_t)); - // We handled this signal, continue running - return; - } - ERROR_AND_DIE_FMT("Unhandled guest exception"); + // We handled this signal, continue running + return; } + ERROR_AND_DIE_FMT("Unhandled guest exception"); + } - // Unhandled crash - // Call back in to the previous handler - if (Handler.OldAction.sa_flags & SA_SIGINFO) { - Handler.OldAction.sigaction(Signal, &SigInfo, UContext); - } - else if (Handler.OldAction.handler == SIG_IGN || - (Handler.OldAction.handler == SIG_DFL && - Handler.DefaultBehaviour == DEFAULT_IGNORE)) { - // Do nothing - } - else if (Handler.OldAction.handler == SIG_DFL && - (Handler.DefaultBehaviour == DEFAULT_COREDUMP || - Handler.DefaultBehaviour == DEFAULT_TERM)) { + // Unhandled crash + // Call back in to the previous handler + if (Handler.OldAction.sa_flags & SA_SIGINFO) { + Handler.OldAction.sigaction(Signal, &SigInfo, UContext); + } else if (Handler.OldAction.handler == SIG_IGN || (Handler.OldAction.handler == SIG_DFL && Handler.DefaultBehaviour == DEFAULT_IGNORE)) { + // Do nothing + } else if (Handler.OldAction.handler == SIG_DFL && (Handler.DefaultBehaviour == DEFAULT_COREDUMP || Handler.DefaultBehaviour == DEFAULT_TERM)) { #ifndef FEX_DISABLE_TELEMETRY - // In the case of signals that cause coredump or terminate, save telemetry early. - // FEX is hard crashing at this point and won't hit regular shutdown routines. 
- // Add the signal to the crash mask. - CrashMask |= (1ULL << Signal); - SaveTelemetry(); + // In the case of signals that cause coredump or terminate, save telemetry early. + // FEX is hard crashing at this point and won't hit regular shutdown routines. + // Add the signal to the crash mask. + CrashMask |= (1ULL << Signal); + SaveTelemetry(); #endif - // Reassign back to DFL and crash - signal(Signal, SIG_DFL); - if (SigInfo.si_code != SI_KERNEL) { - // If the signal wasn't sent by the kernel then we need to reraise it. - // This is necessary since returning from this signal handler now might just continue executing. - // eg: If sent from tgkill then the signal gets dropped and returns. - FHU::Syscalls::tgkill(::getpid(), FHU::Syscalls::gettid(), Signal); - } - } - else { - Handler.OldAction.handler(Signal); + // Reassign back to DFL and crash + signal(Signal, SIG_DFL); + if (SigInfo.si_code != SI_KERNEL) { + // If the signal wasn't sent by the kernel then we need to reraise it. + // This is necessary since returning from this signal handler now might just continue executing. + // eg: If sent from tgkill then the signal gets dropped and returns. 
+ FHU::Syscalls::tgkill(::getpid(), FHU::Syscalls::gettid(), Signal); } + } else { + Handler.OldAction.handler(Signal); } +} - void SignalDelegator::SaveTelemetry() { +void SignalDelegator::SaveTelemetry() { #ifndef FEX_DISABLE_TELEMETRY - if (!ApplicationName.empty()) { - FEXCore::Telemetry::Shutdown(ApplicationName); - } -#endif + if (!ApplicationName.empty()) { + FEXCore::Telemetry::Shutdown(ApplicationName); } +#endif +} - bool SignalDelegator::InstallHostThunk(int Signal) { - SignalHandler &SignalHandler = HostHandlers[Signal]; - // If the host thunk is already installed for this, just return - if (SignalHandler.Installed) { - return false; - } +bool SignalDelegator::InstallHostThunk(int Signal) { + SignalHandler& SignalHandler = HostHandlers[Signal]; + // If the host thunk is already installed for this, just return + if (SignalHandler.Installed) { + return false; + } - // Default flags for us - SignalHandler.HostAction.sa_flags = SA_SIGINFO | SA_ONSTACK; + // Default flags for us + SignalHandler.HostAction.sa_flags = SA_SIGINFO | SA_ONSTACK; - bool Result = UpdateHostThunk(Signal); + bool Result = UpdateHostThunk(Signal); - SignalHandler.Installed = Result; - return Result; - } + SignalHandler.Installed = Result; + return Result; +} - bool SignalDelegator::UpdateHostThunk(int Signal) { - SignalHandler &SignalHandler = HostHandlers[Signal]; +bool SignalDelegator::UpdateHostThunk(int Signal) { + SignalHandler& SignalHandler = HostHandlers[Signal]; - // Now install the thunk handler - SignalHandler.HostAction.sigaction = SignalHandlerThunk; + // Now install the thunk handler + SignalHandler.HostAction.sigaction = SignalHandlerThunk; - auto CheckAndAddFlags = [](uint64_t HostFlags, uint64_t GuestFlags, uint64_t Flags) { - // If any of the flags don't match then update to the newest set - if ((HostFlags ^ GuestFlags) & Flags) { - // Remove all the flags from the host that we are testing for - HostFlags &= ~Flags; - // Copy over the guest flags being set - 
HostFlags |= GuestFlags & Flags; - } + auto CheckAndAddFlags = [](uint64_t HostFlags, uint64_t GuestFlags, uint64_t Flags) { + // If any of the flags don't match then update to the newest set + if ((HostFlags ^ GuestFlags) & Flags) { + // Remove all the flags from the host that we are testing for + HostFlags &= ~Flags; + // Copy over the guest flags being set + HostFlags |= GuestFlags & Flags; + } - return HostFlags; - }; + return HostFlags; + }; - // Don't allow the guest to override flags for - // SA_SIGINFO : Host always needs SA_SIGINFO - // SA_ONSTACK : Host always needs the altstack - // SA_RESETHAND : We don't support one shot handlers - // SA_RESTORER : We always need our host side restorer on x86-64, Couldn't use guest restorer anyway - SignalHandler.HostAction.sa_flags = CheckAndAddFlags( - SignalHandler.HostAction.sa_flags, - SignalHandler.GuestAction.sa_flags, - SA_NOCLDSTOP | SA_NOCLDWAIT | SA_NODEFER | SA_RESTART); + // Don't allow the guest to override flags for + // SA_SIGINFO : Host always needs SA_SIGINFO + // SA_ONSTACK : Host always needs the altstack + // SA_RESETHAND : We don't support one shot handlers + // SA_RESTORER : We always need our host side restorer on x86-64, Couldn't use guest restorer anyway + SignalHandler.HostAction.sa_flags = CheckAndAddFlags(SignalHandler.HostAction.sa_flags, SignalHandler.GuestAction.sa_flags, + SA_NOCLDSTOP | SA_NOCLDWAIT | SA_NODEFER | SA_RESTART); #ifdef _M_X86_64 #define SA_RESTORER 0x04000000 - SignalHandler.HostAction.sa_flags |= SA_RESTORER; - SignalHandler.HostAction.restorer = sigrestore; + SignalHandler.HostAction.sa_flags |= SA_RESTORER; + SignalHandler.HostAction.restorer = sigrestore; #endif - // Walk the signals we have that are required and make sure to remove it from the mask - // This'll likely be SIGILL, SIGBUS, SIG63 - - // If the guest has masked some signals then we need to also mask those signals - for (size_t i = 1; i < HostHandlers.size(); ++i) { - if 
(HostHandlers[i].Required.load(std::memory_order_relaxed)) { - SignalHandler.HostAction.sa_mask &= ~(1ULL << (i - 1)); - } - else if (SigIsMember(&SignalHandler.GuestAction.sa_mask, i)) { - SignalHandler.HostAction.sa_mask |= (1ULL << (i - 1)); - } - } + // Walk the signals we have that are required and make sure to remove it from the mask + // This'll likely be SIGILL, SIGBUS, SIG63 - // Check for SIG_IGN - if (SignalHandler.GuestAction.sigaction_handler.handler == SIG_IGN && - HostHandlers[Signal].Required.load(std::memory_order_relaxed) == false) { - // We are ignoring this signal on the guest - // Which means we need to ignore it on the host as well - SignalHandler.HostAction.handler = SIG_IGN; + // If the guest has masked some signals then we need to also mask those signals + for (size_t i = 1; i < HostHandlers.size(); ++i) { + if (HostHandlers[i].Required.load(std::memory_order_relaxed)) { + SignalHandler.HostAction.sa_mask &= ~(1ULL << (i - 1)); + } else if (SigIsMember(&SignalHandler.GuestAction.sa_mask, i)) { + SignalHandler.HostAction.sa_mask |= (1ULL << (i - 1)); } + } - // Check for SIG_DFL - if (SignalHandler.GuestAction.sigaction_handler.handler == SIG_DFL && - HostHandlers[Signal].Required.load(std::memory_order_relaxed) == false) { - // Default handler on guest and default handler on host - // With coredump and terminate then expect fireworks, but that is what the guest wants - SignalHandler.HostAction.handler = SIG_DFL; - } + // Check for SIG_IGN + if (SignalHandler.GuestAction.sigaction_handler.handler == SIG_IGN && HostHandlers[Signal].Required.load(std::memory_order_relaxed) == false) { + // We are ignoring this signal on the guest + // Which means we need to ignore it on the host as well + SignalHandler.HostAction.handler = SIG_IGN; + } - // Only update the old action if we haven't ever been installed - const int Result = ::syscall(SYS_rt_sigaction, Signal, &SignalHandler.HostAction, SignalHandler.Installed ? 
nullptr : &SignalHandler.OldAction, 8); - if (Result < 0) { - // Signal 32 and 33 are consumed by glibc. We don't handle this atm - LogMan::Msg::AFmt("Failed to install host signal thunk for signal {}: {}", Signal, strerror(errno)); - return false; - } + // Check for SIG_DFL + if (SignalHandler.GuestAction.sigaction_handler.handler == SIG_DFL && HostHandlers[Signal].Required.load(std::memory_order_relaxed) == false) { + // Default handler on guest and default handler on host + // With coredump and terminate then expect fireworks, but that is what the guest wants + SignalHandler.HostAction.handler = SIG_DFL; + } - return true; + // Only update the old action if we haven't ever been installed + const int Result = + ::syscall(SYS_rt_sigaction, Signal, &SignalHandler.HostAction, SignalHandler.Installed ? nullptr : &SignalHandler.OldAction, 8); + if (Result < 0) { + // Signal 32 and 33 are consumed by glibc. We don't handle this atm + LogMan::Msg::AFmt("Failed to install host signal thunk for signal {}: {}", Signal, strerror(errno)); + return false; } - void SignalDelegator::UninstallHostHandler(int Signal) { - SignalHandler &SignalHandler = HostHandlers[Signal]; - - ::syscall(SYS_rt_sigaction, Signal, &SignalHandler.OldAction, nullptr, 8); - } - - SignalDelegator::SignalDelegator(FEXCore::Context::Context *_CTX, const std::string_view ApplicationName) - : CTX {_CTX} - , ApplicationName {ApplicationName} { - // Register this delegate - LOGMAN_THROW_AA_FMT(!GlobalDelegator, "Can't register global delegator multiple times!"); - GlobalDelegator = this; - // Signal zero isn't real - HostHandlers[0].Installed = true; - - // We can't capture SIGKILL or SIGSTOP - HostHandlers[SIGKILL].Installed = true; - HostHandlers[SIGSTOP].Installed = true; - - // Most signals default to termination - // These ones are slightly different - static constexpr std::array, 14> SignalDefaultBehaviours = {{ - {SIGQUIT, DEFAULT_COREDUMP}, - {SIGILL, DEFAULT_COREDUMP}, - {SIGTRAP, DEFAULT_COREDUMP}, 
- {SIGABRT, DEFAULT_COREDUMP}, - {SIGBUS, DEFAULT_COREDUMP}, - {SIGFPE, DEFAULT_COREDUMP}, - {SIGSEGV, DEFAULT_COREDUMP}, - {SIGCHLD, DEFAULT_IGNORE}, - {SIGCONT, DEFAULT_IGNORE}, - {SIGURG, DEFAULT_IGNORE}, - {SIGXCPU, DEFAULT_COREDUMP}, - {SIGXFSZ, DEFAULT_COREDUMP}, - {SIGSYS, DEFAULT_COREDUMP}, - {SIGWINCH, DEFAULT_IGNORE}, - }}; - - for (const auto &[Signal, Behaviour] : SignalDefaultBehaviours) { - HostHandlers[Signal].DefaultBehaviour = Behaviour; - } + return true; +} + +void SignalDelegator::UninstallHostHandler(int Signal) { + SignalHandler& SignalHandler = HostHandlers[Signal]; + + ::syscall(SYS_rt_sigaction, Signal, &SignalHandler.OldAction, nullptr, 8); +} - // Register frontend SIGILL handler for forced assertion. - RegisterFrontendHostSignalHandler(SIGILL, [](FEXCore::Core::InternalThreadState *Thread, int Signal, void *info, void *ucontext) -> bool { - ucontext_t* _context = (ucontext_t*)ucontext; - auto &mcontext = _context->uc_mcontext; - uint64_t PC{}; +SignalDelegator::SignalDelegator(FEXCore::Context::Context* _CTX, const std::string_view ApplicationName) + : CTX {_CTX} + , ApplicationName {ApplicationName} { + // Register this delegate + LOGMAN_THROW_AA_FMT(!GlobalDelegator, "Can't register global delegator multiple times!"); + GlobalDelegator = this; + // Signal zero isn't real + HostHandlers[0].Installed = true; + + // We can't capture SIGKILL or SIGSTOP + HostHandlers[SIGKILL].Installed = true; + HostHandlers[SIGSTOP].Installed = true; + + // Most signals default to termination + // These ones are slightly different + static constexpr std::array, 14> SignalDefaultBehaviours = {{ + {SIGQUIT, DEFAULT_COREDUMP}, + {SIGILL, DEFAULT_COREDUMP}, + {SIGTRAP, DEFAULT_COREDUMP}, + {SIGABRT, DEFAULT_COREDUMP}, + {SIGBUS, DEFAULT_COREDUMP}, + {SIGFPE, DEFAULT_COREDUMP}, + {SIGSEGV, DEFAULT_COREDUMP}, + {SIGCHLD, DEFAULT_IGNORE}, + {SIGCONT, DEFAULT_IGNORE}, + {SIGURG, DEFAULT_IGNORE}, + {SIGXCPU, DEFAULT_COREDUMP}, + {SIGXFSZ, DEFAULT_COREDUMP}, + 
{SIGSYS, DEFAULT_COREDUMP}, + {SIGWINCH, DEFAULT_IGNORE}, + }}; + + for (const auto& [Signal, Behaviour] : SignalDefaultBehaviours) { + HostHandlers[Signal].DefaultBehaviour = Behaviour; + } + + // Register frontend SIGILL handler for forced assertion. + RegisterFrontendHostSignalHandler( + SIGILL, + [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) -> bool { + ucontext_t* _context = (ucontext_t*)ucontext; + auto& mcontext = _context->uc_mcontext; + uint64_t PC {}; #ifdef _M_ARM_64 - PC = mcontext.pc; + PC = mcontext.pc; #else - PC = mcontext.gregs[REG_RIP]; + PC = mcontext.gregs[REG_RIP]; #endif - if (PC == reinterpret_cast(&FEXCore::Assert::ForcedAssert)) { - // This is a host side assert. Don't deliver this to the guest - // We want to actually break here - GlobalDelegator->UninstallHostHandler(Signal); - return true; - } - return false; - }, true); + if (PC == reinterpret_cast(&FEXCore::Assert::ForcedAssert)) { + // This is a host side assert. Don't deliver this to the guest + // We want to actually break here + GlobalDelegator->UninstallHostHandler(Signal); + return true; + } + return false; + }, + true); - const auto PauseHandler = [](FEXCore::Core::InternalThreadState *Thread, int Signal, void *info, void *ucontext) -> bool { - return GlobalDelegator->HandleSignalPause(Thread, Signal, info, ucontext); - }; + const auto PauseHandler = [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) -> bool { + return GlobalDelegator->HandleSignalPause(Thread, Signal, info, ucontext); + }; - const auto GuestSignalHandler = [](FEXCore::Core::InternalThreadState *Thread, int Signal, void *info, void *ucontext, GuestSigAction *GuestAction, stack_t *GuestStack) -> bool { - return GlobalDelegator->HandleDispatcherGuestSignal(Thread, Signal, info, ucontext, GuestAction, GuestStack); - }; + const auto GuestSignalHandler = [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext, + 
GuestSigAction* GuestAction, stack_t* GuestStack) -> bool { + return GlobalDelegator->HandleDispatcherGuestSignal(Thread, Signal, info, ucontext, GuestAction, GuestStack); + }; - const auto SigillHandler = [](FEXCore::Core::InternalThreadState *Thread, int Signal, void *info, void *ucontext) -> bool { - return GlobalDelegator->HandleSIGILL(Thread, Signal, info, ucontext); - }; + const auto SigillHandler = [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) -> bool { + return GlobalDelegator->HandleSIGILL(Thread, Signal, info, ucontext); + }; - // Register SIGILL signal handler. - RegisterHostSignalHandler(SIGILL, SigillHandler, true); + // Register SIGILL signal handler. + RegisterHostSignalHandler(SIGILL, SigillHandler, true); #ifdef _M_ARM_64 - // Register SIGBUS signal handler. - const auto SigbusHandler = [](FEXCore::Core::InternalThreadState *Thread, int Signal, void *_info, void *ucontext) -> bool { - const auto PC = ArchHelpers::Context::GetPc(ucontext); - if (!Thread->CPUBackend->IsAddressInCodeBuffer(PC)) { - // Wasn't a sigbus in JIT code - return false; - } - siginfo_t* info = reinterpret_cast(_info); + // Register SIGBUS signal handler. 
+ const auto SigbusHandler = [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* _info, void* ucontext) -> bool { + const auto PC = ArchHelpers::Context::GetPc(ucontext); + if (!Thread->CPUBackend->IsAddressInCodeBuffer(PC)) { + // Wasn't a sigbus in JIT code + return false; + } + siginfo_t* info = reinterpret_cast(_info); - if (info->si_code != BUS_ADRALN) { - // This only handles alignment problems - return false; - } + if (info->si_code != BUS_ADRALN) { + // This only handles alignment problems + return false; + } - const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(Thread, GlobalDelegator->ParanoidTSO(), PC, ArchHelpers::Context::GetArmGPRs(ucontext)); - ArchHelpers::Context::SetPc(ucontext, PC + Result.second); - return Result.first; - }; + const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(Thread, GlobalDelegator->ParanoidTSO(), PC, + ArchHelpers::Context::GetArmGPRs(ucontext)); + ArchHelpers::Context::SetPc(ucontext, PC + Result.second); + return Result.first; + }; - RegisterHostSignalHandler(SIGBUS, SigbusHandler, true); + RegisterHostSignalHandler(SIGBUS, SigbusHandler, true); #endif - // Register pause signal handler. - RegisterHostSignalHandler(SignalDelegator::SIGNAL_FOR_PAUSE, PauseHandler, true); + // Register pause signal handler. + RegisterHostSignalHandler(SignalDelegator::SIGNAL_FOR_PAUSE, PauseHandler, true); - // Guest signal handlers. - for (uint32_t Signal = 0; Signal <= SignalDelegator::MAX_SIGNALS; ++Signal) { - RegisterHostSignalHandlerForGuest(Signal, GuestSignalHandler); - } + // Guest signal handlers. 
+ for (uint32_t Signal = 0; Signal <= SignalDelegator::MAX_SIGNALS; ++Signal) { + RegisterHostSignalHandlerForGuest(Signal, GuestSignalHandler); } +} - SignalDelegator::~SignalDelegator() { - for (int i = 0; i < MAX_SIGNALS; ++i) { - if (i == 0 || - i == SIGKILL || - i == SIGSTOP || - !HostHandlers[i].Installed - ) { - continue; - } - ::syscall(SYS_rt_sigaction, i, &HostHandlers[i].OldAction, nullptr, 8); - HostHandlers[i].Installed = false; +SignalDelegator::~SignalDelegator() { + for (int i = 0; i < MAX_SIGNALS; ++i) { + if (i == 0 || i == SIGKILL || i == SIGSTOP || !HostHandlers[i].Installed) { + continue; } - GlobalDelegator = nullptr; + ::syscall(SYS_rt_sigaction, i, &HostHandlers[i].OldAction, nullptr, 8); + HostHandlers[i].Installed = false; } + GlobalDelegator = nullptr; +} - FEXCore::Core::InternalThreadState *SignalDelegator::GetTLSThread() { - return ThreadData.Thread; +FEXCore::Core::InternalThreadState* SignalDelegator::GetTLSThread() { + return ThreadData.Thread; +} + +void SignalDelegator::RegisterTLSState(FEXCore::Core::InternalThreadState* Thread) { + ThreadData.Thread = Thread; + + // Set up our signal alternative stack + // This is per thread rather than per signal + ThreadData.AltStackPtr = FEXCore::Allocator::mmap(nullptr, SIGSTKSZ * 16, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + stack_t altstack {}; + altstack.ss_sp = ThreadData.AltStackPtr; + altstack.ss_size = SIGSTKSZ * 16; + altstack.ss_flags = 0; + LOGMAN_THROW_AA_FMT(!!altstack.ss_sp, "Couldn't allocate stack pointer"); + + // Register the alt stack + const int Result = sigaltstack(&altstack, nullptr); + if (Result == -1) { + LogMan::Msg::EFmt("Failed to install alternative signal stack {}", strerror(errno)); } - void SignalDelegator::RegisterTLSState(FEXCore::Core::InternalThreadState *Thread) { - ThreadData.Thread = Thread; + // Get the current host signal mask + ::syscall(SYS_rt_sigprocmask, 0, nullptr, &ThreadData.CurrentSignalMask.Val, 8); - // Set up our signal 
alternative stack - // This is per thread rather than per signal - ThreadData.AltStackPtr = FEXCore::Allocator::mmap(nullptr, SIGSTKSZ * 16, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - stack_t altstack{}; - altstack.ss_sp = ThreadData.AltStackPtr; - altstack.ss_size = SIGSTKSZ * 16; - altstack.ss_flags = 0; - LOGMAN_THROW_AA_FMT(!!altstack.ss_sp, "Couldn't allocate stack pointer"); + if (Thread != (FEXCore::Core::InternalThreadState*)UINTPTR_MAX) { + // Reserve a small amount of deferred signal frames. Usually the stack won't be utilized beyond + // 1 or 2 signals but add a few more just in case. + Thread->DeferredSignalFrames.reserve(8); + } +} - // Register the alt stack - const int Result = sigaltstack(&altstack, nullptr); - if (Result == -1) { - LogMan::Msg::EFmt("Failed to install alternative signal stack {}", strerror(errno)); - } +void SignalDelegator::UninstallTLSState(FEXCore::Core::InternalThreadState* Thread) { + FEXCore::Allocator::munmap(ThreadData.AltStackPtr, SIGSTKSZ * 16); - // Get the current host signal mask - ::syscall(SYS_rt_sigprocmask, 0, nullptr, &ThreadData.CurrentSignalMask.Val, 8); + ThreadData.AltStackPtr = nullptr; - if (Thread != (FEXCore::Core::InternalThreadState*)UINTPTR_MAX) { - // Reserve a small amount of deferred signal frames. Usually the stack won't be utilized beyond - // 1 or 2 signals but add a few more just in case. 
- Thread->DeferredSignalFrames.reserve(8); - } - } + stack_t altstack {}; + altstack.ss_flags = SS_DISABLE; - void SignalDelegator::UninstallTLSState(FEXCore::Core::InternalThreadState *Thread) { - FEXCore::Allocator::munmap(ThreadData.AltStackPtr, SIGSTKSZ * 16); + // Uninstall the alt stack + const int Result = sigaltstack(&altstack, nullptr); + if (Result == -1) { + LogMan::Msg::EFmt("Failed to uninstall alternative signal stack {}", strerror(errno)); + } - ThreadData.AltStackPtr = nullptr; + ThreadData.Thread = nullptr; +} - stack_t altstack{}; - altstack.ss_flags = SS_DISABLE; +void SignalDelegator::FrontendRegisterHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required) { + // Linux signal handlers are per-process rather than per thread + // Multiple threads could be calling in to this + std::lock_guard lk(HostDelegatorMutex); + HostHandlers[Signal].Required = Required; + InstallHostThunk(Signal); +} - // Uninstall the alt stack - const int Result = sigaltstack(&altstack, nullptr); - if (Result == -1) { - LogMan::Msg::EFmt("Failed to uninstall alternative signal stack {}", strerror(errno)); - } +void SignalDelegator::FrontendRegisterFrontendHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required) { + // Linux signal handlers are per-process rather than per thread + // Multiple threads could be calling in to this + std::lock_guard lk(HostDelegatorMutex); + HostHandlers[Signal].Required = Required; + InstallHostThunk(Signal); +} - ThreadData.Thread = nullptr; - } +void SignalDelegator::RegisterHostSignalHandlerForGuest(int Signal, FEX::HLE::HostSignalDelegatorFunctionForGuest Func) { + std::lock_guard lk(HostDelegatorMutex); + HostHandlers[Signal].GuestHandler = std::move(Func); +} - void SignalDelegator::FrontendRegisterHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required) { - // Linux signal handlers are per-process rather than per thread - // Multiple threads could be calling in to this - 
std::lock_guard lk(HostDelegatorMutex); - HostHandlers[Signal].Required = Required; - InstallHostThunk(Signal); - } +void SignalDelegator::RegisterFrontendHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required) { + SetFrontendHostSignalHandler(Signal, Func, Required); + FrontendRegisterFrontendHostSignalHandler(Signal, Func, Required); +} - void SignalDelegator::FrontendRegisterFrontendHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required) { - // Linux signal handlers are per-process rather than per thread - // Multiple threads could be calling in to this - std::lock_guard lk(HostDelegatorMutex); - HostHandlers[Signal].Required = Required; - InstallHostThunk(Signal); - } +uint64_t SignalDelegator::RegisterGuestSignalHandler(int Signal, const GuestSigAction* Action, GuestSigAction* OldAction) { + std::lock_guard lk(GuestDelegatorMutex); - void SignalDelegator::RegisterHostSignalHandlerForGuest(int Signal, FEX::HLE::HostSignalDelegatorFunctionForGuest Func) { - std::lock_guard lk(HostDelegatorMutex); - HostHandlers[Signal].GuestHandler = std::move(Func); + // Invalid signal specified + if (Signal > MAX_SIGNALS) { + return -EINVAL; } - void SignalDelegator::RegisterFrontendHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required) { - SetFrontendHostSignalHandler(Signal, Func, Required); - FrontendRegisterFrontendHostSignalHandler(Signal, Func, Required); + // If we have an old signal set then give it back + if (OldAction) { + *OldAction = HostHandlers[Signal].GuestAction; } - uint64_t SignalDelegator::RegisterGuestSignalHandler(int Signal, const GuestSigAction *Action, GuestSigAction *OldAction) { - std::lock_guard lk(GuestDelegatorMutex); - - // Invalid signal specified - if (Signal > MAX_SIGNALS) { + // Now assign the new action + if (Action) { + // These signal dispositions can't be changed on Linux + if (Signal == SIGKILL || Signal == SIGSTOP) { return -EINVAL; } - // If we have an old signal set 
then give it back - if (OldAction) { - *OldAction = HostHandlers[Signal].GuestAction; + HostHandlers[Signal].GuestAction = *Action; + // Only attempt to install a new thunk handler if we were installing a new guest action + if (!InstallHostThunk(Signal)) { + UpdateHostThunk(Signal); } + } - // Now assign the new action - if (Action) { - // These signal dispositions can't be changed on Linux - if (Signal == SIGKILL || Signal == SIGSTOP) { - return -EINVAL; - } - - HostHandlers[Signal].GuestAction = *Action; - // Only attempt to install a new thunk handler if we were installing a new guest action - if (!InstallHostThunk(Signal)) { - UpdateHostThunk(Signal); - } - } + return 0; +} - return 0; - } +void SignalDelegator::CheckXIDHandler() { + std::lock_guard lk(GuestDelegatorMutex); + std::lock_guard lk2(HostDelegatorMutex); - void SignalDelegator::CheckXIDHandler() { - std::lock_guard lk(GuestDelegatorMutex); - std::lock_guard lk2(HostDelegatorMutex); + constexpr size_t SIGNAL_SETXID = 33; - constexpr size_t SIGNAL_SETXID = 33; + kernel_sigaction CurrentAction {}; - kernel_sigaction CurrentAction{}; + // Only update the old action if we haven't ever been installed + const int Result = ::syscall(SYS_rt_sigaction, SIGNAL_SETXID, nullptr, &CurrentAction, 8); + if (Result < 0) { + LogMan::Msg::AFmt("Failed to get status of XID signal"); + return; + } - // Only update the old action if we haven't ever been installed - const int Result = ::syscall(SYS_rt_sigaction, SIGNAL_SETXID, nullptr, &CurrentAction, 8); + SignalHandler& HostHandler = HostHandlers[SIGNAL_SETXID]; + if (CurrentAction.handler != HostHandler.HostAction.handler) { + // GLIBC overwrote our XID handler, reinstate our handler + const int Result = ::syscall(SYS_rt_sigaction, SIGNAL_SETXID, &HostHandler.HostAction, nullptr, 8); if (Result < 0) { - LogMan::Msg::AFmt("Failed to get status of XID signal"); - return; - } - - SignalHandler &HostHandler = HostHandlers[SIGNAL_SETXID]; - if (CurrentAction.handler != 
HostHandler.HostAction.handler) { - // GLIBC overwrote our XID handler, reinstate our handler - const int Result = ::syscall(SYS_rt_sigaction, SIGNAL_SETXID, &HostHandler.HostAction, nullptr, 8); - if (Result < 0) { - LogMan::Msg::AFmt("Failed to reinstate our XID signal handler {}", strerror(errno)); - } + LogMan::Msg::AFmt("Failed to reinstate our XID signal handler {}", strerror(errno)); } } +} - uint64_t SignalDelegator::RegisterGuestSigAltStack(const stack_t *ss, stack_t *old_ss) { - auto Thread = GetTLSThread(); - bool UsingAltStack{}; - uint64_t AltStackBase = reinterpret_cast(ThreadData.GuestAltStack.ss_sp); - uint64_t AltStackEnd = AltStackBase + ThreadData.GuestAltStack.ss_size; - uint64_t GuestSP = Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP]; +uint64_t SignalDelegator::RegisterGuestSigAltStack(const stack_t* ss, stack_t* old_ss) { + auto Thread = GetTLSThread(); + bool UsingAltStack {}; + uint64_t AltStackBase = reinterpret_cast(ThreadData.GuestAltStack.ss_sp); + uint64_t AltStackEnd = AltStackBase + ThreadData.GuestAltStack.ss_size; + uint64_t GuestSP = Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP]; - if (!(ThreadData.GuestAltStack.ss_flags & SS_DISABLE) && - GuestSP >= AltStackBase && - GuestSP <= AltStackEnd) { - UsingAltStack = true; - } + if (!(ThreadData.GuestAltStack.ss_flags & SS_DISABLE) && GuestSP >= AltStackBase && GuestSP <= AltStackEnd) { + UsingAltStack = true; + } - // If we have an old signal set then give it back - if (old_ss) { - *old_ss = ThreadData.GuestAltStack; + // If we have an old signal set then give it back + if (old_ss) { + *old_ss = ThreadData.GuestAltStack; - if (UsingAltStack) { - // We are currently operating on the alt stack - // Let the guest know - old_ss->ss_flags |= SS_ONSTACK; - } - else { - old_ss->ss_flags |= SS_DISABLE; - } + if (UsingAltStack) { + // We are currently operating on the alt stack + // Let the guest know + old_ss->ss_flags |= SS_ONSTACK; + } else { + old_ss->ss_flags 
|= SS_DISABLE; } + } - // Now assign the new action - if (ss) { - // If we tried setting the alt stack while we are using it then throw an error - if (UsingAltStack) { - return -EPERM; - } - - // We need to check for invalid flags - // The only flag that can be passed is SS_AUTODISARM and SS_DISABLE - if ((ss->ss_flags & ~SS_ONSTACK) & // SS_ONSTACK is ignored - ~(SS_AUTODISARM | SS_DISABLE)) { - // A flag remained that isn't one of the supported ones? - return -EINVAL; - } - - if (ss->ss_flags & SS_DISABLE) { - // If SS_DISABLE Is specified then the rest of the details are ignored - ThreadData.GuestAltStack = *ss; - return 0; - } + // Now assign the new action + if (ss) { + // If we tried setting the alt stack while we are using it then throw an error + if (UsingAltStack) { + return -EPERM; + } - // stack size needs to be MINSIGSTKSZ (0x2000) - if (ss->ss_size < X86_MINSIGSTKSZ) { - return -ENOMEM; - } + // We need to check for invalid flags + // The only flag that can be passed is SS_AUTODISARM and SS_DISABLE + if ((ss->ss_flags & ~SS_ONSTACK) & // SS_ONSTACK is ignored + ~(SS_AUTODISARM | SS_DISABLE)) { + // A flag remained that isn't one of the supported ones? + return -EINVAL; + } + if (ss->ss_flags & SS_DISABLE) { + // If SS_DISABLE Is specified then the rest of the details are ignored ThreadData.GuestAltStack = *ss; + return 0; } - return 0; - } - - static void CheckForPendingSignals(FEXCore::Core::InternalThreadState *Thread) { - // Do we have any pending signals that became unmasked? 
- uint64_t PendingSignals = ~ThreadData.CurrentSignalMask.Val & ThreadData.PendingSignals; - if (PendingSignals != 0) { - for (int i = 0; i < 64; ++i) { - if (PendingSignals & (1ULL << i)) { - FHU::Syscalls::tgkill(Thread->ThreadManager.PID, Thread->ThreadManager.TID, i + 1); - // We might not even return here which is spooky - } - } + // stack size needs to be MINSIGSTKSZ (0x2000) + if (ss->ss_size < X86_MINSIGSTKSZ) { + return -ENOMEM; } + + ThreadData.GuestAltStack = *ss; } - uint64_t SignalDelegator::GuestSigProcMask(int how, const uint64_t *set, uint64_t *oldset) { - // The order in which we handle signal mask setting is important here - // old and new can point to the same location in memory. - // Even if the pointers are to same memory location, we must store the original signal mask - // coming in to the syscall. - // 1) Store old mask - // 2) Set mask to new mask if exists - // 3) Give old mask back - auto OldSet = ThreadData.CurrentSignalMask.Val; - - if (!!set) { - uint64_t IgnoredSignalsMask = ~((1ULL << (SIGKILL - 1)) | (1ULL << (SIGSTOP - 1))); - if (how == SIG_BLOCK) { - ThreadData.CurrentSignalMask.Val |= *set & IgnoredSignalsMask; - } - else if (how == SIG_UNBLOCK) { - ThreadData.CurrentSignalMask.Val &= ~(*set & IgnoredSignalsMask); - } - else if (how == SIG_SETMASK) { - ThreadData.CurrentSignalMask.Val = *set & IgnoredSignalsMask; - } - else { - return -EINVAL; - } + return 0; +} - uint64_t HostMask = ThreadData.CurrentSignalMask.Val; - // Now actually set the host mask - // This will hide from the guest that we are not actually setting all of the masks it wants - for (size_t i = 0; i < MAX_SIGNALS; ++i) { - if (HostHandlers[i + 1].Required.load(std::memory_order_relaxed)) { - // If it is a required host signal then we can't mask it - HostMask &= ~(1ULL << i); - } +static void CheckForPendingSignals(FEXCore::Core::InternalThreadState* Thread) { + // Do we have any pending signals that became unmasked? 
+ uint64_t PendingSignals = ~ThreadData.CurrentSignalMask.Val & ThreadData.PendingSignals; + if (PendingSignals != 0) { + for (int i = 0; i < 64; ++i) { + if (PendingSignals & (1ULL << i)) { + FHU::Syscalls::tgkill(Thread->ThreadManager.PID, Thread->ThreadManager.TID, i + 1); + // We might not even return here which is spooky } + } + } +} - ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &HostMask, nullptr, 8); +uint64_t SignalDelegator::GuestSigProcMask(int how, const uint64_t* set, uint64_t* oldset) { + // The order in which we handle signal mask setting is important here + // old and new can point to the same location in memory. + // Even if the pointers are to same memory location, we must store the original signal mask + // coming in to the syscall. + // 1) Store old mask + // 2) Set mask to new mask if exists + // 3) Give old mask back + auto OldSet = ThreadData.CurrentSignalMask.Val; + + if (!!set) { + uint64_t IgnoredSignalsMask = ~((1ULL << (SIGKILL - 1)) | (1ULL << (SIGSTOP - 1))); + if (how == SIG_BLOCK) { + ThreadData.CurrentSignalMask.Val |= *set & IgnoredSignalsMask; + } else if (how == SIG_UNBLOCK) { + ThreadData.CurrentSignalMask.Val &= ~(*set & IgnoredSignalsMask); + } else if (how == SIG_SETMASK) { + ThreadData.CurrentSignalMask.Val = *set & IgnoredSignalsMask; + } else { + return -EINVAL; } - if (!!oldset) { - *oldset = OldSet; + uint64_t HostMask = ThreadData.CurrentSignalMask.Val; + // Now actually set the host mask + // This will hide from the guest that we are not actually setting all of the masks it wants + for (size_t i = 0; i < MAX_SIGNALS; ++i) { + if (HostHandlers[i + 1].Required.load(std::memory_order_relaxed)) { + // If it is a required host signal then we can't mask it + HostMask &= ~(1ULL << i); + } } - CheckForPendingSignals(GetTLSThread()); + ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &HostMask, nullptr, 8); + } - return 0; + if (!!oldset) { + *oldset = OldSet; } - uint64_t SignalDelegator::GuestSigPending(uint64_t *set, size_t 
sigsetsize) { - if (sigsetsize > sizeof(uint64_t)) { - return -EINVAL; - } + CheckForPendingSignals(GetTLSThread()); - *set = ThreadData.PendingSignals; + return 0; +} - sigset_t HostSet{}; - if (sigpending(&HostSet) == 0) { - uint64_t HostSignals{}; - for (size_t i = 0; i < MAX_SIGNALS; ++i) { - if (sigismember(&HostSet, i + 1)) { - HostSignals |= (1ULL << i); - } - } +uint64_t SignalDelegator::GuestSigPending(uint64_t* set, size_t sigsetsize) { + if (sigsetsize > sizeof(uint64_t)) { + return -EINVAL; + } - // Merge the real pending signal mask as well - *set |= HostSignals; + *set = ThreadData.PendingSignals; + + sigset_t HostSet {}; + if (sigpending(&HostSet) == 0) { + uint64_t HostSignals {}; + for (size_t i = 0; i < MAX_SIGNALS; ++i) { + if (sigismember(&HostSet, i + 1)) { + HostSignals |= (1ULL << i); + } } - return 0; + + // Merge the real pending signal mask as well + *set |= HostSignals; } + return 0; +} - uint64_t SignalDelegator::GuestSigSuspend(uint64_t *set, size_t sigsetsize) { - if (sigsetsize > sizeof(uint64_t)) { - return -EINVAL; - } +uint64_t SignalDelegator::GuestSigSuspend(uint64_t* set, size_t sigsetsize) { + if (sigsetsize > sizeof(uint64_t)) { + return -EINVAL; + } - uint64_t IgnoredSignalsMask = ~((1ULL << (SIGKILL - 1)) | (1ULL << (SIGSTOP - 1))); + uint64_t IgnoredSignalsMask = ~((1ULL << (SIGKILL - 1)) | (1ULL << (SIGSTOP - 1))); - // Backup the mask - ThreadData.PreviousSuspendMask = ThreadData.CurrentSignalMask; - // Set the new mask - ThreadData.CurrentSignalMask.Val = *set & IgnoredSignalsMask; - sigset_t HostSet{}; + // Backup the mask + ThreadData.PreviousSuspendMask = ThreadData.CurrentSignalMask; + // Set the new mask + ThreadData.CurrentSignalMask.Val = *set & IgnoredSignalsMask; + sigset_t HostSet {}; - sigemptyset(&HostSet); + sigemptyset(&HostSet); - for (int32_t i = 0; i < MAX_SIGNALS; ++i) { - if (*set & (1ULL << i)) { - sigaddset(&HostSet, i + 1); - } + for (int32_t i = 0; i < MAX_SIGNALS; ++i) { + if (*set & (1ULL << i)) 
{ + sigaddset(&HostSet, i + 1); } + } - // Additionally we must always listen to SIGNAL_FOR_PAUSE - // This technically forces us in to a race but should be fine - // SIGBUS and SIGILL can't happen so we don't need to listen for them - //sigaddset(&HostSet, SIGNAL_FOR_PAUSE); + // Additionally we must always listen to SIGNAL_FOR_PAUSE + // This technically forces us in to a race but should be fine + // SIGBUS and SIGILL can't happen so we don't need to listen for them + // sigaddset(&HostSet, SIGNAL_FOR_PAUSE); - // Spin this in a loop until we aren't sigsuspended - // This can happen in the case that the guest has sent signal that we can't block - uint64_t Result = sigsuspend(&HostSet); + // Spin this in a loop until we aren't sigsuspended + // This can happen in the case that the guest has sent signal that we can't block + uint64_t Result = sigsuspend(&HostSet); - // Restore Previous signal mask we are emulating - // XXX: Might be unsafe if the signal handler adjusted the thread's signal mask - // But since we don't support the guest adjusting the mask through the context object - // then this is safe-ish - ThreadData.CurrentSignalMask = ThreadData.PreviousSuspendMask; + // Restore Previous signal mask we are emulating + // XXX: Might be unsafe if the signal handler adjusted the thread's signal mask + // But since we don't support the guest adjusting the mask through the context object + // then this is safe-ish + ThreadData.CurrentSignalMask = ThreadData.PreviousSuspendMask; - CheckForPendingSignals(GetTLSThread()); + CheckForPendingSignals(GetTLSThread()); - return Result == -1 ? -errno : Result; + return Result == -1 ? 
-errno : Result; +} +uint64_t SignalDelegator::GuestSigTimedWait(uint64_t* set, siginfo_t* info, const struct timespec* timeout, size_t sigsetsize) { + if (sigsetsize > sizeof(uint64_t)) { + return -EINVAL; } - uint64_t SignalDelegator::GuestSigTimedWait(uint64_t *set, siginfo_t *info, const struct timespec *timeout, size_t sigsetsize) { - if (sigsetsize > sizeof(uint64_t)) { - return -EINVAL; - } + uint64_t Result = ::syscall(SYS_rt_sigtimedwait, set, info, timeout); - uint64_t Result = ::syscall(SYS_rt_sigtimedwait, set, info, timeout); + return Result == -1 ? -errno : Result; +} - return Result == -1 ? -errno : Result; +uint64_t SignalDelegator::GuestSignalFD(int fd, const uint64_t* set, size_t sigsetsize, int flags) { + if (sigsetsize > sizeof(uint64_t)) { + return -EINVAL; } - uint64_t SignalDelegator::GuestSignalFD(int fd, const uint64_t *set, size_t sigsetsize, int flags) { - if (sigsetsize > sizeof(uint64_t)) { - return -EINVAL; - } - - sigset_t HostSet{}; - sigemptyset(&HostSet); + sigset_t HostSet {}; + sigemptyset(&HostSet); - for (size_t i = 0; i < MAX_SIGNALS; ++i) { - if (HostHandlers[i + 1].Required.load(std::memory_order_relaxed)) { - // For now skip our internal signals - continue; - } + for (size_t i = 0; i < MAX_SIGNALS; ++i) { + if (HostHandlers[i + 1].Required.load(std::memory_order_relaxed)) { + // For now skip our internal signals + continue; + } - if (*set & (1ULL << i)) { - sigaddset(&HostSet, i + 1); - } + if (*set & (1ULL << i)) { + sigaddset(&HostSet, i + 1); } + } - // XXX: This is a barebones implementation just to get applications that listen for SIGCHLD to work - // In the future we need our own listern thread that forwards the result - // Thread is necessary to prevent deadlocks for a thread that has signaled on the same thread listening to the FD and blocking is enabled - uint64_t Result = signalfd(fd, &HostSet, flags); + // XXX: This is a barebones implementation just to get applications that listen for SIGCHLD to work + // In the 
future we need our own listern thread that forwards the result + // Thread is necessary to prevent deadlocks for a thread that has signaled on the same thread listening to the FD and blocking is enabled + uint64_t Result = signalfd(fd, &HostSet, flags); - return Result == -1 ? -errno : Result; - } + return Result == -1 ? -errno : Result; +} - fextl::unique_ptr CreateSignalDelegator(FEXCore::Context::Context *CTX, const std::string_view ApplicationName) { - return fextl::make_unique(CTX, ApplicationName); - } +fextl::unique_ptr CreateSignalDelegator(FEXCore::Context::Context* CTX, const std::string_view ApplicationName) { + return fextl::make_unique(CTX, ApplicationName); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp index 94eb68cef0..f0c74940d7 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp @@ -60,14 +60,11 @@ desc: Glue logic, brk allocations namespace FEX::HLE { class SignalDelegator; -SyscallHandler *_SyscallHandler{}; +SyscallHandler* _SyscallHandler {}; template -uint64_t GetDentsEmulation(int fd, T *dirp, uint32_t count) { - uint64_t Result = syscall(SYSCALL_DEF(getdents64), - static_cast(fd), - dirp, - static_cast(count)); +uint64_t GetDentsEmulation(int fd, T* dirp, uint32_t count) { + uint64_t Result = syscall(SYSCALL_DEF(getdents64), static_cast(fd), dirp, static_cast(count)); // Now copy back in to the array we were given if (Result != -1) { @@ -76,7 +73,7 @@ uint64_t GetDentsEmulation(int fd, T *dirp, uint32_t count) { if constexpr (sizeof(decltype(FEX::HLE::x64::linux_dirent_64::d_ino)) > sizeof(decltype(T::d_ino))) { uint64_t TmpOffset = 0; while (TmpOffset < Result) { - FEX::HLE::x64::linux_dirent_64 *Tmp = (FEX::HLE::x64::linux_dirent_64*)(reinterpret_cast(dirp) + TmpOffset); + FEX::HLE::x64::linux_dirent_64* Tmp = 
(FEX::HLE::x64::linux_dirent_64*)(reinterpret_cast(dirp) + TmpOffset); decltype(T::d_ino) Result_d_ino = Tmp->d_ino; if (Result_d_ino != Tmp->d_ino) { @@ -95,8 +92,8 @@ uint64_t GetDentsEmulation(int fd, T *dirp, uint32_t count) { // than the data returned by getdents64. // This means FEX is guaranteed to /never/ fill the full getdents buffer to the guest, but we may temporarily use it all. while (TmpOffset < Result) { - T *Outgoing = (T*)(reinterpret_cast(dirp) + Offset); - FEX::HLE::x64::linux_dirent_64 *Tmp = (FEX::HLE::x64::linux_dirent_64*)(reinterpret_cast(dirp) + TmpOffset); + T* Outgoing = (T*)(reinterpret_cast(dirp) + Offset); + FEX::HLE::x64::linux_dirent_64* Tmp = (FEX::HLE::x64::linux_dirent_64*)(reinterpret_cast(dirp) + TmpOffset); if (!Tmp->d_reclen) { break; @@ -130,11 +127,9 @@ uint64_t GetDentsEmulation(int fd, T *dirp, uint32_t count) { SYSCALL_ERRNO(); } -template -uint64_t GetDentsEmulation(int, FEX::HLE::x64::linux_dirent*, uint32_t); +template uint64_t GetDentsEmulation(int, FEX::HLE::x64::linux_dirent*, uint32_t); -template -uint64_t GetDentsEmulation(int, FEX::HLE::x32::linux_dirent_32*, uint32_t); +template uint64_t GetDentsEmulation(int, FEX::HLE::x32::linux_dirent_32*, uint32_t); static bool IsShebangFile(std::span Data) { // File isn't large enough to even contain a shebang. @@ -143,13 +138,10 @@ static bool IsShebangFile(std::span Data) { } // Handle shebang files. - if (Data[0] == '#' && - Data[1] == '!') { - fextl::string InterpreterLine { - Data.begin() + 2, // strip off "#!" prefix - std::find(Data.begin(), Data.end(), '\n') - }; - fextl::vector ShebangArguments{}; + if (Data[0] == '#' && Data[1] == '!') { + fextl::string InterpreterLine {Data.begin() + 2, // strip off "#!" 
prefix + std::find(Data.begin(), Data.end(), '\n')}; + fextl::vector ShebangArguments {}; // Shebang line can have a single argument fextl::istringstream InterpreterSS(InterpreterLine); @@ -162,7 +154,7 @@ static bool IsShebangFile(std::span Data) { } // Executable argument - fextl::string &ShebangProgram = ShebangArguments[0]; + fextl::string& ShebangProgram = ShebangArguments[0]; // If the filename is absolute then prepend the rootfs // If it is relative then don't append the rootfs @@ -190,7 +182,7 @@ static bool IsShebangFD(int FD) { return IsShebangFile(std::span(Header.data(), ReadSize)); } -static bool IsShebangFilename(fextl::string const &Filename) { +static bool IsShebangFilename(const fextl::string& Filename) { // Open the Filename to determine if it is a shebang file. int FD = open(Filename.c_str(), O_RDONLY | O_CLOEXEC); if (FD == -1) { @@ -202,37 +194,33 @@ static bool IsShebangFilename(fextl::string const &Filename) { return IsShebang; } -uint64_t ExecveHandler(const char *pathname, char* const* argv, char* const* envp, ExecveAtArgs Args) { - fextl::string Filename{}; +uint64_t ExecveHandler(const char* pathname, char* const* argv, char* const* envp, ExecveAtArgs Args) { + fextl::string Filename {}; fextl::string RootFS = FEX::HLE::_SyscallHandler->RootFSPath(); - ELFLoader::ELFContainer::ELFType Type{}; + ELFLoader::ELFContainer::ELFType Type {}; // AT_EMPTY_PATH is only used if the pathname is empty. 
const bool IsFDExec = (Args.flags & AT_EMPTY_PATH) && strlen(pathname) == 0; const bool SupportsProcFSInterpreter = FEX::HLE::_SyscallHandler->FM.SupportsProcFSInterpreterPath(); fextl::string FDExecEnv; - bool IsShebang{}; + bool IsShebang {}; if (IsFDExec) { Type = ELFLoader::ELFContainer::GetELFType(Args.dirfd); IsShebang = IsShebangFD(Args.dirfd); - } - else - { + } else { // For absolute paths, check the rootfs first (if available) if (pathname[0] == '/') { auto Path = FEX::HLE::_SyscallHandler->FM.GetEmulatedPath(pathname, true); if (!Path.empty() && FHU::Filesystem::Exists(Path)) { Filename = Path; - } - else { + } else { Filename = pathname; } - } - else { + } else { Filename = pathname; } @@ -247,9 +235,7 @@ uint64_t ExecveHandler(const char *pathname, char* const* argv, char* const* env char PidSelfPath[50]; snprintf(PidSelfPath, 50, "/proc/%i/exe", pid); - if (strcmp(pathname, "/proc/self/exe") == 0 || - strcmp(pathname, "/proc/thread-self/exe") == 0 || - strcmp(pathname, PidSelfPath) == 0) { + if (strcmp(pathname, "/proc/self/exe") == 0 || strcmp(pathname, "/proc/thread-self/exe") == 0 || strcmp(pathname, PidSelfPath) == 0) { // If the application is trying to execve `/proc/self/exe` or its variants, // then we need to redirect this path to the true application path. // This is because this path is a symlink to the executing application, which is always `FEXInterpreter` or `FEXLoader`. 
@@ -275,11 +261,9 @@ uint64_t ExecveHandler(const char *pathname, char* const* argv, char* const* env // Kernel does its own checks for file format support for this // We can only call execve directly if we both have an interpreter installed AND were ran with the interpreter // If the user ran FEX through FEXLoader then we must go down the emulated path - uint64_t Result{}; - if (FEX::HLE::_SyscallHandler->IsInterpreterInstalled() && - FEX::HLE::_SyscallHandler->IsInterpreter() && - (Type == ELFLoader::ELFContainer::ELFType::TYPE_X86_32 || - Type == ELFLoader::ELFContainer::ELFType::TYPE_X86_64)) { + uint64_t Result {}; + if (FEX::HLE::_SyscallHandler->IsInterpreterInstalled() && FEX::HLE::_SyscallHandler->IsInterpreter() && + (Type == ELFLoader::ELFContainer::ELFType::TYPE_X86_32 || Type == ELFLoader::ELFContainer::ELFType::TYPE_X86_64)) { // If the FEX interpreter is installed then just execve the ELF file // This will stay inside of our emulated environment since binfmt_misc will capture it Result = ::syscall(SYS_execveat, Args.dirfd, Filename.c_str(), argv, envp, Args.flags); @@ -296,9 +280,9 @@ uint64_t ExecveHandler(const char *pathname, char* const* argv, char* const* env // We don't have an interpreter installed or we are executing a non-ELF executable // We now need to munge the arguments - fextl::vector ExecveArgs{}; - fextl::vector EnvpArgs{}; - char *const *EnvpPtr = envp; + fextl::vector ExecveArgs {}; + fextl::vector EnvpArgs {}; + char* const* EnvpPtr = envp; const char NullString[] = ""; FEX::HLE::_SyscallHandler->GetCodeLoader()->GetExecveArguments(&ExecveArgs); if (!FEX::HLE::_SyscallHandler->IsInterpreter()) { @@ -321,10 +305,9 @@ uint64_t ExecveHandler(const char *pathname, char* const* argv, char* const* env ExecveArgs.emplace_back(*OldArgv); ++OldArgv; } - } - else { + } else { // Linux kernel will stick an empty argument in to the argv list if none are provided. 
- ExecveArgs.emplace_back(NullString); + ExecveArgs.emplace_back(NullString); } // Emplace nullptr at the end to stop @@ -362,12 +345,11 @@ uint64_t ExecveHandler(const char *pathname, char* const* argv, char* const* env // Emplace nullptr at the end to stop EnvpArgs.emplace_back(nullptr); - EnvpPtr = const_cast(EnvpArgs.data()); + EnvpPtr = const_cast(EnvpArgs.data()); } - const char *InterpreterPath = SupportsProcFSInterpreter ? "/proc/self/interpreter" : "/proc/self/exe"; - Result = ::syscall(SYS_execveat, Args.dirfd, InterpreterPath, - const_cast(ExecveArgs.data()), EnvpPtr, Args.flags); + const char* InterpreterPath = SupportsProcFSInterpreter ? "/proc/self/interpreter" : "/proc/self/exe"; + Result = ::syscall(SYS_execveat, Args.dirfd, InterpreterPath, const_cast(ExecveArgs.data()), EnvpPtr, Args.flags); SYSCALL_ERRNO(); } @@ -381,10 +363,10 @@ static bool AllFlagsSet(uint64_t Flags, uint64_t Mask) { } struct StackFrameData { - FEXCore::Core::InternalThreadState *Thread{}; - FEXCore::Context::Context *CTX{}; - FEXCore::Core::CpuStateFrame NewFrame{}; - FEX::HLE::clone3_args GuestArgs{}; + FEXCore::Core::InternalThreadState* Thread {}; + FEXCore::Context::Context* CTX {}; + FEXCore::Core::CpuStateFrame NewFrame {}; + FEX::HLE::clone3_args GuestArgs {}; }; struct StackFramePlusRet { @@ -395,7 +377,7 @@ struct StackFramePlusRet { [[noreturn]] static void Clone3HandlerRet() { - StackFrameData *Data = (StackFrameData*)alloca(0); + StackFrameData* Data = (StackFrameData*)alloca(0); uint64_t Result = FEX::HLE::HandleNewClone(Data->Thread, Data->CTX, &Data->NewFrame, &Data->GuestArgs); FEX::LinuxEmulation::Threads::DeallocateStackObject(Data->GuestArgs.NewStack); // To behave like a real clone, we now just need to call exit here @@ -403,8 +385,8 @@ static void Clone3HandlerRet() { FEX_UNREACHABLE; } -static int Clone2HandlerRet(void *arg) { - StackFrameData *Data = (StackFrameData*)arg; +static int Clone2HandlerRet(void* arg) { + StackFrameData* Data = 
(StackFrameData*)arg; uint64_t Result = FEX::HLE::HandleNewClone(Data->Thread, Data->CTX, &Data->NewFrame, &Data->GuestArgs); FEX::LinuxEmulation::Threads::DeallocateStackObject(Data->GuestArgs.NewStack); FEXCore::Allocator::free(arg); @@ -423,38 +405,39 @@ static int Clone2HandlerRet(void *arg) { #define CLONE_NEWTIME 0x00000080ULL #endif -static void PrintFlags(uint64_t Flags){ -#define FLAGPRINT(x, y) if (Flags & (y)) LogMan::Msg::IFmt("\tFlag: " #x) - FLAGPRINT(CSIGNAL, 0x000000FF); - FLAGPRINT(CLONE_VM, 0x00000100); - FLAGPRINT(CLONE_FS, 0x00000200); - FLAGPRINT(CLONE_FILES, 0x00000400); - FLAGPRINT(CLONE_SIGHAND, 0x00000800); - FLAGPRINT(CLONE_PTRACE, 0x00002000); - FLAGPRINT(CLONE_VFORK, 0x00004000); - FLAGPRINT(CLONE_PARENT, 0x00008000); - FLAGPRINT(CLONE_THREAD, 0x00010000); - FLAGPRINT(CLONE_NEWNS, 0x00020000); - FLAGPRINT(CLONE_SYSVSEM, 0x00040000); - FLAGPRINT(CLONE_SETTLS, 0x00080000); - FLAGPRINT(CLONE_PARENT_SETTID, 0x00100000); +static void PrintFlags(uint64_t Flags) { +#define FLAGPRINT(x, y) \ + if (Flags & (y)) LogMan::Msg::IFmt("\tFlag: " #x) + FLAGPRINT(CSIGNAL, 0x000000FF); + FLAGPRINT(CLONE_VM, 0x00000100); + FLAGPRINT(CLONE_FS, 0x00000200); + FLAGPRINT(CLONE_FILES, 0x00000400); + FLAGPRINT(CLONE_SIGHAND, 0x00000800); + FLAGPRINT(CLONE_PTRACE, 0x00002000); + FLAGPRINT(CLONE_VFORK, 0x00004000); + FLAGPRINT(CLONE_PARENT, 0x00008000); + FLAGPRINT(CLONE_THREAD, 0x00010000); + FLAGPRINT(CLONE_NEWNS, 0x00020000); + FLAGPRINT(CLONE_SYSVSEM, 0x00040000); + FLAGPRINT(CLONE_SETTLS, 0x00080000); + FLAGPRINT(CLONE_PARENT_SETTID, 0x00100000); FLAGPRINT(CLONE_CHILD_CLEARTID, 0x00200000); - FLAGPRINT(CLONE_DETACHED, 0x00400000); - FLAGPRINT(CLONE_UNTRACED, 0x00800000); - FLAGPRINT(CLONE_CHILD_SETTID, 0x01000000); - FLAGPRINT(CLONE_NEWCGROUP, 0x02000000); - FLAGPRINT(CLONE_NEWUTS, 0x04000000); - FLAGPRINT(CLONE_NEWIPC, 0x08000000); - FLAGPRINT(CLONE_NEWUSER, 0x10000000); - FLAGPRINT(CLONE_NEWPID, 0x20000000); - FLAGPRINT(CLONE_NEWNET, 0x40000000); - 
FLAGPRINT(CLONE_IO, 0x80000000); - FLAGPRINT(CLONE_PIDFD, 0x00001000); + FLAGPRINT(CLONE_DETACHED, 0x00400000); + FLAGPRINT(CLONE_UNTRACED, 0x00800000); + FLAGPRINT(CLONE_CHILD_SETTID, 0x01000000); + FLAGPRINT(CLONE_NEWCGROUP, 0x02000000); + FLAGPRINT(CLONE_NEWUTS, 0x04000000); + FLAGPRINT(CLONE_NEWIPC, 0x08000000); + FLAGPRINT(CLONE_NEWUSER, 0x10000000); + FLAGPRINT(CLONE_NEWPID, 0x20000000); + FLAGPRINT(CLONE_NEWNET, 0x40000000); + FLAGPRINT(CLONE_IO, 0x80000000); + FLAGPRINT(CLONE_PIDFD, 0x00001000); #undef FLAGPRINT }; -static uint64_t Clone2Handler(FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clone3_args *args) { - StackFrameData *Data = (StackFrameData *)FEXCore::Allocator::malloc(sizeof(StackFrameData)); +static uint64_t Clone2Handler(FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::clone3_args* args) { + StackFrameData* Data = (StackFrameData*)FEXCore::Allocator::malloc(sizeof(StackFrameData)); Data->Thread = Frame->Thread; Data->CTX = Frame->Thread->CTX; Data->GuestArgs = *args; @@ -463,43 +446,41 @@ static uint64_t Clone2Handler(FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clo memcpy(&Data->NewFrame, Frame, sizeof(FEXCore::Core::CpuStateFrame)); // Remove flags that will break us - constexpr uint64_t INVALID_FOR_HOST = - CLONE_SETTLS; + constexpr uint64_t INVALID_FOR_HOST = CLONE_SETTLS; uint64_t Flags = args->args.flags & ~INVALID_FOR_HOST; - uint64_t Result = ::clone( - Clone2HandlerRet, // To be called function - (void*)((uint64_t)args->NewStack + args->StackSize), // Stack - Flags, //Flags - Data, //Argument - (pid_t*)args->args.parent_tid, // parent_tid - 0, // XXX: What is correct for this? tls - (pid_t*)args->args.child_tid); // child_tid + uint64_t Result = ::clone(Clone2HandlerRet, // To be called function + (void*)((uint64_t)args->NewStack + args->StackSize), // Stack + Flags, // Flags + Data, // Argument + (pid_t*)args->args.parent_tid, // parent_tid + 0, // XXX: What is correct for this? 
tls + (pid_t*)args->args.child_tid); // child_tid // Only parent will get here SYSCALL_ERRNO(); } -static uint64_t Clone3Handler(FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clone3_args *args) { +static uint64_t Clone3Handler(FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::clone3_args* args) { constexpr size_t Offset = sizeof(StackFramePlusRet); - StackFramePlusRet *Data = (StackFramePlusRet*)(reinterpret_cast(args->NewStack) + args->StackSize - Offset); + StackFramePlusRet* Data = (StackFramePlusRet*)(reinterpret_cast(args->NewStack) + args->StackSize - Offset); Data->Ret = (uint64_t)Clone3HandlerRet; Data->Data.Thread = Frame->Thread; Data->Data.CTX = Frame->Thread->CTX; Data->Data.GuestArgs = *args; - FEX::HLE::kernel_clone3_args HostArgs{}; - HostArgs.flags = args->args.flags; - HostArgs.pidfd = args->args.pidfd; - HostArgs.child_tid = args->args.child_tid; - HostArgs.parent_tid = args->args.parent_tid; + FEX::HLE::kernel_clone3_args HostArgs {}; + HostArgs.flags = args->args.flags; + HostArgs.pidfd = args->args.pidfd; + HostArgs.child_tid = args->args.child_tid; + HostArgs.parent_tid = args->args.parent_tid; HostArgs.exit_signal = args->args.exit_signal; // Host stack is always created - HostArgs.stack = reinterpret_cast(args->NewStack); - HostArgs.stack_size = args->StackSize - Offset; // Needs to be 16 byte aligned - HostArgs.tls = 0; // XXX: What is correct for this? - HostArgs.set_tid = args->args.set_tid; - HostArgs.set_tid_size= args->args.set_tid_size; - HostArgs.cgroup = args->args.cgroup; + HostArgs.stack = reinterpret_cast(args->NewStack); + HostArgs.stack_size = args->StackSize - Offset; // Needs to be 16 byte aligned + HostArgs.tls = 0; // XXX: What is correct for this? 
+ HostArgs.set_tid = args->args.set_tid; + HostArgs.set_tid_size = args->args.set_tid_size; + HostArgs.cgroup = args->args.cgroup; // Create a copy of the parent frame memcpy(&Data->Data.NewFrame, Frame, sizeof(FEXCore::Core::CpuStateFrame)); @@ -509,7 +490,7 @@ static uint64_t Clone3Handler(FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clo SYSCALL_ERRNO(); }; -uint64_t CloneHandler(FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clone3_args *args) { +uint64_t CloneHandler(FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::clone3_args* args) { uint64_t flags = args->args.flags; if (flags & CLONE_CLEAR_SIGHAND) { @@ -520,20 +501,11 @@ uint64_t CloneHandler(FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clone3_args return -EINVAL; } - auto HasUnhandledFlags = [](FEX::HLE::clone3_args *args) -> bool { - constexpr uint64_t UNHANDLED_FLAGS = - CLONE_NEWNS | - // CLONE_UNTRACED | - CLONE_NEWCGROUP | - CLONE_NEWUTS | - CLONE_NEWUTS | - CLONE_NEWIPC | - CLONE_NEWUSER | - CLONE_NEWPID | - CLONE_NEWNET | - CLONE_IO | - CLONE_CLEAR_SIGHAND | - CLONE_INTO_CGROUP; + auto HasUnhandledFlags = [](FEX::HLE::clone3_args* args) -> bool { + constexpr uint64_t UNHANDLED_FLAGS = CLONE_NEWNS | + // CLONE_UNTRACED | + CLONE_NEWCGROUP | CLONE_NEWUTS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWPID | + CLONE_NEWNET | CLONE_IO | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP; if ((args->args.flags & UNHANDLED_FLAGS) != 0) { // Basic unhandled flags @@ -553,12 +525,11 @@ uint64_t CloneHandler(FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clone3_args } if (AnyFlagsSet(args->args.flags, CLONE_THREAD)) { - if (!AllFlagsSet(args->args.flags, CLONE_SYSVSEM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND)) { + if (!AllFlagsSet(args->args.flags, CLONE_SYSVSEM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND)) { LogMan::Msg::IFmt("clone: CLONE_THREAD: Unsuported flags w/ CLONE_THREAD (Shared Resources), {:X}", args->args.flags); return false; } - } - else { + } else { if (AnyFlagsSet(args->args.flags, 
CLONE_SYSVSEM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_VM)) { // CLONE_VM is particularly nasty here // Memory regions at the point of clone(More similar to a fork) are shared @@ -591,11 +562,10 @@ uint64_t CloneHandler(FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clone3_args FEX::HLE::_SyscallHandler->LockBeforeFork(Frame->Thread); - uint64_t Result{}; + uint64_t Result {}; if (args->Type == TYPE_CLONE2) { Result = Clone2Handler(Frame, args); - } - else { + } else { Result = Clone3Handler(Frame, args); } @@ -607,16 +577,14 @@ uint64_t CloneHandler(FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clone3_args ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &args->SignalMask, nullptr, sizeof(args->SignalMask)); } return Result; - } - else { + } else { LogMan::Msg::IFmt("Unsupported flag with CLONE_THREAD. This breaks TLS, falling down classic thread path"); PrintFlags(flags); } } constexpr uint64_t TASK_MAX = (1ULL << 48); // 48-bits until we can query the host side VA sanely. AArch64 doesn't expose this in cpuinfo - if (args->args.tls && - args->args.tls >= TASK_MAX) { + if (args->args.tls && args->args.tls >= TASK_MAX) { return -EPERM; } @@ -629,12 +597,9 @@ uint64_t CloneHandler(FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clone3_args if (!(flags & CLONE_THREAD)) { // CLONE_PARENT is ignored (Implied by CLONE_THREAD) - return FEX::HLE::ForkGuest(Thread, Frame, flags, - reinterpret_cast(args->args.stack), - args->args.stack_size, - reinterpret_cast(args->args.parent_tid), - reinterpret_cast(args->args.child_tid), - reinterpret_cast(args->args.tls)); + return FEX::HLE::ForkGuest(Thread, Frame, flags, reinterpret_cast(args->args.stack), args->args.stack_size, + reinterpret_cast(args->args.parent_tid), reinterpret_cast(args->args.child_tid), + reinterpret_cast(args->args.tls)); } else { auto NewThread = FEX::HLE::CreateNewThread(Thread->CTX, Frame, args); @@ -656,23 +621,21 @@ uint64_t CloneHandler(FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clone3_args } }; 
-uint64_t SyscallHandler::HandleBRK(FEXCore::Core::CpuStateFrame *Frame, void *Addr) { +uint64_t SyscallHandler::HandleBRK(FEXCore::Core::CpuStateFrame* Frame, void* Addr) { std::lock_guard lk(MMapMutex); uint64_t Result; if (Addr == nullptr) { // Just wants to get the location of the program break atm Result = DataSpace + DataSpaceSize; - } - else { + } else { // Allocating out data space uint64_t NewEnd = reinterpret_cast(Addr); if (NewEnd < DataSpace) { // Not allowed to move brk end below original start // Set the size to zero DataSpaceSize = 0; - } - else { + } else { uint64_t NewSize = NewEnd - DataSpace; uint64_t NewSizeAligned = FEXCore::AlignUp(NewSize, 4096); @@ -687,18 +650,17 @@ uint64_t SyscallHandler::HandleBRK(FEXCore::Core::CpuStateFrame *Frame, void *Ad LOGMAN_THROW_A_FMT(ok != -1, "Munmap failed"); DataSpaceMaxSize = NewSizeAligned; - } - else if (NewSize > DataSpaceMaxSize) { + } else if (NewSize > DataSpaceMaxSize) { constexpr static uint64_t SizeAlignment = 8 * 1024 * 1024; uint64_t AllocateNewSize = FEXCore::AlignUp(NewSize, SizeAlignment) - DataSpaceMaxSize; - if (!Is64BitMode() && - (DataSpace + DataSpaceMaxSize + AllocateNewSize > 0x1'0000'0000ULL)) { + if (!Is64BitMode() && (DataSpace + DataSpaceMaxSize + AllocateNewSize > 0x1'0000'0000ULL)) { // If we are 32bit and we tried going about the 32bit limit then out of memory return DataSpace + DataSpaceSize; } - uint64_t NewBRK{}; - NewBRK = (uint64_t)GuestMmap(Frame->Thread, (void*)(DataSpace + DataSpaceMaxSize), AllocateNewSize, PROT_READ | PROT_WRITE, MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + uint64_t NewBRK {}; + NewBRK = (uint64_t)GuestMmap(Frame->Thread, (void*)(DataSpace + DataSpaceMaxSize), AllocateNewSize, PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (NewBRK != ~0ULL && NewBRK != (DataSpace + DataSpaceMaxSize)) { @@ -712,8 +674,7 @@ uint64_t SyscallHandler::HandleBRK(FEXCore::Core::CpuStateFrame *Frame, void *Ad if (NewBRK 
== ~0ULL) { // If we couldn't allocate a new region then out of memory return DataSpace + DataSpaceSize; - } - else { + } else { // Increase our BRK size DataSpaceMaxSize += AllocateNewSize; } @@ -732,7 +693,7 @@ void SyscallHandler::DefaultProgramBreak(uint64_t Base, uint64_t Size) { DataSpaceStartingSize = Size; } -SyscallHandler::SyscallHandler(FEXCore::Context::Context *_CTX, FEX::HLE::SignalDelegator *_SignalDelegation) +SyscallHandler::SyscallHandler(FEXCore::Context::Context* _CTX, FEX::HLE::SignalDelegator* _SignalDelegation) : TM {_CTX, _SignalDelegation} , FM {_CTX} , CTX {_CTX} @@ -750,14 +711,14 @@ SyscallHandler::~SyscallHandler() { } uint32_t SyscallHandler::CalculateHostKernelVersion() { - struct utsname buf{}; + struct utsname buf {}; if (uname(&buf) == -1) { return 0; } - uint32_t Major{}; - uint32_t Minor{}; - uint32_t Patch{}; + uint32_t Major {}; + uint32_t Minor {}; + uint32_t Patch {}; // Parse kernel version in the form of `..[Optional Data]` const auto End = buf.release + sizeof(buf.release); @@ -773,27 +734,32 @@ uint32_t SyscallHandler::CalculateGuestKernelVersion() { return std::max(KernelVersion(5, 0), std::min(KernelVersion(6, 6), GetHostKernelVersion())); } -uint64_t SyscallHandler::HandleSyscall(FEXCore::Core::CpuStateFrame *Frame, FEXCore::HLE::SyscallArguments *Args) { +uint64_t SyscallHandler::HandleSyscall(FEXCore::Core::CpuStateFrame* Frame, FEXCore::HLE::SyscallArguments* Args) { if (Args->Argument[0] >= Definitions.size()) { return -ENOSYS; } - auto &Def = Definitions[Args->Argument[0]]; - uint64_t Result{}; + auto& Def = Definitions[Args->Argument[0]]; + uint64_t Result {}; switch (Def.NumArgs) { case 0: Result = std::invoke(Def.Ptr0, Frame); break; case 1: Result = std::invoke(Def.Ptr1, Frame, Args->Argument[1]); break; case 2: Result = std::invoke(Def.Ptr2, Frame, Args->Argument[1], Args->Argument[2]); break; case 3: Result = std::invoke(Def.Ptr3, Frame, Args->Argument[1], Args->Argument[2], Args->Argument[3]); break; case 
4: Result = std::invoke(Def.Ptr4, Frame, Args->Argument[1], Args->Argument[2], Args->Argument[3], Args->Argument[4]); break; - case 5: Result = std::invoke(Def.Ptr5, Frame, Args->Argument[1], Args->Argument[2], Args->Argument[3], Args->Argument[4], Args->Argument[5]); break; - case 6: Result = std::invoke(Def.Ptr6, Frame, Args->Argument[1], Args->Argument[2], Args->Argument[3], Args->Argument[4], Args->Argument[5], Args->Argument[6]); break; + case 5: + Result = std::invoke(Def.Ptr5, Frame, Args->Argument[1], Args->Argument[2], Args->Argument[3], Args->Argument[4], Args->Argument[5]); + break; + case 6: + Result = std::invoke(Def.Ptr6, Frame, Args->Argument[1], Args->Argument[2], Args->Argument[3], Args->Argument[4], Args->Argument[5], + Args->Argument[6]); + break; // for missing syscalls case 255: return std::invoke(Def.Ptr1, Frame, Args->Argument[0]); default: LOGMAN_MSG_A_FMT("Unhandled syscall: {}", Args->Argument[0]); return -1; - break; + break; } #ifdef DEBUG_STRACE Strace(Args, Result); @@ -802,40 +768,44 @@ uint64_t SyscallHandler::HandleSyscall(FEXCore::Core::CpuStateFrame *Frame, FEXC } #ifdef DEBUG_STRACE -void SyscallHandler::Strace(FEXCore::HLE::SyscallArguments *Args, uint64_t Ret) { - auto &Def = Definitions[Args->Argument[0]]; +void SyscallHandler::Strace(FEXCore::HLE::SyscallArguments* Args, uint64_t Ret) { + auto& Def = Definitions[Args->Argument[0]]; switch (Def.NumArgs) { - case 0: LogMan::Msg::D(Def.StraceFmt.c_str(), Ret); break; - case 1: LogMan::Msg::D(Def.StraceFmt.c_str(), Args->Argument[1], Ret); break; - case 2: LogMan::Msg::D(Def.StraceFmt.c_str(), Args->Argument[1], Args->Argument[2], Ret); break; - case 3: LogMan::Msg::D(Def.StraceFmt.c_str(), Args->Argument[1], Args->Argument[2], Args->Argument[3], Ret); break; - case 4: LogMan::Msg::D(Def.StraceFmt.c_str(), Args->Argument[1], Args->Argument[2], Args->Argument[3], Args->Argument[4], Ret); break; - case 5: LogMan::Msg::D(Def.StraceFmt.c_str(), Args->Argument[1], Args->Argument[2], 
Args->Argument[3], Args->Argument[4], Args->Argument[5], Ret); break; - case 6: LogMan::Msg::D(Def.StraceFmt.c_str(), Args->Argument[1], Args->Argument[2], Args->Argument[3], Args->Argument[4], Args->Argument[5], Args->Argument[6], Ret); break; - default: break; + case 0: LogMan::Msg::D(Def.StraceFmt.c_str(), Ret); break; + case 1: LogMan::Msg::D(Def.StraceFmt.c_str(), Args->Argument[1], Ret); break; + case 2: LogMan::Msg::D(Def.StraceFmt.c_str(), Args->Argument[1], Args->Argument[2], Ret); break; + case 3: LogMan::Msg::D(Def.StraceFmt.c_str(), Args->Argument[1], Args->Argument[2], Args->Argument[3], Ret); break; + case 4: LogMan::Msg::D(Def.StraceFmt.c_str(), Args->Argument[1], Args->Argument[2], Args->Argument[3], Args->Argument[4], Ret); break; + case 5: + LogMan::Msg::D(Def.StraceFmt.c_str(), Args->Argument[1], Args->Argument[2], Args->Argument[3], Args->Argument[4], Args->Argument[5], Ret); + break; + case 6: + LogMan::Msg::D(Def.StraceFmt.c_str(), Args->Argument[1], Args->Argument[2], Args->Argument[3], Args->Argument[4], Args->Argument[5], + Args->Argument[6], Ret); + break; + default: break; } } #endif -uint64_t UnimplementedSyscall(FEXCore::Core::CpuStateFrame *Frame, uint64_t SyscallNumber) { +uint64_t UnimplementedSyscall(FEXCore::Core::CpuStateFrame* Frame, uint64_t SyscallNumber) { ERROR_AND_DIE_FMT("Unhandled system call: {}", SyscallNumber); return -ENOSYS; } -uint64_t UnimplementedSyscallSafe(FEXCore::Core::CpuStateFrame *Frame, uint64_t SyscallNumber) { +uint64_t UnimplementedSyscallSafe(FEXCore::Core::CpuStateFrame* Frame, uint64_t SyscallNumber) { return -ENOSYS; } -void SyscallHandler::LockBeforeFork(FEXCore::Core::InternalThreadState *Thread) { +void SyscallHandler::LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) { Thread->CTX->LockBeforeFork(Thread); VMATracking.Mutex.lock(); } -void SyscallHandler::UnlockAfterFork(FEXCore::Core::InternalThreadState *LiveThread, bool Child) { +void 
SyscallHandler::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThread, bool Child) { if (Child) { VMATracking.Mutex.StealAndDropActiveLocks(); - } - else { + } else { VMATracking.Mutex.unlock(); } @@ -849,7 +819,8 @@ static bool isHEX(char c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); } -fextl::unique_ptr SyscallHandler::GenerateMap(const std::string_view& GuestBinaryFile, const std::string_view& GuestBinaryFileId) { +fextl::unique_ptr +SyscallHandler::GenerateMap(const std::string_view& GuestBinaryFile, const std::string_view& GuestBinaryFileId) { ELFParser GuestELF; @@ -927,7 +898,7 @@ fextl::unique_ptr SyscallHandler::GenerateMap(const rv->SortedLineMappings.resize(len); - for (auto &Mapping: rv->SortedLineMappings) { + for (auto& Mapping : rv->SortedLineMappings) { ::read(FD, (char*)&Mapping.FileGuestBegin, sizeof(Mapping.FileGuestBegin)); ::read(FD, (char*)&Mapping.FileGuestEnd, sizeof(Mapping.FileGuestEnd)); ::read(FD, (char*)&Mapping.LineNumber, sizeof(Mapping.LineNumber)); @@ -941,7 +912,7 @@ fextl::unique_ptr SyscallHandler::GenerateMap(const rv->SortedSymbolMappings.resize(len); - for (auto &Mapping: rv->SortedSymbolMappings) { + for (auto& Mapping : rv->SortedSymbolMappings) { ::read(FD, (char*)&Mapping.FileGuestBegin, sizeof(Mapping.FileGuestBegin)); ::read(FD, (char*)&Mapping.FileGuestEnd, sizeof(Mapping.FileGuestEnd)); @@ -958,8 +929,8 @@ fextl::unique_ptr SyscallHandler::GenerateMap(const close(FD); return rv; } else { - // objdump output parsing, index generation, index file serialization - DoGenerate: +// objdump output parsing, index generation, index file serialization +DoGenerate: LogMan::Msg::DFmt("GenerateMap: Generating index for '{}'", GuestSourceFile); int StreamFD = ::open(GuestSourceFile.c_str(), O_RDONLY | O_CLOEXEC); @@ -990,12 +961,12 @@ fextl::unique_ptr SyscallHandler::GenerateMap(const bool PreviousLineWasEmpty = false; - uintptr_t LastSymbolOffset{}; - uintptr_t CurrentSymbolOffset{}; + uintptr_t 
LastSymbolOffset {}; + uintptr_t CurrentSymbolOffset {}; fextl::string LastSymbolName; - uintptr_t LastOffset{}; - uintptr_t CurrentOffset{}; + uintptr_t LastOffset {}; + uintptr_t CurrentOffset {}; int LastOffsetLine; auto rv = fextl::make_unique(); @@ -1037,10 +1008,12 @@ fextl::unique_ptr SyscallHandler::GenerateMap(const for (; !isspace(Line[offs]) && offs < Line.size(); offs++) ; - if (offs == Line.size()) + if (offs == Line.size()) { continue; - if (offs != 8 && offs != 16) + } + if (offs != 8 && offs != 16) { continue; + } auto VAOffset = std::strtoul(Line.substr(0, offs).c_str(), nullptr, 16); @@ -1060,8 +1033,9 @@ fextl::unique_ptr SyscallHandler::GenerateMap(const for (; Line[offs] != '<' && offs < Line.size(); offs++) ; - if (offs == Line.size()) + if (offs == Line.size()) { continue; + } offs++; @@ -1073,16 +1047,18 @@ fextl::unique_ptr SyscallHandler::GenerateMap(const for (; isspace(Line[offs]) && offs < Line.size(); offs++) ; - if (offs == Line.size()) + if (offs == Line.size()) { continue; + } int start = offs; for (; Line[offs] != ':' && offs < Line.size(); offs++) ; - if (offs == Line.size()) + if (offs == Line.size()) { continue; + } if (Line[offs + 1] == '\t') { auto VAOffsetStr = Line.substr(start, offs - start); @@ -1116,10 +1092,10 @@ fextl::unique_ptr SyscallHandler::GenerateMap(const // Index post processing - entires are sorted for faster lookups std::sort(rv->SortedLineMappings.begin(), rv->SortedLineMappings.end(), - [](const auto &lhs, const auto &rhs) { return lhs.FileGuestEnd <= rhs.FileGuestBegin; }); + [](const auto& lhs, const auto& rhs) { return lhs.FileGuestEnd <= rhs.FileGuestBegin; }); std::sort(rv->SortedSymbolMappings.begin(), rv->SortedSymbolMappings.end(), - [](const auto &lhs, const auto &rhs) { return lhs.FileGuestEnd <= rhs.FileGuestBegin; }); + [](const auto& lhs, const auto& rhs) { return lhs.FileGuestEnd <= rhs.FileGuestBegin; }); // Index serialization { @@ -1133,7 +1109,7 @@ fextl::unique_ptr 
SyscallHandler::GenerateMap(const ::write(IndexStream, (const char*)&len, sizeof(len)); - for (const auto &Mapping: rv->SortedLineMappings) { + for (const auto& Mapping : rv->SortedLineMappings) { ::write(IndexStream, (const char*)&Mapping.FileGuestBegin, sizeof(Mapping.FileGuestBegin)); ::write(IndexStream, (const char*)&Mapping.FileGuestEnd, sizeof(Mapping.FileGuestEnd)); ::write(IndexStream, (const char*)&Mapping.LineNumber, sizeof(Mapping.LineNumber)); @@ -1145,7 +1121,7 @@ fextl::unique_ptr SyscallHandler::GenerateMap(const ::write(IndexStream, (char*)&len, sizeof(len)); - for (const auto &Mapping: rv->SortedSymbolMappings) { + for (const auto& Mapping : rv->SortedSymbolMappings) { ::write(IndexStream, (const char*)&Mapping.FileGuestBegin, sizeof(Mapping.FileGuestBegin)); ::write(IndexStream, (const char*)&Mapping.FileGuestEnd, sizeof(Mapping.FileGuestEnd)); @@ -1168,8 +1144,6 @@ fextl::unique_ptr SyscallHandler::GenerateMap(const LogMan::Msg::DFmt("GenerateMap: Finished generating index", GuestIndexFile); return rv; } - - } -} +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/EPoll.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/EPoll.cpp index e214bfb0fa..2c95954b4a 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/EPoll.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/EPoll.cpp @@ -16,19 +16,19 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterEpoll(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; +void RegisterEpoll(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; - REGISTER_SYSCALL_IMPL_PASS_FLAGS(epoll_create, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int size) -> uint64_t { - uint64_t Result = epoll_create(size); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(epoll_create, SyscallFlags::OPTIMIZETHROUGH | 
SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int size) -> uint64_t { + uint64_t Result = epoll_create(size); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(epoll_create1, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int flags) -> uint64_t { - uint64_t Result = epoll_create1(flags); - SYSCALL_ERRNO(); - }); - } + REGISTER_SYSCALL_IMPL_PASS_FLAGS(epoll_create1, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int flags) -> uint64_t { + uint64_t Result = epoll_create1(flags); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/FD.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/FD.cpp index fb3806d5ba..02a30d347b 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/FD.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/FD.cpp @@ -29,409 +29,411 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterFD(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(read, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, void *buf, size_t count) -> uint64_t { - uint64_t Result = ::read(fd, buf, count); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(write, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, void *buf, size_t count) -> uint64_t { - uint64_t Result = ::write(fd, buf, count); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(open, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, int flags, uint32_t mode) -> uint64_t { - flags = FEX::HLE::RemapFromX86Flags(flags); - uint64_t Result = 
FEX::HLE::_SyscallHandler->FM.Open(pathname, flags, mode); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(close, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Close(fd); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(chown, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, uid_t owner, gid_t group) -> uint64_t { - uint64_t Result = ::chown(pathname, owner, group); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fchown, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, uid_t owner, gid_t group) -> uint64_t { - uint64_t Result = ::fchown(fd, owner, group); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(lchown, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, uid_t owner, gid_t group) -> uint64_t { - uint64_t Result = ::lchown(pathname, owner, group); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(lseek, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, uint64_t offset, int whence) -> uint64_t { - uint64_t Result = ::lseek(fd, offset, whence); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(access, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, int mode) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Access(pathname, mode); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(pipe, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int pipefd[2]) -> uint64_t { - uint64_t Result = ::pipe(pipefd); - 
SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(dup3, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame* Frame, int oldfd, int newfd, int flags) -> uint64_t { - flags = FEX::HLE::RemapFromX86Flags(flags); - uint64_t Result = ::dup3(oldfd, newfd, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(flock, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, int operation) -> uint64_t { - uint64_t Result = ::flock(fd, operation); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fsync, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd) -> uint64_t { - uint64_t Result = ::fsync(fd); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fdatasync, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd) -> uint64_t { - uint64_t Result = ::fdatasync(fd); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(ftruncate, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, off_t length) -> uint64_t { - uint64_t Result = ::ftruncate(fd, length); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fchmod, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, int mode) -> uint64_t { - uint64_t Result = ::fchmod(fd, mode); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fadvise64, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, off_t offset, off_t len, int advice) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(fadvise64), fd, offset, len, advice); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(inotify_init, SyscallFlags::OPTIMIZETHROUGH | 
SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::inotify_init(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(inotify_add_watch, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, const char *pathname, uint32_t mask) -> uint64_t { - uint64_t Result = ::inotify_add_watch(fd, pathname, mask); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(inotify_rm_watch, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, int wd) -> uint64_t { - uint64_t Result = ::inotify_rm_watch(fd, wd); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(openat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfs, const char *pathname, int flags, uint32_t mode) -> uint64_t { - flags = FEX::HLE::RemapFromX86Flags(flags); - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Openat(dirfs, pathname, flags, mode); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(mkdirat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, mode_t mode) -> uint64_t { - uint64_t Result = ::mkdirat(dirfd, pathname, mode); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(mknodat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, mode_t mode, dev_t dev) -> uint64_t { - uint64_t Result = ::mknodat(dirfd, pathname, mode, dev); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fchownat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, uid_t owner, gid_t group, int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = 
::fchownat(dirfd, pathname, owner, group, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(unlinkat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::unlinkat(dirfd, pathname, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(renameat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int olddirfd, const char *oldpath, int newdirfd, const char *newpath) -> uint64_t { - uint64_t Result = ::renameat(olddirfd, oldpath, newdirfd, newpath); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(linkat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int olddirfd, const char *oldpath, int newdirfd, const char *newpath, int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::linkat(olddirfd, oldpath, newdirfd, newpath, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(symlinkat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *target, int newdirfd, const char *linkpath) -> uint64_t { - uint64_t Result = ::symlinkat(target, newdirfd, linkpath); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(readlinkat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, char *buf, size_t bufsiz) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Readlinkat(dirfd, pathname, buf, bufsiz); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fchmodat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, mode_t mode) -> uint64_t { - uint64_t Result 
= syscall(SYSCALL_DEF(fchmodat), dirfd, pathname, mode); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(faccessat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, int mode) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.FAccessat(dirfd, pathname, mode); - SYSCALL_ERRNO(); - }); - - if (Handler->IsHostKernelVersionAtLeast(5, 8, 0)) { - // Only exists on kernel 5.8+ - REGISTER_SYSCALL_IMPL_FLAGS(faccessat2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, int mode, int flags) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.FAccessat2(dirfd, pathname, mode, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(pidfd_getfd, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int pidfd, int fd, unsigned int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(pidfd_getfd), pidfd, fd, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(openat2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfs, const char *pathname, struct open_how *how, size_t usize) -> uint64_t { - open_how HostHow{}; - size_t HostSize = std::min(sizeof(open_how), usize); - memcpy(&HostHow, how, HostSize); - - HostHow.flags = FEX::HLE::RemapFromX86Flags(HostHow.flags); - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Openat2(dirfs, pathname, &HostHow, HostSize); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(faccessat2, UnimplementedSyscallSafe); - REGISTER_SYSCALL_IMPL(pidfd_getfd, UnimplementedSyscallSafe); - REGISTER_SYSCALL_IMPL(openat2, UnimplementedSyscallSafe); - } - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(splice, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - 
[](FEXCore::Core::CpuStateFrame *Frame, int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::splice(fd_in, off_in, fd_out, off_out, len, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(tee, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd_in, int fd_out, size_t len, unsigned int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::tee(fd_in, fd_out, len, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(timerfd_create, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int32_t clockid, int32_t flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::timerfd_create(clockid, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(eventfd, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uint32_t count) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(eventfd2), count, 0); - SYSCALL_ERRNO(); - }); +void RegisterFD(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(read, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* buf, size_t count) -> uint64_t { + uint64_t Result = ::read(fd, buf, count); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(write, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* buf, size_t count) -> uint64_t { + uint64_t Result = ::write(fd, buf, count); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(open, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, int flags, uint32_t 
mode) -> uint64_t { + flags = FEX::HLE::RemapFromX86Flags(flags); + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Open(pathname, flags, mode); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(close, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Close(fd); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(chown, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, uid_t owner, gid_t group) -> uint64_t { + uint64_t Result = ::chown(pathname, owner, group); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fchown, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, uid_t owner, gid_t group) -> uint64_t { + uint64_t Result = ::fchown(fd, owner, group); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(lchown, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, uid_t owner, gid_t group) -> uint64_t { + uint64_t Result = ::lchown(pathname, owner, group); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(lseek, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, uint64_t offset, int whence) -> uint64_t { + uint64_t Result = ::lseek(fd, offset, whence); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(access, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, int mode) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Access(pathname, mode); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(pipe, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* 
Frame, int pipefd[2]) -> uint64_t { + uint64_t Result = ::pipe(pipefd); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(dup3, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int oldfd, int newfd, int flags) -> uint64_t { + flags = FEX::HLE::RemapFromX86Flags(flags); + uint64_t Result = ::dup3(oldfd, newfd, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(flock, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, int operation) -> uint64_t { + uint64_t Result = ::flock(fd, operation); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fsync, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd) -> uint64_t { + uint64_t Result = ::fsync(fd); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fdatasync, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd) -> uint64_t { + uint64_t Result = ::fdatasync(fd); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(ftruncate, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, off_t length) -> uint64_t { + uint64_t Result = ::ftruncate(fd, length); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fchmod, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, int mode) -> uint64_t { + uint64_t Result = ::fchmod(fd, mode); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fadvise64, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, off_t offset, off_t len, int advice) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(fadvise64), fd, offset, len, advice); + SYSCALL_ERRNO(); + }); + + 
REGISTER_SYSCALL_IMPL_PASS_FLAGS(inotify_init, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::inotify_init(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(inotify_add_watch, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, const char* pathname, uint32_t mask) -> uint64_t { + uint64_t Result = ::inotify_add_watch(fd, pathname, mask); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(inotify_rm_watch, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, int wd) -> uint64_t { + uint64_t Result = ::inotify_rm_watch(fd, wd); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(openat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfs, const char* pathname, int flags, uint32_t mode) -> uint64_t { + flags = FEX::HLE::RemapFromX86Flags(flags); + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Openat(dirfs, pathname, flags, mode); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(mkdirat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, mode_t mode) -> uint64_t { + uint64_t Result = ::mkdirat(dirfd, pathname, mode); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(mknodat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, mode_t mode, dev_t dev) -> uint64_t { + uint64_t Result = ::mknodat(dirfd, pathname, mode, dev); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + fchownat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, uid_t owner, gid_t 
group, int flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::fchownat(dirfd, pathname, owner, group, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(unlinkat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, int flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::unlinkat(dirfd, pathname, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + renameat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int olddirfd, const char* oldpath, int newdirfd, const char* newpath) -> uint64_t { + uint64_t Result = ::renameat(olddirfd, oldpath, newdirfd, newpath); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + linkat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int olddirfd, const char* oldpath, int newdirfd, const char* newpath, int flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::linkat(olddirfd, oldpath, newdirfd, newpath, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(symlinkat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* target, int newdirfd, const char* linkpath) -> uint64_t { + uint64_t Result = ::symlinkat(target, newdirfd, linkpath); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(readlinkat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, char* buf, size_t bufsiz) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Readlinkat(dirfd, pathname, buf, bufsiz); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fchmodat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + 
[](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, mode_t mode) -> uint64_t { + uint64_t Result = syscall(SYSCALL_DEF(fchmodat), dirfd, pathname, mode); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(faccessat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, int mode) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.FAccessat(dirfd, pathname, mode); + SYSCALL_ERRNO(); + }); + + if (Handler->IsHostKernelVersionAtLeast(5, 8, 0)) { + // Only exists on kernel 5.8+ + REGISTER_SYSCALL_IMPL_FLAGS(faccessat2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, int mode, int flags) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.FAccessat2(dirfd, pathname, mode, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(pidfd_getfd, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int pidfd, int fd, unsigned int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(pidfd_getfd), pidfd, fd, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS( + openat2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfs, const char* pathname, struct open_how* how, size_t usize) -> uint64_t { + open_how HostHow {}; + size_t HostSize = std::min(sizeof(open_how), usize); + memcpy(&HostHow, how, HostSize); + + HostHow.flags = FEX::HLE::RemapFromX86Flags(HostHow.flags); + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Openat2(dirfs, pathname, &HostHow, HostSize); + SYSCALL_ERRNO(); + }); + } else { + REGISTER_SYSCALL_IMPL(faccessat2, UnimplementedSyscallSafe); + REGISTER_SYSCALL_IMPL(pidfd_getfd, UnimplementedSyscallSafe); + REGISTER_SYSCALL_IMPL(openat2, UnimplementedSyscallSafe); + } - 
REGISTER_SYSCALL_IMPL_FLAGS(pipe2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int pipefd[2], int flags) -> uint64_t { - flags = FEX::HLE::RemapFromX86Flags(flags); - uint64_t Result = ::pipe2(pipefd, flags); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + splice, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd_in, loff_t* off_in, int fd_out, loff_t* off_out, size_t len, unsigned int flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::splice(fd_in, off_in, fd_out, off_out, len, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(tee, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd_in, int fd_out, size_t len, unsigned int flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::tee(fd_in, fd_out, len, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(timerfd_create, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int32_t clockid, int32_t flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::timerfd_create(clockid, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(eventfd, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uint32_t count) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(eventfd2), count, 0); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(pipe2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int pipefd[2], int flags) -> uint64_t { + flags = FEX::HLE::RemapFromX86Flags(flags); + uint64_t Result = ::pipe2(pipefd, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(inotify_init1, SyscallFlags::OPTIMIZETHROUGH | 
SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::inotify_init1(flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(renameat2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int olddirfd, const char* oldpath, int newdirfd, + const char* newpath, unsigned int flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = FHU::Syscalls::renameat2(olddirfd, oldpath, newdirfd, newpath, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(memfd_create, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* name, uint32_t flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::syscall(SYSCALL_DEF(memfd_create), name, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS( + statx, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, int flags, uint32_t mask, struct statx* statxbuf) -> uint64_t { + // Flags don't need remapped + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Statx(dirfd, pathname, flags, mask, statxbuf); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + name_to_handle_at, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, struct file_handle* handle, int* mount_id, int flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::syscall(SYSCALL_DEF(name_to_handle_at), dirfd, pathname, handle, mount_id, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(open_by_handle_at, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int mount_fd, struct file_handle* handle, int flags) -> 
uint64_t { + // Flags don't need remapped + uint64_t Result = ::syscall(SYSCALL_DEF(open_by_handle_at), mount_fd, handle, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(eventfd2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, unsigned int count, int flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::syscall(SYSCALL_DEF(eventfd2), count, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + copy_file_range, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd_in, loff_t* off_in, int fd_out, loff_t* off_out, size_t len, unsigned int flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::syscall(SYSCALL_DEF(copy_file_range), fd_in, off_in, fd_out, off_out, len, flags); + SYSCALL_ERRNO(); + }); + + if (Handler->IsHostKernelVersionAtLeast(5, 3, 0)) { + REGISTER_SYSCALL_IMPL_PASS_FLAGS(pidfd_open, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, unsigned int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(pidfd_open), pid, flags); + SYSCALL_ERRNO(); + }); + } else { + REGISTER_SYSCALL_IMPL(pidfd_open, UnimplementedSyscallSafe); + } - REGISTER_SYSCALL_IMPL_PASS_FLAGS(inotify_init1, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::inotify_init1(flags); + if (Handler->IsHostKernelVersionAtLeast(5, 9, 0)) { + REGISTER_SYSCALL_IMPL_FLAGS(close_range, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, unsigned int first, unsigned int last, unsigned int flags) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.CloseRange(first, last, flags); SYSCALL_ERRNO(); }); + } else { + 
REGISTER_SYSCALL_IMPL(close_range, UnimplementedSyscallSafe); + } - REGISTER_SYSCALL_IMPL_PASS_FLAGS(renameat2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int olddirfd, const char *oldpath, int newdirfd, const char *newpath, unsigned int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = FHU::Syscalls::renameat2(olddirfd, oldpath, newdirfd, newpath, flags); + if (Handler->IsHostKernelVersionAtLeast(5, 13, 0)) { + REGISTER_SYSCALL_IMPL_PASS_FLAGS(landlock_create_ruleset, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, void* const rule_attr, size_t size, uint32_t flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(landlock_create_ruleset), rule_attr, size, flags); SYSCALL_ERRNO(); }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(memfd_create, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *name, uint32_t flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::syscall(SYSCALL_DEF(memfd_create), name, flags); + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + landlock_add_rule, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uint32_t ruleset_fd, uint64_t rule_type, void* const rule_attr, uint32_t flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(landlock_add_rule), ruleset_fd, rule_type, rule_attr, flags); SYSCALL_ERRNO(); }); - - REGISTER_SYSCALL_IMPL_FLAGS(statx, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, int flags, uint32_t mask, struct statx *statxbuf) -> uint64_t { - // Flags don't need remapped - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Statx(dirfd, pathname, flags, mask, statxbuf); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(landlock_restrict_self, 
SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uint32_t ruleset_fd, uint32_t flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(landlock_restrict_self), ruleset_fd, flags); SYSCALL_ERRNO(); }); + } else { + REGISTER_SYSCALL_IMPL(landlock_create_ruleset, UnimplementedSyscallSafe); + REGISTER_SYSCALL_IMPL(landlock_add_rule, UnimplementedSyscallSafe); + REGISTER_SYSCALL_IMPL(landlock_restrict_self, UnimplementedSyscallSafe); + } - REGISTER_SYSCALL_IMPL_PASS_FLAGS(name_to_handle_at, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, struct file_handle *handle, int *mount_id, int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::syscall(SYSCALL_DEF(name_to_handle_at), dirfd, pathname, handle, mount_id, flags); + if (Handler->IsHostKernelVersionAtLeast(5, 14, 0)) { + REGISTER_SYSCALL_IMPL_PASS_FLAGS(memfd_secret, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uint32_t flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(memfd_secret), flags); SYSCALL_ERRNO(); }); + } else { + REGISTER_SYSCALL_IMPL(memfd_secret, UnimplementedSyscallSafe); + } - REGISTER_SYSCALL_IMPL_PASS_FLAGS(open_by_handle_at, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int mount_fd, struct file_handle *handle, int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::syscall(SYSCALL_DEF(open_by_handle_at), mount_fd, handle, flags); + if (Handler->IsHostKernelVersionAtLeast(5, 15, 0)) { + REGISTER_SYSCALL_IMPL_PASS_FLAGS(process_mrelease, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int pidfd, uint32_t flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(process_mrelease), pidfd, flags); 
SYSCALL_ERRNO(); }); + } else { + REGISTER_SYSCALL_IMPL(process_mrelease, UnimplementedSyscallSafe); + } - REGISTER_SYSCALL_IMPL_PASS_FLAGS(eventfd2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, unsigned int count, int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::syscall(SYSCALL_DEF(eventfd2), count, flags); + if (Handler->IsHostKernelVersionAtLeast(6, 5, 0)) { + REGISTER_SYSCALL_IMPL_PASS_FLAGS(cachestat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* cstat_range, void* cstat, uint32_t flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(cachestat), fd, cstat_range, cstat, flags); SYSCALL_ERRNO(); }); + } else { + REGISTER_SYSCALL_IMPL(cachestat, UnimplementedSyscallSafe); + } - REGISTER_SYSCALL_IMPL_PASS_FLAGS(copy_file_range, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::syscall(SYSCALL_DEF(copy_file_range), fd_in, off_in, fd_out, off_out, len, flags); + if (Handler->IsHostKernelVersionAtLeast(6, 6, 0)) { + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fchmodat2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, mode_t mode, uint32_t flags) -> uint64_t { + uint64_t Result = syscall(SYSCALL_DEF(fchmodat2), dirfd, pathname, mode, flags); SYSCALL_ERRNO(); }); - - if (Handler->IsHostKernelVersionAtLeast(5, 3, 0)) { - REGISTER_SYSCALL_IMPL_PASS_FLAGS(pidfd_open, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, unsigned int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(pidfd_open), pid, flags); - SYSCALL_ERRNO(); - }); 
- } - else { - REGISTER_SYSCALL_IMPL(pidfd_open, UnimplementedSyscallSafe); - } - - if (Handler->IsHostKernelVersionAtLeast(5, 9, 0)) { - REGISTER_SYSCALL_IMPL_FLAGS(close_range, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, unsigned int first, unsigned int last, unsigned int flags) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.CloseRange(first, last, flags); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(close_range, UnimplementedSyscallSafe); - } - - if (Handler->IsHostKernelVersionAtLeast(5, 13, 0)) { - REGISTER_SYSCALL_IMPL_PASS_FLAGS(landlock_create_ruleset, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *const rule_attr, size_t size, uint32_t flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(landlock_create_ruleset), rule_attr, size, flags); - SYSCALL_ERRNO(); - }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(landlock_add_rule, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uint32_t ruleset_fd, uint64_t rule_type, void *const rule_attr, uint32_t flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(landlock_add_rule), ruleset_fd, rule_type, rule_attr, flags); - SYSCALL_ERRNO(); - }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(landlock_restrict_self, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uint32_t ruleset_fd, uint32_t flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(landlock_restrict_self), ruleset_fd, flags); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(landlock_create_ruleset, UnimplementedSyscallSafe); - REGISTER_SYSCALL_IMPL(landlock_add_rule, UnimplementedSyscallSafe); - REGISTER_SYSCALL_IMPL(landlock_restrict_self, UnimplementedSyscallSafe); - } - - if (Handler->IsHostKernelVersionAtLeast(5, 14, 0)) { - 
REGISTER_SYSCALL_IMPL_PASS_FLAGS(memfd_secret, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uint32_t flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(memfd_secret), flags); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(memfd_secret, UnimplementedSyscallSafe); - } - - if (Handler->IsHostKernelVersionAtLeast(5, 15, 0)) { - REGISTER_SYSCALL_IMPL_PASS_FLAGS(process_mrelease, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int pidfd, uint32_t flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(process_mrelease), pidfd, flags); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(process_mrelease, UnimplementedSyscallSafe); - } - - if (Handler->IsHostKernelVersionAtLeast(6, 5, 0)) { - REGISTER_SYSCALL_IMPL_PASS_FLAGS(cachestat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, void* cstat_range, void* cstat, uint32_t flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(cachestat), fd, cstat_range, cstat, flags); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(cachestat, UnimplementedSyscallSafe); - } - - if (Handler->IsHostKernelVersionAtLeast(6, 6, 0)) { - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fchmodat2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, mode_t mode, uint32_t flags) -> uint64_t { - uint64_t Result = syscall(SYSCALL_DEF(fchmodat2), dirfd, pathname, mode, flags); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(fchmodat2, UnimplementedSyscallSafe); - } + } else { + REGISTER_SYSCALL_IMPL(fchmodat2, UnimplementedSyscallSafe); } } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/FS.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/FS.cpp index 
dba921aa28..ffd4b5f4ec 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/FS.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/FS.cpp @@ -22,265 +22,261 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterFS(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getcwd, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, char *buf, size_t size) -> uint64_t { - uint64_t Result = syscall(SYSCALL_DEF(getcwd), buf, size); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(chdir, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *path) -> uint64_t { - uint64_t Result = ::chdir(path); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fchdir, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd) -> uint64_t { - uint64_t Result = ::fchdir(fd); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(rename, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *oldpath, const char *newpath) -> uint64_t { - uint64_t Result = ::rename(oldpath, newpath); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(mkdir, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, mode_t mode) -> uint64_t { - uint64_t Result = ::mkdir(pathname, mode); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(rmdir, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname) -> uint64_t { - uint64_t Result = ::rmdir(pathname); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(link, SyscallFlags::OPTIMIZETHROUGH | 
SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *oldpath, const char *newpath) -> uint64_t { - uint64_t Result = ::link(oldpath, newpath); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(unlink, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname) -> uint64_t { - uint64_t Result = ::unlink(pathname); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(symlink, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *target, const char *linkpath) -> uint64_t { - uint64_t Result = ::symlink(target, linkpath); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(readlink, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, char *buf, size_t bufsiz) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Readlink(pathname, buf, bufsiz); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(chmod, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, mode_t mode) -> uint64_t { - uint64_t Result = ::chmod(pathname, mode); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(umask, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, mode_t mask) -> uint64_t { - uint64_t Result = ::umask(mask); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(mknod, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, mode_t mode, dev_t dev) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Mknod(pathname, mode, dev); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(ustat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - 
[](FEXCore::Core::CpuStateFrame *Frame, dev_t dev, struct ustat *ubuf) -> uint64_t { - // Doesn't exist on AArch64, will return -ENOSYS - // Since version 2.28 of GLIBC it has stopped providing a wrapper for this syscall - uint64_t Result = syscall(SYSCALL_DEF(ustat), dev, ubuf); - SYSCALL_ERRNO(); - }); - - /* - arg1 is one of: void, unsigned int fs_index, const char *fsname - arg2 is one of: void, char *buf - */ - REGISTER_SYSCALL_IMPL_PASS_FLAGS(sysfs, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int option, uint64_t arg1, uint64_t arg2) -> uint64_t { - // Doesn't exist on AArch64, will return -ENOSYS - uint64_t Result = syscall(SYSCALL_DEF(sysfs), option, arg1, arg2); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(truncate, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *path, off_t length) -> uint64_t { - uint64_t Result = ::truncate(path, length); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(creat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, mode_t mode) -> uint64_t { - uint64_t Result = ::creat(pathname, mode); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(chroot, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *path) -> uint64_t { - uint64_t Result = ::chroot(path); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(sync, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - sync(); - return 0; // always successful - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(acct, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *filename) -> uint64_t { - uint64_t Result = ::acct(filename); - 
SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(mount, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *source, const char *target, const char *filesystemtype, unsigned long mountflags, const void *data) -> uint64_t { - uint64_t Result = ::mount(source, target, filesystemtype, mountflags, data); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(umount2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *target, int flags) -> uint64_t { - uint64_t Result = ::umount2(target, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(swapon, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *path, int swapflags) -> uint64_t { - uint64_t Result = ::swapon(path, swapflags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(swapoff, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *path) -> uint64_t { - uint64_t Result = ::swapoff(path); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(syncfs, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(syncfs), fd); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL(setxattr, - [](FEXCore::Core::CpuStateFrame *Frame, const char *path, const char *name, const void *value, size_t size, int flags) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Setxattr(path, name, value, size, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL(lsetxattr, - [](FEXCore::Core::CpuStateFrame *Frame, const char *path, const char *name, const void *value, size_t size, int flags) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.LSetxattr(path, name, value, size, 
flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fsetxattr, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, const char *name, const void *value, size_t size, int flags) -> uint64_t { - uint64_t Result = ::fsetxattr(fd, name, value, size, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL(getxattr, - [](FEXCore::Core::CpuStateFrame *Frame, const char *path, const char *name, void *value, size_t size) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Getxattr(path, name, value, size); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL(lgetxattr, - [](FEXCore::Core::CpuStateFrame *Frame, const char *path, const char *name, void *value, size_t size) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.LGetxattr(path, name, value, size); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fgetxattr, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, const char *name, void *value, size_t size) -> uint64_t { - uint64_t Result = ::fgetxattr(fd, name, value, size); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL(listxattr, - [](FEXCore::Core::CpuStateFrame *Frame, const char *path, char *list, size_t size) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Listxattr(path, list, size); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL(llistxattr, - [](FEXCore::Core::CpuStateFrame *Frame, const char *path, char *list, size_t size) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.LListxattr(path, list, size); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(flistxattr, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, char *list, size_t size) -> uint64_t { - uint64_t Result = ::flistxattr(fd, list, size); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL(removexattr, - 
[](FEXCore::Core::CpuStateFrame *Frame, const char *path, const char *name) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Removexattr(path, name); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL(lremovexattr, - [](FEXCore::Core::CpuStateFrame *Frame, const char *path, const char *name) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.LRemovexattr(path, name); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fremovexattr, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, const char *name) -> uint64_t { - uint64_t Result = ::fremovexattr(fd, name); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fanotify_init, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, unsigned int flags, unsigned int event_f_flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(fanotify_init), flags, event_f_flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fanotify_mark, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fanotify_fd, unsigned int flags, uint64_t mask, int dirfd, const char *pathname) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(fanotify_mark), fanotify_fd, flags, mask, dirfd, pathname); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(pivot_root, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *new_root, const char *put_old) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(pivot_root), new_root, put_old); - SYSCALL_ERRNO(); - }); - - if (Handler->IsHostKernelVersionAtLeast(5, 14, 0)) { - REGISTER_SYSCALL_IMPL_PASS_FLAGS(quotactl_fd, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uint32_t fd, uint32_t cmd, uint32_t id, void* addr) -> uint64_t { 
- uint64_t Result = ::syscall(SYSCALL_DEF(quotactl_fd), fd, cmd, id, addr); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(quotactl_fd, UnimplementedSyscallSafe); - } +void RegisterFS(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getcwd, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, char* buf, size_t size) -> uint64_t { + uint64_t Result = syscall(SYSCALL_DEF(getcwd), buf, size); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(chdir, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* path) -> uint64_t { + uint64_t Result = ::chdir(path); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fchdir, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd) -> uint64_t { + uint64_t Result = ::fchdir(fd); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(rename, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* oldpath, const char* newpath) -> uint64_t { + uint64_t Result = ::rename(oldpath, newpath); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(mkdir, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, mode_t mode) -> uint64_t { + uint64_t Result = ::mkdir(pathname, mode); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(rmdir, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname) -> uint64_t { + uint64_t Result = ::rmdir(pathname); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(link, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* oldpath, 
const char* newpath) -> uint64_t { + uint64_t Result = ::link(oldpath, newpath); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(unlink, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname) -> uint64_t { + uint64_t Result = ::unlink(pathname); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(symlink, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* target, const char* linkpath) -> uint64_t { + uint64_t Result = ::symlink(target, linkpath); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(readlink, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, char* buf, size_t bufsiz) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Readlink(pathname, buf, bufsiz); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(chmod, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, mode_t mode) -> uint64_t { + uint64_t Result = ::chmod(pathname, mode); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(umask, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, mode_t mask) -> uint64_t { + uint64_t Result = ::umask(mask); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(mknod, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, mode_t mode, dev_t dev) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Mknod(pathname, mode, dev); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(ustat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, dev_t dev, struct ustat* ubuf) -> uint64_t { + // Doesn't exist 
on AArch64, will return -ENOSYS + // Since version 2.28 of GLIBC it has stopped providing a wrapper for this syscall + uint64_t Result = syscall(SYSCALL_DEF(ustat), dev, ubuf); + SYSCALL_ERRNO(); + }); + + /* + arg1 is one of: void, unsigned int fs_index, const char *fsname + arg2 is one of: void, char *buf + */ + REGISTER_SYSCALL_IMPL_PASS_FLAGS(sysfs, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int option, uint64_t arg1, uint64_t arg2) -> uint64_t { + // Doesn't exist on AArch64, will return -ENOSYS + uint64_t Result = syscall(SYSCALL_DEF(sysfs), option, arg1, arg2); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(truncate, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* path, off_t length) -> uint64_t { + uint64_t Result = ::truncate(path, length); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(creat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, mode_t mode) -> uint64_t { + uint64_t Result = ::creat(pathname, mode); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(chroot, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* path) -> uint64_t { + uint64_t Result = ::chroot(path); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(sync, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + sync(); + return 0; // always successful + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(acct, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* filename) -> uint64_t { + uint64_t Result = ::acct(filename); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(mount, SyscallFlags::OPTIMIZETHROUGH | 
SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* source, const char* target, + const char* filesystemtype, unsigned long mountflags, const void* data) -> uint64_t { + uint64_t Result = ::mount(source, target, filesystemtype, mountflags, data); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(umount2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* target, int flags) -> uint64_t { + uint64_t Result = ::umount2(target, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(swapon, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* path, int swapflags) -> uint64_t { + uint64_t Result = ::swapon(path, swapflags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(swapoff, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* path) -> uint64_t { + uint64_t Result = ::swapoff(path); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(syncfs, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(syncfs), fd); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL( + setxattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, const char* name, const void* value, size_t size, int flags) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Setxattr(path, name, value, size, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL( + lsetxattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, const char* name, const void* value, size_t size, int flags) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.LSetxattr(path, name, value, size, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + fsetxattr, 
SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, const char* name, const void* value, size_t size, int flags) -> uint64_t { + uint64_t Result = ::fsetxattr(fd, name, value, size, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL(getxattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, const char* name, void* value, size_t size) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Getxattr(path, name, value, size); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL(lgetxattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, const char* name, void* value, size_t size) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.LGetxattr(path, name, value, size); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fgetxattr, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, const char* name, void* value, size_t size) -> uint64_t { + uint64_t Result = ::fgetxattr(fd, name, value, size); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL(listxattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, char* list, size_t size) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Listxattr(path, list, size); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL(llistxattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, char* list, size_t size) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.LListxattr(path, list, size); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(flistxattr, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, char* list, size_t size) -> uint64_t { + uint64_t Result = ::flistxattr(fd, list, size); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL(removexattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, const char* name) -> uint64_t { + 
uint64_t Result = FEX::HLE::_SyscallHandler->FM.Removexattr(path, name); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL(lremovexattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, const char* name) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.LRemovexattr(path, name); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fremovexattr, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, const char* name) -> uint64_t { + uint64_t Result = ::fremovexattr(fd, name); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fanotify_init, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, unsigned int flags, unsigned int event_f_flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(fanotify_init), flags, event_f_flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + fanotify_mark, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fanotify_fd, unsigned int flags, uint64_t mask, int dirfd, const char* pathname) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(fanotify_mark), fanotify_fd, flags, mask, dirfd, pathname); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(pivot_root, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* new_root, const char* put_old) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(pivot_root), new_root, put_old); + SYSCALL_ERRNO(); + }); + + if (Handler->IsHostKernelVersionAtLeast(5, 14, 0)) { + REGISTER_SYSCALL_IMPL_PASS_FLAGS(quotactl_fd, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uint32_t fd, uint32_t cmd, uint32_t id, void* addr) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(quotactl_fd), fd, cmd, id, addr); + 
SYSCALL_ERRNO(); + }); + } else { + REGISTER_SYSCALL_IMPL(quotactl_fd, UnimplementedSyscallSafe); } } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/IO.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/IO.cpp index 7e4c57c8ab..1dc80daded 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/IO.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/IO.cpp @@ -16,55 +16,56 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterIO(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; +void RegisterIO(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; - REGISTER_SYSCALL_IMPL_PASS_FLAGS(iopl, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int level) -> uint64_t { - // Just claim we don't have permission - return -EPERM; - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(iopl, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int level) -> uint64_t { + // Just claim we don't have permission + return -EPERM; + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(ioperm, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, unsigned long from, unsigned long num, int turn_on) -> uint64_t { - // ioperm not available on our architecture - return -EPERM; - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(ioperm, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, unsigned long from, unsigned long num, int turn_on) -> uint64_t { + // ioperm not available on our architecture + return -EPERM; + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(io_setup, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, unsigned nr_events, aio_context_t *ctx_idp) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(io_setup), 
nr_events, ctx_idp); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(io_setup, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, unsigned nr_events, aio_context_t* ctx_idp) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(io_setup), nr_events, ctx_idp); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(io_destroy, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, aio_context_t ctx_id) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(io_destroy), ctx_id); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(io_destroy, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, aio_context_t ctx_id) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(io_destroy), ctx_id); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(io_submit, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, aio_context_t ctx_id, long nr, struct iocb **iocbpp) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(io_submit), ctx_id, nr, iocbpp); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(io_submit, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, aio_context_t ctx_id, long nr, struct iocb** iocbpp) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(io_submit), ctx_id, nr, iocbpp); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(io_cancel, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, aio_context_t ctx_id, struct iocb *iocb, struct io_event *result) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(io_cancel), ctx_id, iocb, result); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + io_cancel, SyscallFlags::OPTIMIZETHROUGH | 
SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, aio_context_t ctx_id, struct iocb* iocb, struct io_event* result) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(io_cancel), ctx_id, iocb, result); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(ioprio_set, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int which, int who) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(ioprio_set), which, who); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(ioprio_set, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int which, int who) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(ioprio_set), which, who); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(ioprio_get, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int which, int who, int ioprio) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(ioprio_get), which, who, ioprio); - SYSCALL_ERRNO(); - }); - } + REGISTER_SYSCALL_IMPL_PASS_FLAGS(ioprio_get, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int which, int who, int ioprio) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(ioprio_get), which, who, ioprio); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/IOUring.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/IOUring.cpp index 302d107fda..0b99662463 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/IOUring.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/IOUring.cpp @@ -17,37 +17,38 @@ tags: LinuxSyscalls|syscalls-shared #include namespace SignalDelegator { - struct GuestSigAction; +struct GuestSigAction; } namespace FEX::HLE { - void RegisterIOUring(FEX::HLE::SyscallHandler *Handler) { - 
using namespace FEXCore::IR; - - if (Handler->IsHostKernelVersionAtLeast(5, 1, 0)) { - REGISTER_SYSCALL_IMPL_PASS_FLAGS(io_uring_setup, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uint32_t entries, void* params) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(io_uring_setup), entries, params); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(io_uring_enter, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, unsigned int fd, uint32_t to_submit, uint32_t min_complete, uint32_t flags, void *argp, size_t argsz) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(io_uring_enter), fd, to_submit, min_complete, flags, argp, argsz); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(io_uring_register, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, unsigned int fd, unsigned int opcode, void *arg, uint32_t nr_args) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(io_uring_register), fd, opcode, arg, nr_args); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(io_uring_setup, UnimplementedSyscallSafe); - REGISTER_SYSCALL_IMPL(io_uring_enter, UnimplementedSyscallSafe); - REGISTER_SYSCALL_IMPL(io_uring_register, UnimplementedSyscallSafe); - } +void RegisterIOUring(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; + + if (Handler->IsHostKernelVersionAtLeast(5, 1, 0)) { + REGISTER_SYSCALL_IMPL_PASS_FLAGS(io_uring_setup, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uint32_t entries, void* params) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(io_uring_setup), entries, params); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(io_uring_enter, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, 
unsigned int fd, uint32_t to_submit, uint32_t min_complete, + uint32_t flags, void* argp, size_t argsz) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(io_uring_enter), fd, to_submit, min_complete, flags, argp, argsz); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + io_uring_register, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, unsigned int fd, unsigned int opcode, void* arg, uint32_t nr_args) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(io_uring_register), fd, opcode, arg, nr_args); + SYSCALL_ERRNO(); + }); + } else { + REGISTER_SYSCALL_IMPL(io_uring_setup, UnimplementedSyscallSafe); + REGISTER_SYSCALL_IMPL(io_uring_enter, UnimplementedSyscallSafe); + REGISTER_SYSCALL_IMPL(io_uring_register, UnimplementedSyscallSafe); } } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Info.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Info.cpp index 84b3501f7a..23b0b16ae5 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Info.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Info.cpp @@ -29,82 +29,80 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - using cap_user_header_t = void*; - using cap_user_data_t = void*; +using cap_user_header_t = void*; +using cap_user_data_t = void*; - void RegisterInfo(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; +void RegisterInfo(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; - REGISTER_SYSCALL_IMPL_FLAGS(uname, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, struct utsname *buf) -> uint64_t { - struct utsname Local{}; - if (::uname(&Local) == 0) { - memcpy(buf->nodename, Local.nodename, sizeof(Local.nodename)); - static_assert(sizeof(Local.nodename) <= sizeof(buf->nodename)); - memcpy(buf->domainname, Local.domainname, sizeof(Local.domainname)); - 
static_assert(sizeof(Local.domainname) <= sizeof(buf->domainname)); - } - else { - strcpy(buf->nodename, "FEXCore"); - LogMan::Msg::EFmt("Couldn't determine host nodename. Defaulting to '{}'", buf->nodename); - } - strcpy(buf->sysname, "Linux"); - uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion(); - snprintf(buf->release, sizeof(buf->release), "%d.%d.%d", - FEX::HLE::SyscallHandler::KernelMajor(GuestVersion), - FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), - FEX::HLE::SyscallHandler::KernelPatch(GuestVersion)); + REGISTER_SYSCALL_IMPL_FLAGS(uname, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, struct utsname* buf) -> uint64_t { + struct utsname Local {}; + if (::uname(&Local) == 0) { + memcpy(buf->nodename, Local.nodename, sizeof(Local.nodename)); + static_assert(sizeof(Local.nodename) <= sizeof(buf->nodename)); + memcpy(buf->domainname, Local.domainname, sizeof(Local.domainname)); + static_assert(sizeof(Local.domainname) <= sizeof(buf->domainname)); + } else { + strcpy(buf->nodename, "FEXCore"); + LogMan::Msg::EFmt("Couldn't determine host nodename. 
Defaulting to '{}'", buf->nodename); + } + strcpy(buf->sysname, "Linux"); + uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion(); + snprintf(buf->release, sizeof(buf->release), "%d.%d.%d", FEX::HLE::SyscallHandler::KernelMajor(GuestVersion), + FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), FEX::HLE::SyscallHandler::KernelPatch(GuestVersion)); - const char version[] = "#" GIT_DESCRIBE_STRING " SMP " __DATE__ " " __TIME__; - strcpy(buf->version, version); - static_assert(sizeof(version) <= sizeof(buf->version), "uname version define became too large!"); - // Tell the guest that we are a 64bit kernel - strcpy(buf->machine, "x86_64"); - return 0; - }); + const char version[] = "#" GIT_DESCRIBE_STRING " SMP " __DATE__ " " __TIME__; + strcpy(buf->version, version); + static_assert(sizeof(version) <= sizeof(buf->version), "uname version define became too large!"); + // Tell the guest that we are a 64bit kernel + strcpy(buf->machine, "x86_64"); + return 0; + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(syslog, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int type, char *bufp, int len) -> uint64_t { - uint64_t Result = ::klogctl(type, bufp, len); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(syslog, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int type, char* bufp, int len) -> uint64_t { + uint64_t Result = ::klogctl(type, bufp, len); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getrandom, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *buf, size_t buflen, unsigned int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(getrandom), buf, buflen, flags); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getrandom, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* 
Frame, void* buf, size_t buflen, unsigned int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(getrandom), buf, buflen, flags); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(capget, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, cap_user_header_t hdrp, cap_user_data_t datap) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(capget), hdrp, datap); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(capget, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, cap_user_header_t hdrp, cap_user_data_t datap) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(capget), hdrp, datap); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(capset, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, cap_user_header_t hdrp, const cap_user_data_t datap) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(capset), hdrp, datap); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(capset, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, cap_user_header_t hdrp, const cap_user_data_t datap) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(capset), hdrp, datap); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_FLAGS(getcpu, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) -> uint64_t { - // tcache is ignored - uint64_t Result = ::syscall(SYSCALL_DEF(getcpu), cpu, node, nullptr); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_FLAGS(getcpu, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, unsigned* cpu, unsigned* node, struct getcpu_cache* tcache) -> uint64_t { + // tcache is ignored + uint64_t Result = 
::syscall(SYSCALL_DEF(getcpu), cpu, node, nullptr); + SYSCALL_ERRNO(); + }); - //compare two processes to determine if they share a kernel resource - REGISTER_SYSCALL_IMPL_PASS_FLAGS(kcmp, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(kcmp), pid1, pid2, type, idx1, idx2); - SYSCALL_ERRNO(); - }); + // compare two processes to determine if they share a kernel resource + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + kcmp, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(kcmp), pid1, pid2, type, idx1, idx2); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_FLAGS(seccomp, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, unsigned int operation, unsigned int flags, void *args) -> uint64_t { - // FEX doesn't support seccomp - return -EINVAL; - }); - } + REGISTER_SYSCALL_IMPL_FLAGS(seccomp, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, unsigned int operation, unsigned int flags, void* args) -> uint64_t { + // FEX doesn't support seccomp + return -EINVAL; + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Key.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Key.cpp index 32bd64c701..36c5acf8d3 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Key.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Key.cpp @@ -17,46 +17,49 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterKey(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; - - 
REGISTER_SYSCALL_IMPL_PASS_FLAGS(add_key, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *type, const char *description, const void *payload, size_t plen, key_serial_t keyring) -> uint64_t { - uint64_t Result = syscall(SYS_add_key, type, description, payload, plen, keyring); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(request_key, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *type, const char *description, const char *callout_info, key_serial_t dest_keyring) -> uint64_t { - uint64_t Result = syscall(SYS_request_key, type, description, callout_info, dest_keyring); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(keyctl, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int operation, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5) -> uint64_t { - uint64_t Result = syscall(SYS_keyctl, operation, arg2, arg3, arg4, arg5); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(pkey_mprotect, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *addr, size_t len, int prot, int pkey) -> uint64_t { - // Added in Linux 4.9 - uint64_t Result = ::syscall(SYSCALL_DEF(pkey_mprotect), addr, len, prot, pkey); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(pkey_alloc, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, unsigned int flags, unsigned int access_rights) -> uint64_t { - // Added in Linux 4.9 - uint64_t Result = ::syscall(SYSCALL_DEF(pkey_alloc), flags, access_rights); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(pkey_free, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int pkey) -> uint64_t { - // Added in Linux 
4.9 - uint64_t Result = ::syscall(SYSCALL_DEF(pkey_free), pkey); - SYSCALL_ERRNO(); - }); - } +void RegisterKey(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(add_key, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* type, const char* description, const void* payload, + size_t plen, key_serial_t keyring) -> uint64_t { + uint64_t Result = syscall(SYS_add_key, type, description, payload, plen, keyring); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(request_key, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* type, const char* description, + const char* callout_info, key_serial_t dest_keyring) -> uint64_t { + uint64_t Result = syscall(SYS_request_key, type, description, callout_info, dest_keyring); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + keyctl, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int operation, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5) -> uint64_t { + uint64_t Result = syscall(SYS_keyctl, operation, arg2, arg3, arg4, arg5); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(pkey_mprotect, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t len, int prot, int pkey) -> uint64_t { + // Added in Linux 4.9 + uint64_t Result = ::syscall(SYSCALL_DEF(pkey_mprotect), addr, len, prot, pkey); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(pkey_alloc, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, unsigned int flags, unsigned int access_rights) -> uint64_t { + // Added in Linux 4.9 + uint64_t Result = ::syscall(SYSCALL_DEF(pkey_alloc), flags, access_rights); + SYSCALL_ERRNO(); + }); + + 
REGISTER_SYSCALL_IMPL_PASS_FLAGS(pkey_free, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int pkey) -> uint64_t { + // Added in Linux 4.9 + uint64_t Result = ::syscall(SYSCALL_DEF(pkey_free), pkey); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Memory.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Memory.cpp index 245adf317d..446988a384 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Memory.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Memory.cpp @@ -19,107 +19,111 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterMemory(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; - - REGISTER_SYSCALL_IMPL_FLAGS(brk, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *addr) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->HandleBRK(Frame, addr); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(msync, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *addr, size_t length, int32_t flags) -> uint64_t { - uint64_t Result = ::msync(addr, length, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(mincore, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *addr, size_t length, uint8_t *vec) -> uint64_t { - uint64_t Result = ::mincore(addr, length, vec); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(madvise, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *addr, size_t length, int32_t advice) -> uint64_t { - uint64_t Result = ::madvise(addr, length, advice); - - if (Result != -1) { - FEX::HLE::_SyscallHandler->TrackMadvise(Frame->Thread, (uintptr_t)addr, 
length, advice); - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(mlock, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const void *addr, size_t len) -> uint64_t { - uint64_t Result = ::mlock(addr, len); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(munlock, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const void *addr, size_t len) -> uint64_t { - uint64_t Result = ::munlock(addr, len); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(mlock2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const void *addr, size_t len, int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(mlock2), addr, len, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(remap_file_pages, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *addr, size_t size, int prot, size_t pgoff, int flags) -> uint64_t { - // This syscall is deprecated, not sure when it will end up being removed - uint64_t Result = ::syscall(SYSCALL_DEF(remap_file_pages), addr, size, prot, pgoff, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(mbind, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *addr, unsigned long len, int mode, const unsigned long *nodemask, unsigned long maxnode, unsigned flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(mbind), addr, len, mode, nodemask, maxnode, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(get_mempolicy, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int *mode, unsigned long *nodemask, unsigned long maxnode, void *addr, unsigned long flags) -> uint64_t { - uint64_t Result = 
::syscall(SYSCALL_DEF(get_mempolicy), mode, nodemask, maxnode, addr, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(set_mempolicy, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int mode, const unsigned long *nodemask, unsigned long maxnode) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(set_mempolicy), mode, nodemask, maxnode); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(migrate_pages, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int pid, unsigned long maxnode, const unsigned long *old_nodes, const unsigned long *new_nodes) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(migrate_pages), pid, maxnode, old_nodes, new_nodes); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(move_pages, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(move_pages), pid, count, pages, nodes, status, flags); +void RegisterMemory(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; + + REGISTER_SYSCALL_IMPL_FLAGS(brk, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, void* addr) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->HandleBRK(Frame, addr); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(msync, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t length, int32_t flags) -> uint64_t { + uint64_t Result = ::msync(addr, length, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(mincore, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, void* addr, 
size_t length, uint8_t* vec) -> uint64_t { + uint64_t Result = ::mincore(addr, length, vec); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(madvise, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t length, int32_t advice) -> uint64_t { + uint64_t Result = ::madvise(addr, length, advice); + + if (Result != -1) { + FEX::HLE::_SyscallHandler->TrackMadvise(Frame->Thread, (uintptr_t)addr, length, advice); + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(mlock, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const void* addr, size_t len) -> uint64_t { + uint64_t Result = ::mlock(addr, len); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(munlock, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const void* addr, size_t len) -> uint64_t { + uint64_t Result = ::munlock(addr, len); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(mlock2, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const void* addr, size_t len, int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(mlock2), addr, len, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(remap_file_pages, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t size, int prot, size_t pgoff, int flags) -> uint64_t { + // This syscall is deprecated, not sure when it will end up being removed + uint64_t Result = ::syscall(SYSCALL_DEF(remap_file_pages), addr, size, prot, pgoff, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(mbind, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, void* addr, unsigned long len, int mode, + const unsigned long* 
nodemask, unsigned long maxnode, unsigned flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(mbind), addr, len, mode, nodemask, maxnode, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(get_mempolicy, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int* mode, unsigned long* nodemask, unsigned long maxnode, + void* addr, unsigned long flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(get_mempolicy), mode, nodemask, maxnode, addr, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(set_mempolicy, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int mode, const unsigned long* nodemask, unsigned long maxnode) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(set_mempolicy), mode, nodemask, maxnode); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(migrate_pages, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int pid, unsigned long maxnode, const unsigned long* old_nodes, + const unsigned long* new_nodes) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(migrate_pages), pid, maxnode, old_nodes, new_nodes); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + move_pages, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int pid, unsigned long count, void** pages, const int* nodes, int* status, int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(move_pages), pid, count, pages, nodes, status, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(membarrier, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int cmd, int flags) -> uint64_t { + uint64_t Result = syscall(SYSCALL_DEF(membarrier), cmd, flags); + SYSCALL_ERRNO(); + }); + + if 
(Handler->IsHostKernelVersionAtLeast(5, 17, 0)) { + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + set_mempolicy_home_node, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uint64_t start, uint64_t len, uint64_t home_node, uint64_t flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(set_mempolicy_home_node), start, len, home_node, flags); SYSCALL_ERRNO(); }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(membarrier, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int cmd, int flags) -> uint64_t { - uint64_t Result = syscall(SYSCALL_DEF(membarrier), cmd, flags); - SYSCALL_ERRNO(); - }); - - if (Handler->IsHostKernelVersionAtLeast(5, 17, 0)) { - REGISTER_SYSCALL_IMPL_PASS_FLAGS(set_mempolicy_home_node, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uint64_t start, uint64_t len, uint64_t home_node, uint64_t flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(set_mempolicy_home_node), start, len, home_node, flags); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(set_mempolicy_home_node, UnimplementedSyscallSafe); - } + } else { + REGISTER_SYSCALL_IMPL(set_mempolicy_home_node, UnimplementedSyscallSafe); } } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Msg.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Msg.cpp index f6dfdc9795..8a45ffff6e 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Msg.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Msg.cpp @@ -17,42 +17,42 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterMsg(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(msgget, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, key_t key, int msgflg) -> 
uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(msgget), key, msgflg); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(msgsnd, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int msqid, const void *msgp, size_t msgsz, int msgflg) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(msgsnd), msqid, msgp, msgsz, msgflg); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(msgrcv, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int msqid, void *msgp, size_t msgsz, long msgtyp, int msgflg) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(msgrcv), msqid, msgp, msgsz, msgtyp, msgflg); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(msgctl, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int msqid, int cmd, struct msqid_ds *buf) -> uint64_t { - // A quirk of this syscall - // On 32-bit this syscall ONLY supports IPC_64 msqid_ds encoding - // If an application want to use the old style encoding then it needs to use the ipc syscall with MSGCTL command - // ipc syscall supports both IPC_64 and old encoding - uint64_t Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, buf); - SYSCALL_ERRNO(); - }); - - // last two parameters are optional - REGISTER_SYSCALL_IMPL_PASS_FLAGS(mq_unlink, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const char *name) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(mq_unlink), name); - SYSCALL_ERRNO(); - }); - } +void RegisterMsg(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(msgget, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, key_t key, int msgflg) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(msgget), key, msgflg); + 
SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(msgsnd, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int msqid, const void* msgp, size_t msgsz, int msgflg) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(msgsnd), msqid, msgp, msgsz, msgflg); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(msgrcv, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int msqid, void* msgp, size_t msgsz, long msgtyp, int msgflg) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(msgrcv), msqid, msgp, msgsz, msgtyp, msgflg); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(msgctl, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int msqid, int cmd, struct msqid_ds* buf) -> uint64_t { + // A quirk of this syscall + // On 32-bit this syscall ONLY supports IPC_64 msqid_ds encoding + // If an application want to use the old style encoding then it needs to use the ipc syscall with MSGCTL command + // ipc syscall supports both IPC_64 and old encoding + uint64_t Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, buf); + SYSCALL_ERRNO(); + }); + + // last two parameters are optional + REGISTER_SYSCALL_IMPL_PASS_FLAGS(mq_unlink, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const char* name) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(mq_unlink), name); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Namespace.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Namespace.cpp index 48196b67de..bf50e88a67 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Namespace.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Namespace.cpp @@ -17,69 +17,70 @@ tags: LinuxSyscalls|syscalls-shared #include namespace 
SignalDelegator { - struct GuestSigAction; +struct GuestSigAction; } namespace FEX::HLE { - void RegisterNamespace(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; +void RegisterNamespace(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; - if (Handler->GetHostKernelVersion() >= FEX::HLE::SyscallHandler::KernelVersion(5, 1, 0)) { - REGISTER_SYSCALL_IMPL_PASS_FLAGS(open_tree, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dfd, const char *filename, unsigned int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(open_tree), dfd, filename, flags); - SYSCALL_ERRNO(); - }); + if (Handler->GetHostKernelVersion() >= FEX::HLE::SyscallHandler::KernelVersion(5, 1, 0)) { + REGISTER_SYSCALL_IMPL_PASS_FLAGS(open_tree, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dfd, const char* filename, unsigned int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(open_tree), dfd, filename, flags); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(move_mount, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, unsigned int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(move_mount), from_dfd, from_pathname, to_dfd, to_pathname, flags); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(move_mount, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int from_dfd, const char* from_pathname, int to_dfd, + const char* to_pathname, unsigned int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(move_mount), from_dfd, from_pathname, to_dfd, to_pathname, flags); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fsopen, SyscallFlags::OPTIMIZETHROUGH | 
SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dfd, const char *path, unsigned int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(fsopen), dfd, path, flags); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fsopen, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dfd, const char* path, unsigned int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(fsopen), dfd, path, flags); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fsconfig, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, unsigned int cmd, const char *key, const void *value, int aux) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(fsconfig), fd, cmd, key, value, aux); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + fsconfig, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, unsigned int cmd, const char* key, const void* value, int aux) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(fsconfig), fd, cmd, key, value, aux); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fsmount, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fs_fd, uint32_t flags, uint32_t attr_flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(fsmount), fs_fd, flags, attr_flags); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fsmount, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fs_fd, uint32_t flags, uint32_t attr_flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(fsmount), fs_fd, flags, attr_flags); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(fspick, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - 
[](FEXCore::Core::CpuStateFrame *Frame, int dfd, const char *path, unsigned int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(fspick), dfd, path, flags); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(open_tree, UnimplementedSyscallSafe); - REGISTER_SYSCALL_IMPL(move_mount, UnimplementedSyscallSafe); - REGISTER_SYSCALL_IMPL(fsopen, UnimplementedSyscallSafe); - REGISTER_SYSCALL_IMPL(fsconfig, UnimplementedSyscallSafe); - REGISTER_SYSCALL_IMPL(fsmount, UnimplementedSyscallSafe); - REGISTER_SYSCALL_IMPL(fspick, UnimplementedSyscallSafe); - } + REGISTER_SYSCALL_IMPL_PASS_FLAGS(fspick, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dfd, const char* path, unsigned int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(fspick), dfd, path, flags); + SYSCALL_ERRNO(); + }); + } else { + REGISTER_SYSCALL_IMPL(open_tree, UnimplementedSyscallSafe); + REGISTER_SYSCALL_IMPL(move_mount, UnimplementedSyscallSafe); + REGISTER_SYSCALL_IMPL(fsopen, UnimplementedSyscallSafe); + REGISTER_SYSCALL_IMPL(fsconfig, UnimplementedSyscallSafe); + REGISTER_SYSCALL_IMPL(fsmount, UnimplementedSyscallSafe); + REGISTER_SYSCALL_IMPL(fspick, UnimplementedSyscallSafe); + } - if (Handler->GetHostKernelVersion() >= FEX::HLE::SyscallHandler::KernelVersion(5, 12, 0)) { - REGISTER_SYSCALL_IMPL_PASS_FLAGS(mount_setattr, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int dfd, const char *path, unsigned int flags, void *uattr, size_t usize) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(mount_setattr), dfd, path, flags, uattr, usize); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(mount_setattr, UnimplementedSyscallSafe); - } + if (Handler->GetHostKernelVersion() >= FEX::HLE::SyscallHandler::KernelVersion(5, 12, 0)) { + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + mount_setattr, SyscallFlags::OPTIMIZETHROUGH | 
SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int dfd, const char* path, unsigned int flags, void* uattr, size_t usize) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(mount_setattr), dfd, path, flags, uattr, usize); + SYSCALL_ERRNO(); + }); + } else { + REGISTER_SYSCALL_IMPL(mount_setattr, UnimplementedSyscallSafe); } } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/NotImplemented.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/NotImplemented.cpp index 311403ce6c..d4903034ad 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/NotImplemented.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/NotImplemented.cpp @@ -13,45 +13,43 @@ tags: LinuxSyscalls|syscalls-shared #include #include -#define REGISTER_SYSCALL_NOT_IMPL(name) REGISTER_SYSCALL_IMPL(name, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { \ - LogMan::Msg::DFmt("Using deprecated/removed syscall: " #name); \ - return -ENOSYS; \ -}); +#define REGISTER_SYSCALL_NOT_IMPL(name) \ + REGISTER_SYSCALL_IMPL(name, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { \ + LogMan::Msg::DFmt("Using deprecated/removed syscall: " #name); \ + return -ENOSYS; \ + }); -#define REGISTER_SYSCALL_NO_PERM(name) REGISTER_SYSCALL_IMPL(name, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { \ - return -EPERM; \ -}); +#define REGISTER_SYSCALL_NO_PERM(name) REGISTER_SYSCALL_IMPL(name, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { return -EPERM; }); -#define REGISTER_SYSCALL_NO_ACCESS(name) REGISTER_SYSCALL_IMPL(name, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { \ - return -EACCES; \ -}); +#define REGISTER_SYSCALL_NO_ACCESS(name) \ + REGISTER_SYSCALL_IMPL(name, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { return -EACCES; }); namespace FEX::HLE { - // these are removed/not implemented in the linux kernel we present - - void RegisterNotImplemented(FEX::HLE::SyscallHandler *Handler) { - 
REGISTER_SYSCALL_NOT_IMPL(uselib); - REGISTER_SYSCALL_NOT_IMPL(create_module); - REGISTER_SYSCALL_NOT_IMPL(get_kernel_syms); - REGISTER_SYSCALL_NOT_IMPL(query_module); - REGISTER_SYSCALL_NOT_IMPL(nfsservctl); // Was removed in Linux 3.1 - REGISTER_SYSCALL_NOT_IMPL(getpmsg); - REGISTER_SYSCALL_NOT_IMPL(putpmsg); - REGISTER_SYSCALL_NOT_IMPL(afs_syscall); - REGISTER_SYSCALL_NOT_IMPL(vserver); - REGISTER_SYSCALL_NOT_IMPL(_sysctl); // Was removed in Linux 5.5 - - REGISTER_SYSCALL_NO_PERM(vhangup); - REGISTER_SYSCALL_NO_PERM(reboot) - REGISTER_SYSCALL_NO_PERM(sethostname); - REGISTER_SYSCALL_NO_PERM(setdomainname); - REGISTER_SYSCALL_NO_PERM(kexec_load); - REGISTER_SYSCALL_NO_PERM(finit_module); - REGISTER_SYSCALL_NO_PERM(bpf); - REGISTER_SYSCALL_NO_PERM(lookup_dcookie); - REGISTER_SYSCALL_NO_PERM(init_module) - REGISTER_SYSCALL_NO_PERM(delete_module); - REGISTER_SYSCALL_NO_PERM(quotactl); - REGISTER_SYSCALL_NO_ACCESS(perf_event_open); - } +// these are removed/not implemented in the linux kernel we present + +void RegisterNotImplemented(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_NOT_IMPL(uselib); + REGISTER_SYSCALL_NOT_IMPL(create_module); + REGISTER_SYSCALL_NOT_IMPL(get_kernel_syms); + REGISTER_SYSCALL_NOT_IMPL(query_module); + REGISTER_SYSCALL_NOT_IMPL(nfsservctl); // Was removed in Linux 3.1 + REGISTER_SYSCALL_NOT_IMPL(getpmsg); + REGISTER_SYSCALL_NOT_IMPL(putpmsg); + REGISTER_SYSCALL_NOT_IMPL(afs_syscall); + REGISTER_SYSCALL_NOT_IMPL(vserver); + REGISTER_SYSCALL_NOT_IMPL(_sysctl); // Was removed in Linux 5.5 + + REGISTER_SYSCALL_NO_PERM(vhangup); + REGISTER_SYSCALL_NO_PERM(reboot) + REGISTER_SYSCALL_NO_PERM(sethostname); + REGISTER_SYSCALL_NO_PERM(setdomainname); + REGISTER_SYSCALL_NO_PERM(kexec_load); + REGISTER_SYSCALL_NO_PERM(finit_module); + REGISTER_SYSCALL_NO_PERM(bpf); + REGISTER_SYSCALL_NO_PERM(lookup_dcookie); + REGISTER_SYSCALL_NO_PERM(init_module) + REGISTER_SYSCALL_NO_PERM(delete_module); + REGISTER_SYSCALL_NO_PERM(quotactl); + 
REGISTER_SYSCALL_NO_ACCESS(perf_event_open); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/SHM.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/SHM.cpp index 54fcca1d79..3090ae4598 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/SHM.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/SHM.cpp @@ -16,20 +16,20 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterSHM(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; +void RegisterSHM(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; - REGISTER_SYSCALL_IMPL_PASS_FLAGS(_shmget, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, key_t key, size_t size, int shmflg) -> uint64_t { - uint64_t Result = shmget(key, size, shmflg); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(_shmget, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, key_t key, size_t size, int shmflg) -> uint64_t { + uint64_t Result = shmget(key, size, shmflg); + SYSCALL_ERRNO(); + }); - // XXX: shmid_ds is definitely not correct for 32-bit - REGISTER_SYSCALL_IMPL_PASS_FLAGS(_shmctl, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int shmid, int cmd, struct shmid_ds *buf) -> uint64_t { - uint64_t Result = ::shmctl(shmid, cmd, buf); - SYSCALL_ERRNO(); - }); - } + // XXX: shmid_ds is definitely not correct for 32-bit + REGISTER_SYSCALL_IMPL_PASS_FLAGS(_shmctl, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int shmid, int cmd, struct shmid_ds* buf) -> uint64_t { + uint64_t Result = ::shmctl(shmid, cmd, buf); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Sched.cpp 
b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Sched.cpp index 08c0100c49..55a00e3b95 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Sched.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Sched.cpp @@ -21,85 +21,86 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterSched(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_yield, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::sched_yield(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getpriority, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int which, int who) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(getpriority), which, who); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(setpriority, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int which, int who, int prio) -> uint64_t { - uint64_t Result = ::setpriority(which, who, prio); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_setparam, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, const struct sched_param *param) -> uint64_t { - uint64_t Result = ::sched_setparam(pid, param); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_getparam, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, struct sched_param *param) -> uint64_t { - uint64_t Result = ::sched_getparam(pid, param); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_setscheduler, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, int policy, const struct 
sched_param *param) -> uint64_t { - uint64_t Result = ::sched_setscheduler(pid, policy, param); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_getscheduler, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid) -> uint64_t { - uint64_t Result = ::sched_getscheduler(pid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_get_priority_max, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int policy) -> uint64_t { - uint64_t Result = ::sched_get_priority_max(policy); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_get_priority_min, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int policy) -> uint64_t { - uint64_t Result = ::sched_get_priority_min(policy); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(sched_setaffinity, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY | SyscallFlags::NOSIDEEFFECTS, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, size_t cpusetsize, const unsigned long *mask) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(sched_setaffinity), pid, cpusetsize, mask); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(sched_getaffinity, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, size_t cpusetsize, unsigned char *mask) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(sched_getaffinity), pid, cpusetsize, mask); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_setattr, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, struct sched_attr *attr, unsigned int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(sched_setattr), pid, attr, flags); - SYSCALL_ERRNO(); - }); - - 
REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_getattr, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, struct sched_attr *attr, unsigned int size, unsigned int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(sched_getattr), pid, attr, size, flags); - SYSCALL_ERRNO(); - }); - } +void RegisterSched(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_yield, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::sched_yield(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getpriority, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int which, int who) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(getpriority), which, who); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setpriority, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int which, int who, int prio) -> uint64_t { + uint64_t Result = ::setpriority(which, who, prio); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_setparam, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, const struct sched_param* param) -> uint64_t { + uint64_t Result = ::sched_setparam(pid, param); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_getparam, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, struct sched_param* param) -> uint64_t { + uint64_t Result = ::sched_getparam(pid, param); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_setscheduler, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t 
pid, int policy, const struct sched_param* param) -> uint64_t { + uint64_t Result = ::sched_setscheduler(pid, policy, param); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_getscheduler, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid) -> uint64_t { + uint64_t Result = ::sched_getscheduler(pid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_get_priority_max, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int policy) -> uint64_t { + uint64_t Result = ::sched_get_priority_max(policy); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_get_priority_min, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int policy) -> uint64_t { + uint64_t Result = ::sched_get_priority_min(policy); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(sched_setaffinity, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY | SyscallFlags::NOSIDEEFFECTS, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, size_t cpusetsize, const unsigned long* mask) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(sched_setaffinity), pid, cpusetsize, mask); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(sched_getaffinity, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, size_t cpusetsize, unsigned char* mask) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(sched_getaffinity), pid, cpusetsize, mask); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(sched_setattr, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, struct sched_attr* attr, unsigned int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(sched_setattr), pid, attr, flags); + SYSCALL_ERRNO(); + }); 
+ + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + sched_getattr, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, struct sched_attr* attr, unsigned int size, unsigned int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(sched_getattr), pid, attr, size, flags); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Semaphore.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Semaphore.cpp index ea90b97e6c..fa53cb811a 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Semaphore.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Semaphore.cpp @@ -15,13 +15,13 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterSemaphore(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; +void RegisterSemaphore(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; - REGISTER_SYSCALL_IMPL_PASS_FLAGS(semget, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, key_t key, int nsems, int semflg) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(semget), key, nsems, semflg); - SYSCALL_ERRNO(); - }); - } + REGISTER_SYSCALL_IMPL_PASS_FLAGS(semget, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, key_t key, int nsems, int semflg) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(semget), key, nsems, semflg); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Signals.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Signals.cpp index 988675d9de..d971f7ee7b 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Signals.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Signals.cpp @@ -19,37 +19,37 @@ tags: LinuxSyscalls|syscalls-shared #include namespace 
SignalDelegator { - struct GuestSigAction; +struct GuestSigAction; } namespace FEX::HLE { - void RegisterSignals(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL(rt_sigprocmask, [](FEXCore::Core::CpuStateFrame *Frame, int how, const uint64_t *set, uint64_t *oldset) -> uint64_t { - return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigProcMask(how, set, oldset); - }); - - REGISTER_SYSCALL_IMPL(rt_sigpending, [](FEXCore::Core::CpuStateFrame *Frame, uint64_t *set, size_t sigsetsize) -> uint64_t { - return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigPending(set, sigsetsize); - }); - - REGISTER_SYSCALL_IMPL(rt_sigsuspend, [](FEXCore::Core::CpuStateFrame *Frame, uint64_t *unewset, size_t sigsetsize) -> uint64_t { - return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigSuspend(unewset, sigsetsize); - }); - - REGISTER_SYSCALL_IMPL(userfaultfd, [](FEXCore::Core::CpuStateFrame *Frame, int flags) -> uint64_t { - // Disable userfaultfd until we can properly emulate it - // This is okay because the kernel configuration allows you to disable it at compile time - return -ENOSYS; - uint64_t Result = ::syscall(SYSCALL_DEF(userfaultfd), flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL(signalfd, [](FEXCore::Core::CpuStateFrame *Frame, int fd, const uint64_t *mask, size_t sigsetsize) -> uint64_t { - return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSignalFD(fd, mask, sigsetsize, 0); - }); - - REGISTER_SYSCALL_IMPL(signalfd4, [](FEXCore::Core::CpuStateFrame *Frame, int fd, const uint64_t *mask, size_t sigsetsize, int flags) -> uint64_t { - return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSignalFD(fd, mask, sigsetsize, flags); - }); - } +void RegisterSignals(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL(rt_sigprocmask, [](FEXCore::Core::CpuStateFrame* Frame, int how, const uint64_t* set, uint64_t* oldset) -> uint64_t { + return 
FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigProcMask(how, set, oldset); + }); + + REGISTER_SYSCALL_IMPL(rt_sigpending, [](FEXCore::Core::CpuStateFrame* Frame, uint64_t* set, size_t sigsetsize) -> uint64_t { + return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigPending(set, sigsetsize); + }); + + REGISTER_SYSCALL_IMPL(rt_sigsuspend, [](FEXCore::Core::CpuStateFrame* Frame, uint64_t* unewset, size_t sigsetsize) -> uint64_t { + return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigSuspend(unewset, sigsetsize); + }); + + REGISTER_SYSCALL_IMPL(userfaultfd, [](FEXCore::Core::CpuStateFrame* Frame, int flags) -> uint64_t { + // Disable userfaultfd until we can properly emulate it + // This is okay because the kernel configuration allows you to disable it at compile time + return -ENOSYS; + uint64_t Result = ::syscall(SYSCALL_DEF(userfaultfd), flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL(signalfd, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const uint64_t* mask, size_t sigsetsize) -> uint64_t { + return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSignalFD(fd, mask, sigsetsize, 0); + }); + + REGISTER_SYSCALL_IMPL(signalfd4, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const uint64_t* mask, size_t sigsetsize, int flags) -> uint64_t { + return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSignalFD(fd, mask, sigsetsize, flags); + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Socket.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Socket.cpp index 7c6f954994..837c3339cd 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Socket.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Socket.cpp @@ -16,73 +16,76 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterSocket(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; +void RegisterSocket(FEX::HLE::SyscallHandler* Handler) { + using 
namespace FEXCore::IR; - REGISTER_SYSCALL_IMPL_PASS_FLAGS(socket, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int domain, int type, int protocol) -> uint64_t { - uint64_t Result = ::socket(domain, type, protocol); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(socket, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int domain, int type, int protocol) -> uint64_t { + uint64_t Result = ::socket(domain, type, protocol); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(connect, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, const struct sockaddr *addr, socklen_t addrlen) -> uint64_t { - uint64_t Result = ::connect(sockfd, addr, addrlen); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(connect, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, const struct sockaddr* addr, socklen_t addrlen) -> uint64_t { + uint64_t Result = ::connect(sockfd, addr, addrlen); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(accept4, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) -> uint64_t { - uint64_t Result = ::accept4(sockfd, addr, addrlen, flags); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + accept4, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, struct sockaddr* addr, socklen_t* addrlen, int flags) -> uint64_t { + uint64_t Result = ::accept4(sockfd, addr, addrlen, flags); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(sendto, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, 
const void *buf, size_t len, int flags, const struct sockaddr *dest_addr, socklen_t addrlen) -> uint64_t { - uint64_t Result = ::sendto(sockfd, buf, len, flags, dest_addr, addrlen); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(sendto, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, const void* buf, size_t len, int flags, + const struct sockaddr* dest_addr, socklen_t addrlen) -> uint64_t { + uint64_t Result = ::sendto(sockfd, buf, len, flags, dest_addr, addrlen); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(recvfrom, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, void *buf, size_t len, int flags, struct sockaddr *src_addr, socklen_t *addrlen) -> uint64_t { - uint64_t Result = ::recvfrom(sockfd, buf, len, flags, src_addr, addrlen); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(recvfrom, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, void* buf, size_t len, int flags, + struct sockaddr* src_addr, socklen_t* addrlen) -> uint64_t { + uint64_t Result = ::recvfrom(sockfd, buf, len, flags, src_addr, addrlen); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(shutdown, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, int how) -> uint64_t { - uint64_t Result = ::shutdown(sockfd, how); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(shutdown, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, int how) -> uint64_t { + uint64_t Result = ::shutdown(sockfd, how); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(bind, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, const struct 
sockaddr *addr, socklen_t addrlen) -> uint64_t { - uint64_t Result = ::bind(sockfd, addr, addrlen); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(bind, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, const struct sockaddr* addr, socklen_t addrlen) -> uint64_t { + uint64_t Result = ::bind(sockfd, addr, addrlen); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(listen, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, int backlog) -> uint64_t { - uint64_t Result = ::listen(sockfd, backlog); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(listen, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, int backlog) -> uint64_t { + uint64_t Result = ::listen(sockfd, backlog); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getsockname, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, struct sockaddr *addr, socklen_t *addrlen) -> uint64_t { - uint64_t Result = ::getsockname(sockfd, addr, addrlen); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getsockname, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, struct sockaddr* addr, socklen_t* addrlen) -> uint64_t { + uint64_t Result = ::getsockname(sockfd, addr, addrlen); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getpeername, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, struct sockaddr *addr, socklen_t *addrlen) -> uint64_t { - uint64_t Result = ::getpeername(sockfd, addr, addrlen); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getpeername, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + 
[](FEXCore::Core::CpuStateFrame* Frame, int sockfd, struct sockaddr* addr, socklen_t* addrlen) -> uint64_t { + uint64_t Result = ::getpeername(sockfd, addr, addrlen); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(socketpair, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int domain, int type, int protocol, int sv[2]) -> uint64_t { - uint64_t Result = ::socketpair(domain, type, protocol, sv); - SYSCALL_ERRNO(); - }); - } + REGISTER_SYSCALL_IMPL_PASS_FLAGS(socketpair, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int domain, int type, int protocol, int sv[2]) -> uint64_t { + uint64_t Result = ::socketpair(domain, type, protocol, sv); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Stubs.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Stubs.cpp index c82d806cb5..a3fccf3ded 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Stubs.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Stubs.cpp @@ -15,31 +15,34 @@ tags: LinuxSyscalls|syscalls-shared #include #include -#define SYSCALL_STUB(name) do { ERROR_AND_DIE_FMT("Syscall: " #name " stub!"); return -ENOSYS; } while(0) +#define SYSCALL_STUB(name) \ + do { \ + ERROR_AND_DIE_FMT("Syscall: " #name " stub!"); \ + return -ENOSYS; \ + } while (0) namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE { - void RegisterStubs(FEX::HLE::SyscallHandler *Handler) { - - REGISTER_SYSCALL_IMPL(ptrace, [](FEXCore::Core::CpuStateFrame *Frame, int /*enum __ptrace_request*/ request, pid_t pid, void *addr, void *data) -> uint64_t { - // We don't support this - return -EPERM; - }); - - REGISTER_SYSCALL_IMPL(modify_ldt, [](FEXCore::Core::CpuStateFrame *Frame, int func, void *ptr, unsigned long bytecount) -> uint64_t { - SYSCALL_STUB(modify_ldt); - }); - - 
REGISTER_SYSCALL_IMPL(restart_syscall, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - SYSCALL_STUB(restart_syscall); - }); - - REGISTER_SYSCALL_IMPL(rseq, [](FEXCore::Core::CpuStateFrame *Frame, struct rseq *rseq, uint32_t rseq_len, int flags, uint32_t sig) -> uint64_t { - // We don't support this - return -ENOSYS; - }); - } +void RegisterStubs(FEX::HLE::SyscallHandler* Handler) { + + REGISTER_SYSCALL_IMPL( + ptrace, [](FEXCore::Core::CpuStateFrame* Frame, int /*enum __ptrace_request*/ request, pid_t pid, void* addr, void* data) -> uint64_t { + // We don't support this + return -EPERM; + }); + + REGISTER_SYSCALL_IMPL(modify_ldt, [](FEXCore::Core::CpuStateFrame* Frame, int func, void* ptr, unsigned long bytecount) -> uint64_t { + SYSCALL_STUB(modify_ldt); + }); + + REGISTER_SYSCALL_IMPL(restart_syscall, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { SYSCALL_STUB(restart_syscall); }); + + REGISTER_SYSCALL_IMPL(rseq, [](FEXCore::Core::CpuStateFrame* Frame, struct rseq* rseq, uint32_t rseq_len, int flags, uint32_t sig) -> uint64_t { + // We don't support this + return -ENOSYS; + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Thread.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Thread.cpp index c01f8499f2..ccb25f477c 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Thread.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Thread.cpp @@ -42,656 +42,643 @@ ARG_TO_STR(idtype_t, "%u") namespace FEX::HLE { - struct ExecutionThreadHandler { - FEXCore::Context::Context *CTX; - FEXCore::Core::InternalThreadState *Thread; - }; - - static void *ThreadHandler(void* Data) { - ExecutionThreadHandler *Handler = reinterpret_cast(Data); - auto CTX = Handler->CTX; - auto Thread = Handler->Thread; - FEXCore::Allocator::free(Handler); - CTX->ExecutionThread(Thread); - FEX::HLE::_SyscallHandler->TM.DestroyThread(Thread); - return nullptr; - } - - FEXCore::Core::InternalThreadState 
*CreateNewThread(FEXCore::Context:: Context *CTX, FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clone3_args *args) { - uint64_t flags = args->args.flags; - FEXCore::Core::CPUState NewThreadState{}; - // Clone copies the parent thread's state - memcpy(&NewThreadState, Frame, sizeof(FEXCore::Core::CPUState)); +struct ExecutionThreadHandler { + FEXCore::Context::Context* CTX; + FEXCore::Core::InternalThreadState* Thread; +}; + +static void* ThreadHandler(void* Data) { + ExecutionThreadHandler* Handler = reinterpret_cast(Data); + auto CTX = Handler->CTX; + auto Thread = Handler->Thread; + FEXCore::Allocator::free(Handler); + CTX->ExecutionThread(Thread); + FEX::HLE::_SyscallHandler->TM.DestroyThread(Thread); + return nullptr; +} - NewThreadState.gregs[FEXCore::X86State::REG_RAX] = 0; - if (args->Type == TYPE_CLONE3) { - // stack pointer points to the lowest address to the stack - // set RSP to stack + size - NewThreadState.gregs[FEXCore::X86State::REG_RSP] = args->args.stack + args->args.stack_size; - } - else { - NewThreadState.gregs[FEXCore::X86State::REG_RSP] = args->args.stack; - } +FEXCore::Core::InternalThreadState* +CreateNewThread(FEXCore::Context::Context* CTX, FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::clone3_args* args) { + uint64_t flags = args->args.flags; + FEXCore::Core::CPUState NewThreadState {}; + // Clone copies the parent thread's state + memcpy(&NewThreadState, Frame, sizeof(FEXCore::Core::CPUState)); + + NewThreadState.gregs[FEXCore::X86State::REG_RAX] = 0; + if (args->Type == TYPE_CLONE3) { + // stack pointer points to the lowest address to the stack + // set RSP to stack + size + NewThreadState.gregs[FEXCore::X86State::REG_RSP] = args->args.stack + args->args.stack_size; + } else { + NewThreadState.gregs[FEXCore::X86State::REG_RSP] = args->args.stack; + } - auto NewThread = FEX::HLE::_SyscallHandler->TM.CreateThread(0, 0, &NewThreadState, args->args.parent_tid); + auto NewThread = FEX::HLE::_SyscallHandler->TM.CreateThread(0, 0, 
&NewThreadState, args->args.parent_tid); - if (FEX::HLE::_SyscallHandler->Is64BitMode()) { - if (flags & CLONE_SETTLS) { - x64::SetThreadArea(NewThread->CurrentFrame, reinterpret_cast(args->args.tls)); - } - // Set us to start just after the syscall instruction - x64::AdjustRipForNewThread(NewThread->CurrentFrame); + if (FEX::HLE::_SyscallHandler->Is64BitMode()) { + if (flags & CLONE_SETTLS) { + x64::SetThreadArea(NewThread->CurrentFrame, reinterpret_cast(args->args.tls)); } - else { - if (flags & CLONE_SETTLS) { - x32::SetThreadArea(NewThread->CurrentFrame, reinterpret_cast(args->args.tls)); - } - x32::AdjustRipForNewThread(NewThread->CurrentFrame); + // Set us to start just after the syscall instruction + x64::AdjustRipForNewThread(NewThread->CurrentFrame); + } else { + if (flags & CLONE_SETTLS) { + x32::SetThreadArea(NewThread->CurrentFrame, reinterpret_cast(args->args.tls)); } + x32::AdjustRipForNewThread(NewThread->CurrentFrame); + } - // We need to do some post-thread creation setup. - NewThread->StartPaused = true; - - // Initialize a new thread for execution. - ExecutionThreadHandler *Arg = reinterpret_cast(FEXCore::Allocator::malloc(sizeof(ExecutionThreadHandler))); - Arg->CTX = CTX; - Arg->Thread = NewThread; - NewThread->ExecutionThread = FEXCore::Threads::Thread::Create(ThreadHandler, Arg); - - // Wait for the thread to have started. - NewThread->ThreadWaiting.Wait(); - - if (FEX::HLE::_SyscallHandler->NeedXIDCheck()) { - // The first time an application creates a thread, GLIBC installs their SETXID signal handler. - // FEX needs to capture all signals and defer them to the guest. - // Once FEX creates its first guest thread, overwrite the GLIBC SETXID handler *again* to ensure - // FEX maintains control of the signal handler on this signal. - FEX::HLE::_SyscallHandler->GetSignalDelegator()->CheckXIDHandler(); - FEX::HLE::_SyscallHandler->DisableXIDCheck(); - } + // We need to do some post-thread creation setup. 
+ NewThread->StartPaused = true; + + // Initialize a new thread for execution. + ExecutionThreadHandler* Arg = reinterpret_cast(FEXCore::Allocator::malloc(sizeof(ExecutionThreadHandler))); + Arg->CTX = CTX; + Arg->Thread = NewThread; + NewThread->ExecutionThread = FEXCore::Threads::Thread::Create(ThreadHandler, Arg); + + // Wait for the thread to have started. + NewThread->ThreadWaiting.Wait(); + + if (FEX::HLE::_SyscallHandler->NeedXIDCheck()) { + // The first time an application creates a thread, GLIBC installs their SETXID signal handler. + // FEX needs to capture all signals and defer them to the guest. + // Once FEX creates its first guest thread, overwrite the GLIBC SETXID handler *again* to ensure + // FEX maintains control of the signal handler on this signal. + FEX::HLE::_SyscallHandler->GetSignalDelegator()->CheckXIDHandler(); + FEX::HLE::_SyscallHandler->DisableXIDCheck(); + } - // Return the new threads TID - uint64_t Result = NewThread->ThreadManager.GetTID(); + // Return the new threads TID + uint64_t Result = NewThread->ThreadManager.GetTID(); - // Sets the child TID to pointer in ParentTID - if (flags & CLONE_PARENT_SETTID) { - *reinterpret_cast(args->args.parent_tid) = Result; - } + // Sets the child TID to pointer in ParentTID + if (flags & CLONE_PARENT_SETTID) { + *reinterpret_cast(args->args.parent_tid) = Result; + } - // Sets the child TID to the pointer in ChildTID - if (flags & CLONE_CHILD_SETTID) { - NewThread->ThreadManager.set_child_tid = reinterpret_cast(args->args.child_tid); - *reinterpret_cast(args->args.child_tid) = Result; - } + // Sets the child TID to the pointer in ChildTID + if (flags & CLONE_CHILD_SETTID) { + NewThread->ThreadManager.set_child_tid = reinterpret_cast(args->args.child_tid); + *reinterpret_cast(args->args.child_tid) = Result; + } - // When the thread exits, clear the child thread ID at ChildTID - // Additionally wakeup a futex at that address - // Address /may/ be changed with SET_TID_ADDRESS syscall - if (flags & 
CLONE_CHILD_CLEARTID) { - NewThread->ThreadManager.clear_child_tid = reinterpret_cast(args->args.child_tid); - } + // When the thread exits, clear the child thread ID at ChildTID + // Additionally wakeup a futex at that address + // Address /may/ be changed with SET_TID_ADDRESS syscall + if (flags & CLONE_CHILD_CLEARTID) { + NewThread->ThreadManager.clear_child_tid = reinterpret_cast(args->args.child_tid); + } - // clone3 flag - if (flags & CLONE_PIDFD) { - // Use pidfd_open to emulate this flag - const int pidfd = ::syscall(SYSCALL_DEF(pidfd_open), Result, 0); - if (Result == ~0ULL) { - LogMan::Msg::EFmt("Couldn't get pidfd of TID {}\n", Result); - } - else { - *reinterpret_cast(args->args.pidfd) = pidfd; - } + // clone3 flag + if (flags & CLONE_PIDFD) { + // Use pidfd_open to emulate this flag + const int pidfd = ::syscall(SYSCALL_DEF(pidfd_open), Result, 0); + if (Result == ~0ULL) { + LogMan::Msg::EFmt("Couldn't get pidfd of TID {}\n", Result); + } else { + *reinterpret_cast(args->args.pidfd) = pidfd; } - - FEX::HLE::_SyscallHandler->TM.TrackThread(NewThread); - - return NewThread; } - uint64_t HandleNewClone(FEXCore::Core::InternalThreadState *Thread, FEXCore::Context::Context *CTX, FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::clone3_args *CloneArgs) { - auto GuestArgs = &CloneArgs->args; - uint64_t flags = GuestArgs->flags; - auto NewThread = Thread; - bool CreatedNewThreadObject{}; - - if (flags & CLONE_THREAD) { - FEXCore::Core::CPUState NewThreadState{}; - // Clone copies the parent thread's state - memcpy(&NewThreadState, Frame, sizeof(FEXCore::Core::CPUState)); + FEX::HLE::_SyscallHandler->TM.TrackThread(NewThread); - NewThreadState.gregs[FEXCore::X86State::REG_RAX] = 0; - if (GuestArgs->stack == 0) { - // Copies in the original thread's stack - } - else { - NewThreadState.gregs[FEXCore::X86State::REG_RSP] = GuestArgs->stack; - } - - // Overwrite thread - NewThread = FEX::HLE::_SyscallHandler->TM.CreateThread(0, 0, &NewThreadState, 
GuestArgs->parent_tid); + return NewThread; +} - // CLONE_PARENT_SETTID, CLONE_CHILD_SETTID, CLONE_CHILD_CLEARTID, CLONE_PIDFD will be handled by kernel - // Call execution thread directly since we already are on the new thread - NewThread->StartRunning.NotifyAll(); // Clear the start running flag - CreatedNewThreadObject = true; - } - else{ - // If we don't have CLONE_THREAD then we are effectively a fork - // Clear all the other threads that are being tracked - // Frame->Thread is /ONLY/ safe to access when CLONE_THREAD flag is not set - // Unlock the mutexes on both sides of the fork - FEX::HLE::_SyscallHandler->UnlockAfterFork(Frame->Thread, true); - - ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &CloneArgs->SignalMask, nullptr, sizeof(CloneArgs->SignalMask)); - - Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RAX] = 0; - if (GuestArgs->stack == 0) { - // Copies in the original thread's stack - } - else { - Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] = GuestArgs->stack; - } - } +uint64_t HandleNewClone(FEXCore::Core::InternalThreadState* Thread, FEXCore::Context::Context* CTX, FEXCore::Core::CpuStateFrame* Frame, + FEX::HLE::clone3_args* CloneArgs) { + auto GuestArgs = &CloneArgs->args; + uint64_t flags = GuestArgs->flags; + auto NewThread = Thread; + bool CreatedNewThreadObject {}; - if (CloneArgs->Type == TYPE_CLONE3) { - // If we are coming from a clone3 handler then we need to adjust RSP. 
- Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] += CloneArgs->args.stack_size; - } + if (flags & CLONE_THREAD) { + FEXCore::Core::CPUState NewThreadState {}; + // Clone copies the parent thread's state + memcpy(&NewThreadState, Frame, sizeof(FEXCore::Core::CPUState)); - if (FEX::HLE::_SyscallHandler->Is64BitMode()) { - if (flags & CLONE_SETTLS) { - x64::SetThreadArea(NewThread->CurrentFrame, reinterpret_cast(GuestArgs->tls)); - } - // Set us to start just after the syscall instruction - x64::AdjustRipForNewThread(NewThread->CurrentFrame); - } - else { - if (flags & CLONE_SETTLS) { - x32::SetThreadArea(NewThread->CurrentFrame, reinterpret_cast(GuestArgs->tls)); - } - x32::AdjustRipForNewThread(NewThread->CurrentFrame); + NewThreadState.gregs[FEXCore::X86State::REG_RAX] = 0; + if (GuestArgs->stack == 0) { + // Copies in the original thread's stack + } else { + NewThreadState.gregs[FEXCore::X86State::REG_RSP] = GuestArgs->stack; } - // Depending on clone settings, our TID and PID could have changed - Thread->ThreadManager.TID = FHU::Syscalls::gettid(); - Thread->ThreadManager.PID = ::getpid(); - FEX::HLE::_SyscallHandler->FM.UpdatePID(Thread->ThreadManager.PID); - - if (CreatedNewThreadObject) { - FEX::HLE::_SyscallHandler->TM.TrackThread(Thread); + // Overwrite thread + NewThread = FEX::HLE::_SyscallHandler->TM.CreateThread(0, 0, &NewThreadState, GuestArgs->parent_tid); + + // CLONE_PARENT_SETTID, CLONE_CHILD_SETTID, CLONE_CHILD_CLEARTID, CLONE_PIDFD will be handled by kernel + // Call execution thread directly since we already are on the new thread + NewThread->StartRunning.NotifyAll(); // Clear the start running flag + CreatedNewThreadObject = true; + } else { + // If we don't have CLONE_THREAD then we are effectively a fork + // Clear all the other threads that are being tracked + // Frame->Thread is /ONLY/ safe to access when CLONE_THREAD flag is not set + // Unlock the mutexes on both sides of the fork + 
FEX::HLE::_SyscallHandler->UnlockAfterFork(Frame->Thread, true); + + ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &CloneArgs->SignalMask, nullptr, sizeof(CloneArgs->SignalMask)); + + Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RAX] = 0; + if (GuestArgs->stack == 0) { + // Copies in the original thread's stack + } else { + Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] = GuestArgs->stack; } - - // Start exuting the thread directly - // Our host clone starts in a new stack space, so it can't return back to the JIT space - CTX->ExecutionThread(Thread); - - // The rest of the context remains as is and the thread will continue executing - return Thread->StatusCode; } - uint64_t ForkGuest(FEXCore::Core::InternalThreadState *Thread, FEXCore::Core::CpuStateFrame *Frame, uint32_t flags, void *stack, size_t StackSize, pid_t *parent_tid, pid_t *child_tid, void *tls) { - // Just before we fork, we lock all syscall mutexes so that both processes will end up with a locked mutex - - uint64_t Mask{~0ULL}; - ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &Mask, &Mask, sizeof(Mask)); - - FEX::HLE::_SyscallHandler->LockBeforeFork(Frame->Thread); + if (CloneArgs->Type == TYPE_CLONE3) { + // If we are coming from a clone3 handler then we need to adjust RSP. + Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] += CloneArgs->args.stack_size; + } - const bool IsVFork = flags & CLONE_VFORK; - pid_t Result{}; - int VForkFDs[2]; - if (IsVFork) { - // Use pipes as a mechanism for knowing when the child process is exiting. - // FEX can't use `waitpid` for this since the child process may want to use it. - // If we use `waitpid` then the kernel won't return the same data if asked again. - pipe2(VForkFDs, O_CLOEXEC); - - // XXX: We don't currently support a real `vfork` as it causes problems. - // Currently behaves like a fork (with wait after the fact), which isn't correct. 
Need to find where the problem is - Result = fork(); - - if (Result == 0) { - // Close the read end of the pipe. - // Keep the write end open so the parent can poll it. - close(VForkFDs[0]); - } - else { - // Close the write end of the pipe. - close(VForkFDs[1]); - } + if (FEX::HLE::_SyscallHandler->Is64BitMode()) { + if (flags & CLONE_SETTLS) { + x64::SetThreadArea(NewThread->CurrentFrame, reinterpret_cast(GuestArgs->tls)); } - else { - Result = fork(); + // Set us to start just after the syscall instruction + x64::AdjustRipForNewThread(NewThread->CurrentFrame); + } else { + if (flags & CLONE_SETTLS) { + x32::SetThreadArea(NewThread->CurrentFrame, reinterpret_cast(GuestArgs->tls)); } - const bool IsChild = Result == 0; - - if (IsChild) { - // Unlock the mutexes on both sides of the fork - FEX::HLE::_SyscallHandler->UnlockAfterFork(Frame->Thread, IsChild); - - ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &Mask, nullptr, sizeof(Mask)); - - // Child - // update the internal TID - Thread->ThreadManager.TID = FHU::Syscalls::gettid(); - Thread->ThreadManager.PID = ::getpid(); - FEX::HLE::_SyscallHandler->FM.UpdatePID(Thread->ThreadManager.PID); - Thread->ThreadManager.clear_child_tid = nullptr; - - // only a single thread running so no need to remove anything from the thread array - - // Handle child setup now - if (stack != nullptr) { - // use specified stack - Frame->State.gregs[FEXCore::X86State::REG_RSP] = reinterpret_cast(stack) + StackSize; - } else { - // In the case of fork and nullptr stack then the child uses the same stack space as the parent - // Same virtual address, different addressspace - } + x32::AdjustRipForNewThread(NewThread->CurrentFrame); + } - if (FEX::HLE::_SyscallHandler->Is64BitMode()) { - if (flags & CLONE_SETTLS) { - x64::SetThreadArea(Frame, tls); - } - } - else { - // 32bit TLS doesn't just set the fs register - if (flags & CLONE_SETTLS) { - x32::SetThreadArea(Frame, tls); - } - } + // Depending on clone settings, our TID and PID could have 
changed + Thread->ThreadManager.TID = FHU::Syscalls::gettid(); + Thread->ThreadManager.PID = ::getpid(); + FEX::HLE::_SyscallHandler->FM.UpdatePID(Thread->ThreadManager.PID); - // Sets the child TID to the pointer in ChildTID - if (flags & CLONE_CHILD_SETTID) { - Thread->ThreadManager.set_child_tid = child_tid; - *child_tid = Thread->ThreadManager.TID; - } + if (CreatedNewThreadObject) { + FEX::HLE::_SyscallHandler->TM.TrackThread(Thread); + } - // When the thread exits, clear the child thread ID at ChildTID - // Additionally wakeup a futex at that address - // Address /may/ be changed with SET_TID_ADDRESS syscall - if (flags & CLONE_CHILD_CLEARTID) { - Thread->ThreadManager.clear_child_tid = child_tid; - } + // Start exuting the thread directly + // Our host clone starts in a new stack space, so it can't return back to the JIT space + CTX->ExecutionThread(Thread); - // the rest of the context remains as is, this thread will keep executing - return 0; - } else { - if (Result != -1) { - if (flags & CLONE_PARENT_SETTID) { - *parent_tid = Result; - } - } + // The rest of the context remains as is and the thread will continue executing + return Thread->StatusCode; +} - // Unlock the mutexes on both sides of the fork - FEX::HLE::_SyscallHandler->UnlockAfterFork(Frame->Thread, IsChild); +uint64_t ForkGuest(FEXCore::Core::InternalThreadState* Thread, FEXCore::Core::CpuStateFrame* Frame, uint32_t flags, void* stack, + size_t StackSize, pid_t* parent_tid, pid_t* child_tid, void* tls) { + // Just before we fork, we lock all syscall mutexes so that both processes will end up with a locked mutex - ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &Mask, nullptr, sizeof(Mask)); + uint64_t Mask {~0ULL}; + ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &Mask, &Mask, sizeof(Mask)); - // VFork needs the parent to wait for the child to exit. - if (IsVFork) { - // Wait for the read end of the pipe to close. 
- pollfd PollFD{}; - PollFD.fd = VForkFDs[0]; - PollFD.events = POLLIN | POLLOUT | POLLRDHUP | POLLERR | POLLHUP | POLLNVAL; + FEX::HLE::_SyscallHandler->LockBeforeFork(Frame->Thread); - // Mask all signals until the child process returns. - sigset_t SignalMask{}; - sigfillset(&SignalMask); - while (ppoll(&PollFD, 1, nullptr, &SignalMask) == -1 && errno == EINTR); + const bool IsVFork = flags & CLONE_VFORK; + pid_t Result {}; + int VForkFDs[2]; + if (IsVFork) { + // Use pipes as a mechanism for knowing when the child process is exiting. + // FEX can't use `waitpid` for this since the child process may want to use it. + // If we use `waitpid` then the kernel won't return the same data if asked again. + pipe2(VForkFDs, O_CLOEXEC); - // Close the read end now. - close(VForkFDs[0]); - } + // XXX: We don't currently support a real `vfork` as it causes problems. + // Currently behaves like a fork (with wait after the fact), which isn't correct. Need to find where the problem is + Result = fork(); - // Parent - SYSCALL_ERRNO(); + if (Result == 0) { + // Close the read end of the pipe. + // Keep the write end open so the parent can poll it. + close(VForkFDs[0]); + } else { + // Close the write end of the pipe. 
+ close(VForkFDs[1]); } + } else { + Result = fork(); } + const bool IsChild = Result == 0; - void RegisterThread(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; + if (IsChild) { + // Unlock the mutexes on both sides of the fork + FEX::HLE::_SyscallHandler->UnlockAfterFork(Frame->Thread, IsChild); - REGISTER_SYSCALL_IMPL(rt_sigreturn, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - FEX::HLE::_SyscallHandler->GetSignalDelegator()->HandleSignalHandlerReturn(true); - FEX_UNREACHABLE; - }); + ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &Mask, nullptr, sizeof(Mask)); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getpid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::getpid(); - SYSCALL_ERRNO(); - }); + // Child + // update the internal TID + Thread->ThreadManager.TID = FHU::Syscalls::gettid(); + Thread->ThreadManager.PID = ::getpid(); + FEX::HLE::_SyscallHandler->FM.UpdatePID(Thread->ThreadManager.PID); + Thread->ThreadManager.clear_child_tid = nullptr; - REGISTER_SYSCALL_IMPL_FLAGS(fork, SyscallFlags::DEFAULT, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - return ForkGuest(Frame->Thread, Frame, 0, 0, 0, 0, 0, 0); - }); + // only a single thread running so no need to remove anything from the thread array - REGISTER_SYSCALL_IMPL_FLAGS(vfork, SyscallFlags::DEFAULT, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - return ForkGuest(Frame->Thread, Frame, CLONE_VFORK, 0, 0, 0, 0, 0); - }); + // Handle child setup now + if (stack != nullptr) { + // use specified stack + Frame->State.gregs[FEXCore::X86State::REG_RSP] = reinterpret_cast(stack) + StackSize; + } else { + // In the case of fork and nullptr stack then the child uses the same stack space as the parent + // Same virtual address, different addressspace + } - REGISTER_SYSCALL_IMPL_FLAGS(clone3, SyscallFlags::DEFAULT, ([](FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::kernel_clone3_args 
*cl_args, size_t size) -> uint64_t { - FEX::HLE::clone3_args args{}; - args.Type = TypeOfClone::TYPE_CLONE3; - memcpy(&args.args, cl_args, std::min(sizeof(FEX::HLE::kernel_clone3_args), size)); - return CloneHandler(Frame, &args); - })); - - REGISTER_SYSCALL_IMPL_FLAGS(exit, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY | SyscallFlags::NORETURN, - [](FEXCore::Core::CpuStateFrame *Frame, int status) -> uint64_t { - auto Thread = Frame->Thread; - - // TLS/DTV teardown is something FEX can't control. Disable glibc checking when we leave a pthread. - // Since this thread is hard stopping, we can't track the TLS/DTV teardown in FEX's thread handling. - FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator::HardDisable(); - - if (Thread->ThreadManager.clear_child_tid) { - std::atomic *Addr = reinterpret_cast*>(Thread->ThreadManager.clear_child_tid); - Addr->store(0); - syscall(SYSCALL_DEF(futex), - Thread->ThreadManager.clear_child_tid, - FUTEX_WAKE, - ~0ULL, - 0, - 0, - 0); + if (FEX::HLE::_SyscallHandler->Is64BitMode()) { + if (flags & CLONE_SETTLS) { + x64::SetThreadArea(Frame, tls); } + } else { + // 32bit TLS doesn't just set the fs register + if (flags & CLONE_SETTLS) { + x32::SetThreadArea(Frame, tls); + } + } - Thread->StatusCode = status; - FEX::HLE::_SyscallHandler->TM.StopThread(Thread); - - return 0; - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(kill, SyscallFlags::DEFAULT, [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, int sig) -> uint64_t { - uint64_t Result = ::kill(pid, sig); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(tkill, SyscallFlags::DEFAULT, [](FEXCore::Core::CpuStateFrame *Frame, int tid, int sig) -> uint64_t { - // Can't actually use tgkill here, kernel rejects tgkill of tgid == 0 - uint64_t Result = ::syscall(SYSCALL_DEF(tkill), tid, sig); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(tgkill, SyscallFlags::DEFAULT, [](FEXCore::Core::CpuStateFrame *Frame, int tgid, int tid, int sig) 
-> uint64_t { - uint64_t Result = FHU::Syscalls::tgkill(tgid, tid, sig); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::getuid(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::getgid(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(setuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uid_t uid) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(setuid), uid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(setgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, gid_t gid) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(setgid), gid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(geteuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::geteuid(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getegid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::getegid(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getppid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::getppid(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getpgrp, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::getpgrp(); - SYSCALL_ERRNO(); - }); - - 
REGISTER_SYSCALL_IMPL_PASS_FLAGS(setsid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::setsid(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(setreuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uid_t ruid, uid_t euid) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(setreuid), ruid, euid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(setregid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, gid_t rgid, gid_t egid) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(setregid), rgid, egid); - SYSCALL_ERRNO(); - }); + // Sets the child TID to the pointer in ChildTID + if (flags & CLONE_CHILD_SETTID) { + Thread->ThreadManager.set_child_tid = child_tid; + *child_tid = Thread->ThreadManager.TID; + } - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getgroups, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int size, gid_t list[]) -> uint64_t { - uint64_t Result = ::getgroups(size, list); - SYSCALL_ERRNO(); - }); + // When the thread exits, clear the child thread ID at ChildTID + // Additionally wakeup a futex at that address + // Address /may/ be changed with SET_TID_ADDRESS syscall + if (flags & CLONE_CHILD_CLEARTID) { + Thread->ThreadManager.clear_child_tid = child_tid; + } - REGISTER_SYSCALL_IMPL_PASS_FLAGS(setgroups, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, size_t size, const gid_t *list) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(setgroups), size, list); - SYSCALL_ERRNO(); - }); + // the rest of the context remains as is, this thread will keep executing + return 0; + } else { + if (Result != -1) { + if (flags & CLONE_PARENT_SETTID) { + *parent_tid = Result; + } + } - 
REGISTER_SYSCALL_IMPL_PASS_FLAGS(setresuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uid_t ruid, uid_t euid, uid_t suid) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(setresuid), ruid, euid, suid); - SYSCALL_ERRNO(); - }); + // Unlock the mutexes on both sides of the fork + FEX::HLE::_SyscallHandler->UnlockAfterFork(Frame->Thread, IsChild); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getresuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uid_t *ruid, uid_t *euid, uid_t *suid) -> uint64_t { - uint64_t Result = ::getresuid(ruid, euid, suid); - SYSCALL_ERRNO(); - }); + ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &Mask, nullptr, sizeof(Mask)); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(setresgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, gid_t rgid, gid_t egid, gid_t sgid) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(setresgid), rgid, egid, sgid); - SYSCALL_ERRNO(); - }); + // VFork needs the parent to wait for the child to exit. + if (IsVFork) { + // Wait for the read end of the pipe to close. + pollfd PollFD {}; + PollFD.fd = VForkFDs[0]; + PollFD.events = POLLIN | POLLOUT | POLLRDHUP | POLLERR | POLLHUP | POLLNVAL; + + // Mask all signals until the child process returns. + sigset_t SignalMask {}; + sigfillset(&SignalMask); + while (ppoll(&PollFD, 1, nullptr, &SignalMask) == -1 && errno == EINTR) + ; + + // Close the read end now. 
+ close(VForkFDs[0]); + } - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getresgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, gid_t *rgid, gid_t *egid, gid_t *sgid) -> uint64_t { - uint64_t Result = ::getresgid(rgid, egid, sgid); - SYSCALL_ERRNO(); - }); + // Parent + SYSCALL_ERRNO(); + } +} - REGISTER_SYSCALL_IMPL_PASS_FLAGS(personality, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uint64_t persona) -> uint64_t { - uint64_t Result = ::personality(persona); - SYSCALL_ERRNO(); - }); +void RegisterThread(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; + + REGISTER_SYSCALL_IMPL(rt_sigreturn, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + FEX::HLE::_SyscallHandler->GetSignalDelegator()->HandleSignalHandlerReturn(true); + FEX_UNREACHABLE; + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getpid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::getpid(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(fork, SyscallFlags::DEFAULT, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + return ForkGuest(Frame->Thread, Frame, 0, 0, 0, 0, 0, 0); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(vfork, SyscallFlags::DEFAULT, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + return ForkGuest(Frame->Thread, Frame, CLONE_VFORK, 0, 0, 0, 0, 0); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(clone3, SyscallFlags::DEFAULT, + ([](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::kernel_clone3_args* cl_args, size_t size) -> uint64_t { + FEX::HLE::clone3_args args {}; + args.Type = TypeOfClone::TYPE_CLONE3; + memcpy(&args.args, cl_args, std::min(sizeof(FEX::HLE::kernel_clone3_args), size)); + return CloneHandler(Frame, &args); + })); + + REGISTER_SYSCALL_IMPL_FLAGS(exit, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY | SyscallFlags::NORETURN, + 
[](FEXCore::Core::CpuStateFrame* Frame, int status) -> uint64_t { + auto Thread = Frame->Thread; + + // TLS/DTV teardown is something FEX can't control. Disable glibc checking when we leave a pthread. + // Since this thread is hard stopping, we can't track the TLS/DTV teardown in FEX's thread handling. + FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator::HardDisable(); + + if (Thread->ThreadManager.clear_child_tid) { + std::atomic* Addr = reinterpret_cast*>(Thread->ThreadManager.clear_child_tid); + Addr->store(0); + syscall(SYSCALL_DEF(futex), Thread->ThreadManager.clear_child_tid, FUTEX_WAKE, ~0ULL, 0, 0, 0); + } - REGISTER_SYSCALL_IMPL_FLAGS(prctl, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) -> uint64_t { - uint64_t Result{}; + Thread->StatusCode = status; + FEX::HLE::_SyscallHandler->TM.StopThread(Thread); + + return 0; + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(kill, SyscallFlags::DEFAULT, [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, int sig) -> uint64_t { + uint64_t Result = ::kill(pid, sig); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(tkill, SyscallFlags::DEFAULT, [](FEXCore::Core::CpuStateFrame* Frame, int tid, int sig) -> uint64_t { + // Can't actually use tgkill here, kernel rejects tgkill of tgid == 0 + uint64_t Result = ::syscall(SYSCALL_DEF(tkill), tid, sig); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(tgkill, SyscallFlags::DEFAULT, [](FEXCore::Core::CpuStateFrame* Frame, int tgid, int tid, int sig) -> uint64_t { + uint64_t Result = FHU::Syscalls::tgkill(tgid, tid, sig); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::getuid(); + SYSCALL_ERRNO(); + }); + + 
REGISTER_SYSCALL_IMPL_PASS_FLAGS(getgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::getgid(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uid_t uid) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(setuid), uid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, gid_t gid) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(setgid), gid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(geteuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::geteuid(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getegid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::getegid(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getppid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::getppid(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getpgrp, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::getpgrp(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setsid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::setsid(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setreuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + 
[](FEXCore::Core::CpuStateFrame* Frame, uid_t ruid, uid_t euid) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(setreuid), ruid, euid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setregid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, gid_t rgid, gid_t egid) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(setregid), rgid, egid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getgroups, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int size, gid_t list[]) -> uint64_t { + uint64_t Result = ::getgroups(size, list); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setgroups, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, size_t size, const gid_t* list) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(setgroups), size, list); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setresuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uid_t ruid, uid_t euid, uid_t suid) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(setresuid), ruid, euid, suid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getresuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uid_t* ruid, uid_t* euid, uid_t* suid) -> uint64_t { + uint64_t Result = ::getresuid(ruid, euid, suid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setresgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, gid_t rgid, gid_t egid, gid_t sgid) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(setresgid), rgid, egid, sgid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getresgid, SyscallFlags::OPTIMIZETHROUGH | 
SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, gid_t* rgid, gid_t* egid, gid_t* sgid) -> uint64_t { + uint64_t Result = ::getresgid(rgid, egid, sgid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(personality, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uint64_t persona) -> uint64_t { + uint64_t Result = ::personality(persona); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(prctl, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) -> uint64_t { + uint64_t Result {}; #ifndef PR_GET_AUXV #define PR_GET_AUXV 0x41555856 #endif - switch (option) { - case PR_SET_SECCOMP: - case PR_GET_SECCOMP: - // FEX doesn't support seccomp + switch (option) { + case PR_SET_SECCOMP: + case PR_GET_SECCOMP: + // FEX doesn't support seccomp + return -EINVAL; + break; + case PR_GET_AUXV: { + if (arg4 || arg5) { return -EINVAL; - break; - case PR_GET_AUXV: { - if (arg4 || arg5) { - return -EINVAL; - } + } - void* addr = reinterpret_cast(arg2); - size_t UserSize = reinterpret_cast(arg3); + void* addr = reinterpret_cast(arg2); + size_t UserSize = reinterpret_cast(arg3); - uint64_t auxvBase=0; - uint64_t auxvSize=0; - FEX::HLE::_SyscallHandler->GetCodeLoader()->GetAuxv(auxvBase, auxvSize); - size_t MinSize = std::min(auxvSize, UserSize); + uint64_t auxvBase = 0; + uint64_t auxvSize = 0; + FEX::HLE::_SyscallHandler->GetCodeLoader()->GetAuxv(auxvBase, auxvSize); + size_t MinSize = std::min(auxvSize, UserSize); - memcpy(addr, reinterpret_cast(auxvBase), MinSize); + memcpy(addr, reinterpret_cast(auxvBase), MinSize); - // Returns the size of auxv without truncation. - return auxvSize; + // Returns the size of auxv without truncation. 
+ return auxvSize; + } + default: Result = ::prctl(option, arg2, arg3, arg4, arg5); break; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_FLAGS(arch_prctl, SyscallFlags::DEFAULT, [](FEXCore::Core::CpuStateFrame* Frame, int code, unsigned long addr) -> uint64_t { + constexpr uint64_t TASK_MAX = (1ULL << 48); // 48-bits until we can query the host side VA sanely. AArch64 doesn't expose this in cpuinfo + uint64_t Result {}; + switch (code) { + case 0x1001: // ARCH_SET_GS + if (addr >= TASK_MAX) { + // Ignore a non-canonical address + return -EPERM; } - default: - Result = ::prctl(option, arg2, arg3, arg4, arg5); + Frame->State.gs_cached = addr; + Result = 0; break; + case 0x1002: // ARCH_SET_FS + if (addr >= TASK_MAX) { + // Ignore a non-canonical address + return -EPERM; } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_FLAGS(arch_prctl, SyscallFlags::DEFAULT, - [](FEXCore::Core::CpuStateFrame *Frame, int code, unsigned long addr) -> uint64_t { - constexpr uint64_t TASK_MAX = (1ULL << 48); // 48-bits until we can query the host side VA sanely. 
AArch64 doesn't expose this in cpuinfo - uint64_t Result{}; - switch (code) { - case 0x1001: // ARCH_SET_GS - if (addr >= TASK_MAX) { - // Ignore a non-canonical address - return -EPERM; - } - Frame->State.gs_cached = addr; - Result = 0; - break; - case 0x1002: // ARCH_SET_FS - if (addr >= TASK_MAX) { - // Ignore a non-canonical address - return -EPERM; - } - Frame->State.fs_cached = addr; - Result = 0; - break; - case 0x1003: // ARCH_GET_FS - *reinterpret_cast(addr) = Frame->State.fs_cached; - Result = 0; - break; - case 0x1004: // ARCH_GET_GS - *reinterpret_cast(addr) = Frame->State.gs_cached; - Result = 0; - break; - case 0x3001: // ARCH_CET_STATUS - Result = -EINVAL; // We don't support CET, return EINVAL - break; - case 0x1011: // ARCH_GET_CPUID - return 1; - break; - case 0x1012: // ARCH_SET_CPUID - return -ENODEV; // Claim we don't support faulting on CPUID - break; - default: - LogMan::Msg::EFmt("Unknown prctl: 0x{:x}", code); - Result = -EINVAL; - break; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(gettid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = FHU::Syscalls::gettid(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(set_tid_address, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int *tidptr) -> uint64_t { - auto Thread = Frame->Thread; - Thread->ThreadManager.clear_child_tid = tidptr; - return Thread->ThreadManager.GetTID(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(exit_group, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY | SyscallFlags::NORETURN, - [](FEXCore::Core::CpuStateFrame *Frame, int status) -> uint64_t { - - // Save telemetry if we're exiting. 
- FEX::HLE::_SyscallHandler->GetSignalDelegator()->SaveTelemetry(); - - syscall(SYSCALL_DEF(exit_group), status); - // This will never be reached - std::terminate(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(prlimit_64, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, int resource, const struct rlimit *new_limit, struct rlimit *old_limit) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(prlimit_64), pid, resource, new_limit, old_limit); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(setpgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, pid_t pgid) -> uint64_t { - uint64_t Result = ::setpgid(pid, pgid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getpgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid) -> uint64_t { - uint64_t Result = ::getpgid(pid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(setfsuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uid_t fsuid) -> uint64_t { - uint64_t Result = ::setfsuid(fsuid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(setfsgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uid_t fsgid) -> uint64_t { - uint64_t Result = ::setfsgid(fsgid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(getsid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid) -> uint64_t { - uint64_t Result = ::getsid(pid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(unshare, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int flags) -> uint64_t { - uint64_t Result = ::unshare(flags); - 
SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_PASS_FLAGS(setns, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int fd, int nstype) -> uint64_t { - uint64_t Result = ::setns(fd, nstype); - SYSCALL_ERRNO(); - }); - - if (Handler->IsHostKernelVersionAtLeast(5, 16, 0)) { - REGISTER_SYSCALL_IMPL_PASS_FLAGS(futex_waitv, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *waiters, uint32_t nr_futexes, uint32_t flags, struct timespec *timeout, clockid_t clockid) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(futex_waitv), waiters, nr_futexes, flags, timeout, clockid); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL(futex_waitv, UnimplementedSyscallSafe); + Frame->State.fs_cached = addr; + Result = 0; + break; + case 0x1003: // ARCH_GET_FS + *reinterpret_cast(addr) = Frame->State.fs_cached; + Result = 0; + break; + case 0x1004: // ARCH_GET_GS + *reinterpret_cast(addr) = Frame->State.gs_cached; + Result = 0; + break; + case 0x3001: // ARCH_CET_STATUS + Result = -EINVAL; // We don't support CET, return EINVAL + break; + case 0x1011: // ARCH_GET_CPUID + return 1; + break; + case 0x1012: // ARCH_SET_CPUID + return -ENODEV; // Claim we don't support faulting on CPUID + break; + default: + LogMan::Msg::EFmt("Unknown prctl: 0x{:x}", code); + Result = -EINVAL; + break; } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(gettid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = FHU::Syscalls::gettid(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(set_tid_address, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int* tidptr) -> uint64_t { + auto Thread = Frame->Thread; + Thread->ThreadManager.clear_child_tid = tidptr; + return Thread->ThreadManager.GetTID(); + 
}); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(exit_group, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY | SyscallFlags::NORETURN, + [](FEXCore::Core::CpuStateFrame* Frame, int status) -> uint64_t { + // Save telemetry if we're exiting. + FEX::HLE::_SyscallHandler->GetSignalDelegator()->SaveTelemetry(); + + syscall(SYSCALL_DEF(exit_group), status); + // This will never be reached + std::terminate(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS( + prlimit_64, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, int resource, const struct rlimit* new_limit, struct rlimit* old_limit) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(prlimit_64), pid, resource, new_limit, old_limit); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setpgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, pid_t pgid) -> uint64_t { + uint64_t Result = ::setpgid(pid, pgid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getpgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid) -> uint64_t { + uint64_t Result = ::getpgid(pid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setfsuid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uid_t fsuid) -> uint64_t { + uint64_t Result = ::setfsuid(fsuid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setfsgid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uid_t fsgid) -> uint64_t { + uint64_t Result = ::setfsgid(fsgid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(getsid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid) -> uint64_t { + uint64_t Result = ::getsid(pid); 
+ SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(unshare, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int flags) -> uint64_t { + uint64_t Result = ::unshare(flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_PASS_FLAGS(setns, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, int nstype) -> uint64_t { + uint64_t Result = ::setns(fd, nstype); + SYSCALL_ERRNO(); + }); + + if (Handler->IsHostKernelVersionAtLeast(5, 16, 0)) { + REGISTER_SYSCALL_IMPL_PASS_FLAGS(futex_waitv, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, void* waiters, uint32_t nr_futexes, uint32_t flags, + struct timespec* timeout, clockid_t clockid) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(futex_waitv), waiters, nr_futexes, flags, timeout, clockid); + SYSCALL_ERRNO(); + }); + } else { + REGISTER_SYSCALL_IMPL(futex_waitv, UnimplementedSyscallSafe); } } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Time.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Time.cpp index 3a465bfe6f..c1beee853a 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Time.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Time.cpp @@ -14,13 +14,13 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterTime(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; +void RegisterTime(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; - REGISTER_SYSCALL_IMPL_PASS_FLAGS(pause, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::pause(); - SYSCALL_ERRNO(); - }); - } + REGISTER_SYSCALL_IMPL_PASS_FLAGS(pause, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + 
[](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::pause(); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Timer.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Timer.cpp index 6953a95778..424ab6841d 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Timer.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Timer.cpp @@ -21,25 +21,25 @@ tags: LinuxSyscalls|syscalls-shared #include namespace FEX::HLE { - void RegisterTimer(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; +void RegisterTimer(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; - REGISTER_SYSCALL_IMPL_PASS_FLAGS(alarm, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, unsigned int seconds) -> uint64_t { - uint64_t Result = ::alarm(seconds); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(alarm, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, unsigned int seconds) -> uint64_t { + uint64_t Result = ::alarm(seconds); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(timer_getoverrun, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, kernel_timer_t timerid) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(timer_getoverrun), timerid); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_PASS_FLAGS(timer_getoverrun, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, kernel_timer_t timerid) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(timer_getoverrun), timerid); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_PASS_FLAGS(timer_delete, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, kernel_timer_t timerid) -> uint64_t { - 
uint64_t Result = ::syscall(SYSCALL_DEF(timer_delete), timerid); - SYSCALL_ERRNO(); - }); - } + REGISTER_SYSCALL_IMPL_PASS_FLAGS(timer_delete, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, kernel_timer_t timerid) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(timer_delete), timerid); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp index 9f6248dad6..6f5d15c35d 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp @@ -24,7 +24,7 @@ desc: SMC/MMan Tracking namespace FEX::HLE { /// Helpers /// -auto SyscallHandler::VMAProt::fromProt (int Prot) -> VMAProt { +auto SyscallHandler::VMAProt::fromProt(int Prot) -> VMAProt { return VMAProt { .Readable = (Prot & PROT_READ) != 0, .Writable = (Prot & PROT_WRITE) != 0, @@ -32,7 +32,7 @@ auto SyscallHandler::VMAProt::fromProt (int Prot) -> VMAProt { }; } -auto SyscallHandler::VMAProt::fromSHM (int SHMFlg) -> VMAProt { +auto SyscallHandler::VMAProt::fromSHM(int SHMFlg) -> VMAProt { return VMAProt { .Readable = true, .Writable = SHMFlg & SHM_RDONLY ? false : true, @@ -47,8 +47,8 @@ auto SyscallHandler::VMAFlags::fromFlags(int Flags) -> VMAFlags { } // SMC interactions -bool SyscallHandler::HandleSegfault(FEXCore::Core::InternalThreadState *Thread, int Signal, void *info, void *ucontext) { - const auto FaultAddress = (uintptr_t)((siginfo_t *)info)->si_addr; +bool SyscallHandler::HandleSegfault(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) { + const auto FaultAddress = (uintptr_t)((siginfo_t*)info)->si_addr; { // Can't use the deferred signal lock in the SIGSEGV handler. 
@@ -81,7 +81,7 @@ bool SyscallHandler::HandleSegfault(FEXCore::Core::InternalThreadState *Thread, if (VMA->Prot.Writable) { _SyscallHandler->TM.InvalidateGuestCodeRange(Thread, FaultBaseMirrored, FHU::FEX_PAGE_SIZE, [](uintptr_t Start, uintptr_t Length) { - auto rv = mprotect((void *)Start, Length, PROT_READ | PROT_WRITE); + auto rv = mprotect((void*)Start, Length, PROT_READ | PROT_WRITE); LogMan::Throw::AAFmt(rv == 0, "mprotect({}, {}) failed", Start, Length); }); } else { @@ -91,7 +91,7 @@ bool SyscallHandler::HandleSegfault(FEXCore::Core::InternalThreadState *Thread, } while ((VMA = VMA->ResourceNextVMA)); } else { _SyscallHandler->TM.InvalidateGuestCodeRange(Thread, FaultBase, FHU::FEX_PAGE_SIZE, [](uintptr_t Start, uintptr_t Length) { - auto rv = mprotect((void *)Start, Length, PROT_READ | PROT_WRITE); + auto rv = mprotect((void*)Start, Length, PROT_READ | PROT_WRITE); LogMan::Throw::AAFmt(rv == 0, "mprotect({}, {}) failed", Start, Length); }); } @@ -100,7 +100,7 @@ bool SyscallHandler::HandleSegfault(FEXCore::Core::InternalThreadState *Thread, } } -void SyscallHandler::MarkGuestExecutableRange(FEXCore::Core::InternalThreadState *Thread, uint64_t Start, uint64_t Length) { +void SyscallHandler::MarkGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) { const auto Base = Start & FHU::FEX_PAGE_MASK; const auto Top = FEXCore::AlignUp(Start + Length, FHU::FEX_PAGE_SIZE); @@ -147,13 +147,13 @@ void SyscallHandler::MarkGuestExecutableRange(FEXCore::Core::InternalThreadState const auto MirroredBase = std::max(VMAOffsetBase, OffsetBase); const auto MirroredSize = std::min(OffsetTop, VMAOffsetTop) - MirroredBase; - auto rv = mprotect((void *)(MirroredBase - VMAOffsetBase + VMABase), MirroredSize, PROT_READ); + auto rv = mprotect((void*)(MirroredBase - VMAOffsetBase + VMABase), MirroredSize, PROT_READ); LogMan::Throw::AAFmt(rv == 0, "mprotect({}, {}) failed", MirroredBase, MirroredSize); } } while ((VMA = 
VMA->ResourceNextVMA)); } else if (Mapping->second.Prot.Writable) { - int rv = mprotect((void *)ProtectBase, ProtectSize, PROT_READ); + int rv = mprotect((void*)ProtectBase, ProtectSize, PROT_READ); LogMan::Throw::AAFmt(rv == 0, "mprotect({}, {}) failed", ProtectBase, ProtectSize); } @@ -163,7 +163,7 @@ void SyscallHandler::MarkGuestExecutableRange(FEXCore::Core::InternalThreadState } // Used for AOT -FEXCore::HLE::AOTIRCacheEntryLookupResult SyscallHandler::LookupAOTIRCacheEntry(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestAddr) { +FEXCore::HLE::AOTIRCacheEntryLookupResult SyscallHandler::LookupAOTIRCacheEntry(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestAddr) { auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread); // Get the first mapping after GuestAddr, or end @@ -174,14 +174,12 @@ FEXCore::HLE::AOTIRCacheEntryLookupResult SyscallHandler::LookupAOTIRCacheEntry( return {nullptr, 0}; } - return { - Entry->second.Resource ? Entry->second.Resource->AOTIRCacheEntry : nullptr, - Entry->second.Base - Entry->second.Offset - }; + return {Entry->second.Resource ? 
Entry->second.Resource->AOTIRCacheEntry : nullptr, Entry->second.Base - Entry->second.Offset}; } // MMan Tracking -void SyscallHandler::TrackMmap(FEXCore::Core::InternalThreadState *Thread, uintptr_t Base, uintptr_t Size, int Prot, int Flags, int fd, off_t Offset) { +void SyscallHandler::TrackMmap(FEXCore::Core::InternalThreadState* Thread, uintptr_t Base, uintptr_t Size, int Prot, int Flags, int fd, + off_t Offset) { Size = FEXCore::AlignUp(Size, FHU::FEX_PAGE_SIZE); if (Flags & MAP_SHARED) { @@ -196,7 +194,7 @@ void SyscallHandler::TrackMmap(FEXCore::Core::InternalThreadState *Thread, uintp static uint64_t AnonSharedId = 1; - MappedResource *Resource = nullptr; + MappedResource* Resource = nullptr; if (!(Flags & MAP_ANONYMOUS)) { struct stat64 buf; @@ -217,9 +215,9 @@ void SyscallHandler::TrackMmap(FEXCore::Core::InternalThreadState *Thread, uintp } } } else if (Flags & MAP_SHARED) { - MRID mrid{SpecialDev::Anon, AnonSharedId++}; + MRID mrid {SpecialDev::Anon, AnonSharedId++}; - auto [Iter, Inserted] = VMATracking.MappedResources.emplace(mrid, MappedResource{nullptr, nullptr, 0}); + auto [Iter, Inserted] = VMATracking.MappedResources.emplace(mrid, MappedResource {nullptr, nullptr, 0}); LOGMAN_THROW_AA_FMT(Inserted == true, "VMA tracking error"); Resource = &Iter->second; Resource->Iterator = Iter; @@ -236,7 +234,7 @@ void SyscallHandler::TrackMmap(FEXCore::Core::InternalThreadState *Thread, uintp } } -void SyscallHandler::TrackMunmap(FEXCore::Core::InternalThreadState *Thread, uintptr_t Base, uintptr_t Size) { +void SyscallHandler::TrackMunmap(FEXCore::Core::InternalThreadState* Thread, uintptr_t Base, uintptr_t Size) { Size = FEXCore::AlignUp(Size, FHU::FEX_PAGE_SIZE); { @@ -253,7 +251,7 @@ void SyscallHandler::TrackMunmap(FEXCore::Core::InternalThreadState *Thread, uin } } -void SyscallHandler::TrackMprotect(FEXCore::Core::InternalThreadState *Thread, uintptr_t Base, uintptr_t Size, int Prot) { +void 
SyscallHandler::TrackMprotect(FEXCore::Core::InternalThreadState* Thread, uintptr_t Base, uintptr_t Size, int Prot) { Size = FEXCore::AlignUp(Size, FHU::FEX_PAGE_SIZE); { @@ -267,7 +265,8 @@ void SyscallHandler::TrackMprotect(FEXCore::Core::InternalThreadState *Thread, u } } -void SyscallHandler::TrackMremap(FEXCore::Core::InternalThreadState *Thread, uintptr_t OldAddress, size_t OldSize, size_t NewSize, int flags, uintptr_t NewAddress) { +void SyscallHandler::TrackMremap(FEXCore::Core::InternalThreadState* Thread, uintptr_t OldAddress, size_t OldSize, size_t NewSize, + int flags, uintptr_t NewAddress) { OldSize = FEXCore::AlignUp(OldSize, FHU::FEX_PAGE_SIZE); NewSize = FEXCore::AlignUp(NewSize, FHU::FEX_PAGE_SIZE); @@ -319,7 +318,7 @@ void SyscallHandler::TrackMremap(FEXCore::Core::InternalThreadState *Thread, uin } } -void SyscallHandler::TrackShmat(FEXCore::Core::InternalThreadState *Thread, int shmid, uintptr_t Base, int shmflg) { +void SyscallHandler::TrackShmat(FEXCore::Core::InternalThreadState* Thread, int shmid, uintptr_t Base, int shmflg) { CTX->MarkMemoryShared(Thread); shmid_ds stat; @@ -333,7 +332,7 @@ void SyscallHandler::TrackShmat(FEXCore::Core::InternalThreadState *Thread, int auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread); // TODO - MRID mrid{SpecialDev::SHM, static_cast(shmid)}; + MRID mrid {SpecialDev::SHM, static_cast(shmid)}; auto ResourceInserted = VMATracking.MappedResources.insert({mrid, {nullptr, nullptr, Length}}); auto Resource = &ResourceInserted.first->second; @@ -341,15 +340,14 @@ void SyscallHandler::TrackShmat(FEXCore::Core::InternalThreadState *Thread, int Resource->Iterator = ResourceInserted.first; } VMATracking.SetUnsafe(CTX, Resource, Base, 0, Length, VMAFlags::fromFlags(MAP_SHARED), - VMAProt::fromProt((shmflg & SHM_RDONLY) ? PROT_READ : (PROT_READ | PROT_WRITE)) - ); + VMAProt::fromProt((shmflg & SHM_RDONLY) ? 
PROT_READ : (PROT_READ | PROT_WRITE))); } if (SMCChecks != FEXCore::Config::CONFIG_SMC_NONE) { _SyscallHandler->TM.InvalidateGuestCodeRange(Thread, Base, Length); } } -void SyscallHandler::TrackShmdt(FEXCore::Core::InternalThreadState *Thread, uintptr_t Base) { +void SyscallHandler::TrackShmdt(FEXCore::Core::InternalThreadState* Thread, uintptr_t Base) { uintptr_t Length = 0; { auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread); @@ -363,7 +361,7 @@ void SyscallHandler::TrackShmdt(FEXCore::Core::InternalThreadState *Thread, uint } } -void SyscallHandler::TrackMadvise(FEXCore::Core::InternalThreadState *Thread, uintptr_t Base, uintptr_t Size, int advice) { +void SyscallHandler::TrackMadvise(FEXCore::Core::InternalThreadState* Thread, uintptr_t Base, uintptr_t Size, int advice) { Size = FEXCore::AlignUp(Size, FHU::FEX_PAGE_SIZE); { auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread); @@ -371,4 +369,4 @@ void SyscallHandler::TrackMadvise(FEXCore::Core::InternalThreadState *Thread, ui } } -} +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp index f60940ede8..ab40d17d03 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp @@ -12,7 +12,7 @@ desc: VMA Tracking namespace FEX::HLE { /// List Operations /// -inline void SyscallHandler::VMATracking::ListCheckVMALinks(VMAEntry *VMA) { +inline void SyscallHandler::VMATracking::ListCheckVMALinks(VMAEntry* VMA) { if (VMA) { LOGMAN_THROW_A_FMT(VMA->ResourceNextVMA != VMA, "VMA tracking error"); LOGMAN_THROW_A_FMT(VMA->ResourcePrevVMA != VMA, "VMA tracking error"); @@ -21,7 +21,7 @@ inline void SyscallHandler::VMATracking::ListCheckVMALinks(VMAEntry *VMA) { // Removes a VMA from corresponding MappedResource list // Returns true if list is empty -bool 
SyscallHandler::VMATracking::ListRemove(VMAEntry *VMA) { +bool SyscallHandler::VMATracking::ListRemove(VMAEntry* VMA) { LOGMAN_THROW_A_FMT(VMA->Resource != nullptr, "VMA tracking error"); // if it has prev, make prev to next @@ -55,7 +55,7 @@ bool SyscallHandler::VMATracking::ListRemove(VMAEntry *VMA) { // Replaces a VMA in corresponding MappedResource list // Requires NewVMA->Resource, NewVMA->ResourcePrevVMA and NewVMA->ResourceNextVMA to be already setup -void SyscallHandler::VMATracking::ListReplace(VMAEntry *VMA, VMAEntry *NewVMA) { +void SyscallHandler::VMATracking::ListReplace(VMAEntry* VMA, VMAEntry* NewVMA) { LOGMAN_THROW_A_FMT(VMA->Resource != nullptr, "VMA tracking error"); LOGMAN_THROW_A_FMT(VMA->Resource == NewVMA->Resource, "VMA tracking error"); @@ -84,7 +84,7 @@ void SyscallHandler::VMATracking::ListReplace(VMAEntry *VMA, VMAEntry *NewVMA) { // Inserts a VMA in corresponding MappedResource list // Requires NewVMA->Resource, NewVMA->ResourcePrevVMA and NewVMA->ResourceNextVMA to be already setup -void SyscallHandler::VMATracking::ListInsertAfter(VMAEntry *AfterVMA, VMAEntry *NewVMA) { +void SyscallHandler::VMATracking::ListInsertAfter(VMAEntry* AfterVMA, VMAEntry* NewVMA) { LOGMAN_THROW_A_FMT(NewVMA->Resource != nullptr, "VMA tracking error"); LOGMAN_THROW_A_FMT(AfterVMA->Resource == NewVMA->Resource, "VMA tracking error"); @@ -105,7 +105,7 @@ void SyscallHandler::VMATracking::ListInsertAfter(VMAEntry *AfterVMA, VMAEntry * // Prepends a VMA // Requires NewVMA->Resource, NewVMA->ResourcePrevVMA and NewVMA->ResourceNextVMA to be already setup -void SyscallHandler::VMATracking::ListPrepend(MappedResource *Resource, VMAEntry *NewVMA) { +void SyscallHandler::VMATracking::ListPrepend(MappedResource* Resource, VMAEntry* NewVMA) { LOGMAN_THROW_A_FMT(Resource != nullptr, "VMA tracking error"); LOGMAN_THROW_A_FMT(NewVMA->Resource == Resource, "VMA tracking error"); @@ -142,13 +142,13 @@ SyscallHandler::VMATracking::VMACIterator 
SyscallHandler::VMATracking::LookupVMA } // Set or Replace mappings in a range with a new mapping -void SyscallHandler::VMATracking::SetUnsafe(FEXCore::Context::Context *CTX, MappedResource *MappedResource, uintptr_t Base, +void SyscallHandler::VMATracking::SetUnsafe(FEXCore::Context::Context* CTX, MappedResource* MappedResource, uintptr_t Base, uintptr_t Offset, uintptr_t Length, VMAFlags Flags, VMAProt Prot) { ClearUnsafe(CTX, Base, Length, MappedResource); auto [Iter, Inserted] = VMAs.emplace( - Base, VMAEntry{MappedResource, nullptr, MappedResource ? MappedResource->FirstVMA : nullptr, Base, Offset, Length, Flags, Prot}); - + Base, VMAEntry {MappedResource, nullptr, MappedResource ? MappedResource->FirstVMA : nullptr, Base, Offset, Length, Flags, Prot}); + LOGMAN_THROW_A_FMT(Inserted == true, "VMA Tracking corruption"); if (MappedResource) { @@ -157,10 +157,10 @@ void SyscallHandler::VMATracking::SetUnsafe(FEXCore::Context::Context *CTX, Mapp } } -// Remove mappings in a range, possibly splitting them if needed and +// Remove mappings in a range, possibly splitting them if needed and // freeing their associated MappedResource unless it is equal to PreservedMappedResource -void SyscallHandler::VMATracking::ClearUnsafe(FEXCore::Context::Context *CTX, uintptr_t Base, uintptr_t Length, - MappedResource *PreservedMappedResource) { +void SyscallHandler::VMATracking::ClearUnsafe(FEXCore::Context::Context* CTX, uintptr_t Base, uintptr_t Length, + MappedResource* PreservedMappedResource) { const auto Top = Base + Length; // find the first Mapping at or after the Range ends, or ::end() @@ -219,8 +219,8 @@ void SyscallHandler::VMATracking::ClearUnsafe(FEXCore::Context::Context *CTX, ui auto NewOffset = OffsetDiff + Top; auto NewLength = MapTop - Top; - auto [Iter, Inserted] = VMAs.emplace( - Top, VMAEntry{Current->Resource, ReplaceAndErase ? 
Current->ResourcePrevVMA : Current, Current->ResourceNextVMA, Top, NewOffset, NewLength, Current->Flags, Current->Prot}); + auto [Iter, Inserted] = VMAs.emplace(Top, VMAEntry {Current->Resource, ReplaceAndErase ? Current->ResourcePrevVMA : Current, + Current->ResourceNextVMA, Top, NewOffset, NewLength, Current->Flags, Current->Prot}); LOGMAN_THROW_A_FMT(Inserted == true, "VMA tracking error"); auto TrailingPart = &Iter->second; if (Current->Resource) { @@ -284,7 +284,7 @@ void SyscallHandler::VMATracking::ChangeUnsafe(uintptr_t Base, uintptr_t Length, auto NewLength = Top - Base; auto [Iter, Inserted] = - VMAs.emplace(Base, VMAEntry{Current->Resource, Current, Current->ResourceNextVMA, Base, NewOffset, NewLength, MapFlags, NewProt}); + VMAs.emplace(Base, VMAEntry {Current->Resource, Current, Current->ResourceNextVMA, Base, NewOffset, NewLength, MapFlags, NewProt}); LOGMAN_THROW_A_FMT(Inserted == true, "VMA tracking error"); auto RestOfMapping = &Iter->second; @@ -311,7 +311,7 @@ void SyscallHandler::VMATracking::ChangeUnsafe(uintptr_t Base, uintptr_t Length, auto NewLength = MapTop - Top; auto [Iter, Inserted] = - VMAs.emplace(Top, VMAEntry{Current->Resource, Current, Current->ResourceNextVMA, Top, NewOffset, NewLength, MapFlags, MapProt}); + VMAs.emplace(Top, VMAEntry {Current->Resource, Current, Current->ResourceNextVMA, Top, NewOffset, NewLength, MapFlags, MapProt}); LOGMAN_THROW_A_FMT(Inserted == true, "VMA tracking error"); auto TrailingMapping = &Iter->second; @@ -324,7 +324,7 @@ void SyscallHandler::VMATracking::ChangeUnsafe(uintptr_t Base, uintptr_t Length, } // This matches the peculiarities algorithm used in linux ksys_shmdt (linux kernel 5.16, ipc/shm.c) -uintptr_t SyscallHandler::VMATracking::ClearShmUnsafe(FEXCore::Context::Context *CTX, uintptr_t Base) { +uintptr_t SyscallHandler::VMATracking::ClearShmUnsafe(FEXCore::Context::Context* CTX, uintptr_t Base) { // Find first VMA at or after Base // Iterate until first SHM VMA, with matching offset, get 
length @@ -337,7 +337,7 @@ uintptr_t SyscallHandler::VMATracking::ClearShmUnsafe(FEXCore::Context::Context LOGMAN_THROW_A_FMT(Entry->second.Base >= Base, "VMA tracking corruption"); if (Entry->second.Base - Base == Entry->second.Offset && Entry->second.Resource && Entry->second.Resource->Iterator->first.dev == SpecialDev::SHM) { - break; + break; } } @@ -364,4 +364,4 @@ uintptr_t SyscallHandler::VMATracking::ClearShmUnsafe(FEXCore::Context::Context return ShmLength; } -} +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.cpp index e6ae03a3b8..4297152e49 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.cpp @@ -6,241 +6,235 @@ #include namespace FEX::HLE { - FEXCore::Core::InternalThreadState *ThreadManager::CreateThread(uint64_t InitialRIP, uint64_t StackPointer, FEXCore::Core::CPUState *NewThreadState, uint64_t ParentTID) { - auto Thread = CTX->CreateThread(InitialRIP, StackPointer, NewThreadState, ParentTID); +FEXCore::Core::InternalThreadState* +ThreadManager::CreateThread(uint64_t InitialRIP, uint64_t StackPointer, FEXCore::Core::CPUState* NewThreadState, uint64_t ParentTID) { + auto Thread = CTX->CreateThread(InitialRIP, StackPointer, NewThreadState, ParentTID); - ++IdleWaitRefCount; - return Thread; + ++IdleWaitRefCount; + return Thread; +} + +void ThreadManager::DestroyThread(FEXCore::Core::InternalThreadState* Thread) { + { + std::lock_guard lk(ThreadCreationMutex); + auto It = std::find(Threads.begin(), Threads.end(), Thread); + LOGMAN_THROW_A_FMT(It != Threads.end(), "Thread wasn't in Threads"); + Threads.erase(It); } - void ThreadManager::DestroyThread(FEXCore::Core::InternalThreadState *Thread) { - { - std::lock_guard lk(ThreadCreationMutex); - auto It = std::find(Threads.begin(), Threads.end(), Thread); - LOGMAN_THROW_A_FMT(It != Threads.end(), "Thread wasn't in 
Threads"); - Threads.erase(It); - } + HandleThreadDeletion(Thread); +} - HandleThreadDeletion(Thread); +void ThreadManager::StopThread(FEXCore::Core::InternalThreadState* Thread) { + if (Thread->RunningEvents.Running.exchange(false)) { + SignalDelegation->SignalThread(Thread, FEXCore::Core::SignalEvent::Stop); } +} - void ThreadManager::StopThread(FEXCore::Core::InternalThreadState *Thread) { - if (Thread->RunningEvents.Running.exchange(false)) { - SignalDelegation->SignalThread(Thread, FEXCore::Core::SignalEvent::Stop); +void ThreadManager::RunThread(FEXCore::Core::InternalThreadState* Thread) { + // Tell the thread to start executing + Thread->StartRunning.NotifyAll(); +} + +void ThreadManager::HandleThreadDeletion(FEXCore::Core::InternalThreadState* Thread) { + if (Thread->ExecutionThread) { + if (Thread->ExecutionThread->joinable()) { + Thread->ExecutionThread->join(nullptr); } - } - void ThreadManager::RunThread(FEXCore::Core::InternalThreadState *Thread) { - // Tell the thread to start executing - Thread->StartRunning.NotifyAll(); + if (Thread->ExecutionThread->IsSelf()) { + Thread->ExecutionThread->detach(); + } } - void ThreadManager::HandleThreadDeletion(FEXCore::Core::InternalThreadState *Thread) { - if (Thread->ExecutionThread) { - if (Thread->ExecutionThread->joinable()) { - Thread->ExecutionThread->join(nullptr); - } + CTX->DestroyThread(Thread); + --IdleWaitRefCount; + IdleWaitCV.notify_all(); +} - if (Thread->ExecutionThread->IsSelf()) { - Thread->ExecutionThread->detach(); - } - } +void ThreadManager::NotifyPause() { + // Tell all the threads that they should pause + std::lock_guard lk(ThreadCreationMutex); + for (auto& Thread : Threads) { + SignalDelegation->SignalThread(Thread, FEXCore::Core::SignalEvent::Pause); + } +} + +void ThreadManager::Pause() { + NotifyPause(); + WaitForIdle(); +} - CTX->DestroyThread(Thread); - --IdleWaitRefCount; - IdleWaitCV.notify_all(); +void ThreadManager::Run() { + // Spin up all the threads + std::lock_guard 
lk(ThreadCreationMutex); + for (auto& Thread : Threads) { + Thread->SignalReason.store(FEXCore::Core::SignalEvent::Return); } - void ThreadManager::NotifyPause() { - // Tell all the threads that they should pause - std::lock_guard lk(ThreadCreationMutex); - for (auto &Thread : Threads) { - SignalDelegation->SignalThread(Thread, FEXCore::Core::SignalEvent::Pause); - } + for (auto& Thread : Threads) { + Thread->StartRunning.NotifyAll(); } +} + +void ThreadManager::WaitForIdleWithTimeout() { + std::unique_lock lk(IdleWaitMutex); + bool WaitResult = IdleWaitCV.wait_for(lk, std::chrono::milliseconds(1500), [this] { return IdleWaitRefCount.load() == 0; }); - void ThreadManager::Pause() { + if (!WaitResult) { + // The wait failed, this will occur if we stepped in to a syscall + // That's okay, we just need to pause the threads manually NotifyPause(); - WaitForIdle(); } - void ThreadManager::Run() { - // Spin up all the threads - std::lock_guard lk(ThreadCreationMutex); - for (auto &Thread : Threads) { - Thread->SignalReason.store(FEXCore::Core::SignalEvent::Return); - } + // We have sent every thread a pause signal + // Now wait again because they /will/ be going to sleep + WaitForIdle(); +} - for (auto &Thread : Threads) { - Thread->StartRunning.NotifyAll(); - } +void ThreadManager::WaitForThreadsToRun() { + size_t NumThreads {}; + { + std::lock_guard lk(ThreadCreationMutex); + NumThreads = Threads.size(); } - void ThreadManager::WaitForIdleWithTimeout() { - std::unique_lock lk(IdleWaitMutex); - bool WaitResult = IdleWaitCV.wait_for(lk, std::chrono::milliseconds(1500), - [this] { - return IdleWaitRefCount.load() == 0; - }); - - if (!WaitResult) { - // The wait failed, this will occur if we stepped in to a syscall - // That's okay, we just need to pause the threads manually - NotifyPause(); - } + // Spin while waiting for the threads to start up + std::unique_lock lk(IdleWaitMutex); + IdleWaitCV.wait(lk, [this, NumThreads] { return IdleWaitRefCount.load() >= NumThreads; 
}); - // We have sent every thread a pause signal - // Now wait again because they /will/ be going to sleep - WaitForIdle(); - } + Running = true; +} - void ThreadManager::WaitForThreadsToRun() { - size_t NumThreads{}; - { - std::lock_guard lk(ThreadCreationMutex); - NumThreads = Threads.size(); +void ThreadManager::Step() { + LogMan::Msg::AFmt("ThreadManager::Step currently not implemented"); + { + std::lock_guard lk(ThreadCreationMutex); + // Walk the threads and tell them to clear their caches + // Useful when our block size is set to a large number and we need to step a single instruction + for (auto& Thread : Threads) { + CTX->ClearCodeCache(Thread); } + } - // Spin while waiting for the threads to start up - std::unique_lock lk(IdleWaitMutex); - IdleWaitCV.wait(lk, [this, NumThreads] { - return IdleWaitRefCount.load() >= NumThreads; - }); + // TODO: Set to single step mode. + Run(); + WaitForThreadsToRun(); + WaitForIdle(); + // TODO: Set back to full running mode. +} - Running = true; - } +void ThreadManager::Stop(bool IgnoreCurrentThread) { + pid_t tid = FHU::Syscalls::gettid(); + FEXCore::Core::InternalThreadState* CurrentThread {}; - void ThreadManager::Step() { - LogMan::Msg::AFmt("ThreadManager::Step currently not implemented"); - { - std::lock_guard lk(ThreadCreationMutex); - // Walk the threads and tell them to clear their caches - // Useful when our block size is set to a large number and we need to step a single instruction - for (auto &Thread : Threads) { - CTX->ClearCodeCache(Thread); + // Tell all the threads that they should stop + { + std::lock_guard lk(ThreadCreationMutex); + for (auto& Thread : Threads) { + if (IgnoreCurrentThread && Thread->ThreadManager.TID == tid) { + // If we are callign stop from the current thread then we can ignore sending signals to this thread + // This means that this thread is already gone + } else if (Thread->ThreadManager.TID == tid) { + // We need to save the current thread for last to ensure all threads receive 
their stop signals + CurrentThread = Thread; + continue; } - } - // TODO: Set to single step mode. - Run(); - WaitForThreadsToRun(); - WaitForIdle(); - // TODO: Set back to full running mode. - } + if (Thread->RunningEvents.Running.load()) { + StopThread(Thread); + } - void ThreadManager::Stop(bool IgnoreCurrentThread) { - pid_t tid = FHU::Syscalls::gettid(); - FEXCore::Core::InternalThreadState* CurrentThread{}; - - // Tell all the threads that they should stop - { - std::lock_guard lk(ThreadCreationMutex); - for (auto &Thread : Threads) { - if (IgnoreCurrentThread && - Thread->ThreadManager.TID == tid) { - // If we are callign stop from the current thread then we can ignore sending signals to this thread - // This means that this thread is already gone - } - else if (Thread->ThreadManager.TID == tid) { - // We need to save the current thread for last to ensure all threads receive their stop signals - CurrentThread = Thread; - continue; - } - - if (Thread->RunningEvents.Running.load()) { - StopThread(Thread); - } - - // If the thread is waiting to start but immediately killed then there can be a hang - // This occurs in the case of gdb attach with immediate kill - if (Thread->RunningEvents.WaitingToStart.load()) { - Thread->RunningEvents.EarlyExit = true; - Thread->StartRunning.NotifyAll(); - } + // If the thread is waiting to start but immediately killed then there can be a hang + // This occurs in the case of gdb attach with immediate kill + if (Thread->RunningEvents.WaitingToStart.load()) { + Thread->RunningEvents.EarlyExit = true; + Thread->StartRunning.NotifyAll(); } } + } - // Stop the current thread now if we aren't ignoring it - if (CurrentThread) { - StopThread(CurrentThread); - } + // Stop the current thread now if we aren't ignoring it + if (CurrentThread) { + StopThread(CurrentThread); } +} - void ThreadManager::SleepThread(FEXCore::Context::Context *CTX, FEXCore::Core::CpuStateFrame *Frame) { - auto Thread = Frame->Thread; +void 
ThreadManager::SleepThread(FEXCore::Context::Context* CTX, FEXCore::Core::CpuStateFrame* Frame) { + auto Thread = Frame->Thread; - --IdleWaitRefCount; - IdleWaitCV.notify_all(); + --IdleWaitRefCount; + IdleWaitCV.notify_all(); - Thread->RunningEvents.ThreadSleeping = true; + Thread->RunningEvents.ThreadSleeping = true; - // Go to sleep - Thread->StartRunning.Wait(); + // Go to sleep + Thread->StartRunning.Wait(); - Thread->RunningEvents.Running = true; - ++IdleWaitRefCount; - Thread->RunningEvents.ThreadSleeping = false; + Thread->RunningEvents.Running = true; + ++IdleWaitRefCount; + Thread->RunningEvents.ThreadSleeping = false; - IdleWaitCV.notify_all(); - } + IdleWaitCV.notify_all(); +} - void ThreadManager::UnlockAfterFork(FEXCore::Core::InternalThreadState *LiveThread, bool Child) { - if (!Child) return; +void ThreadManager::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThread, bool Child) { + if (!Child) { + return; + } - // This function is called after fork - // We need to cleanup some of the thread data that is dead - for (auto &DeadThread : Threads) { - if (DeadThread == LiveThread) { - continue; - } + // This function is called after fork + // We need to cleanup some of the thread data that is dead + for (auto& DeadThread : Threads) { + if (DeadThread == LiveThread) { + continue; + } - // Setting running to false ensures that when they are shutdown we won't send signals to kill them - DeadThread->RunningEvents.Running = false; + // Setting running to false ensures that when they are shutdown we won't send signals to kill them + DeadThread->RunningEvents.Running = false; - // Despite what google searches may susgest, glibc actually has special code to handle forks - // with multiple active threads. - // It cleans up the stacks of dead threads and marks them as terminated. - // It also cleans up a bunch of internal mutexes. 
+ // Despite what google searches may susgest, glibc actually has special code to handle forks + // with multiple active threads. + // It cleans up the stacks of dead threads and marks them as terminated. + // It also cleans up a bunch of internal mutexes. - // FIXME: TLS is probally still alive. Investigate + // FIXME: TLS is probally still alive. Investigate - // Deconstructing the Interneal thread state should clean up most of the state. - // But if anything on the now deleted stack is holding a refrence to the heap, it will be leaked - CTX->DestroyThread(DeadThread); + // Deconstructing the Interneal thread state should clean up most of the state. + // But if anything on the now deleted stack is holding a refrence to the heap, it will be leaked + CTX->DestroyThread(DeadThread); - // FIXME: Make sure sure nothing gets leaked via the heap. Ideas: - // * Make sure nothing is allocated on the heap without ref in InternalThreadState - // * Surround any code that heap allocates with a per-thread mutex. - // Before forking, the the forking thread can lock all thread mutexes. - } + // FIXME: Make sure sure nothing gets leaked via the heap. Ideas: + // * Make sure nothing is allocated on the heap without ref in InternalThreadState + // * Surround any code that heap allocates with a per-thread mutex. + // Before forking, the the forking thread can lock all thread mutexes. + } - // Remove all threads but the live thread from Threads - Threads.clear(); - Threads.push_back(LiveThread); + // Remove all threads but the live thread from Threads + Threads.clear(); + Threads.push_back(LiveThread); - // Clean up dead stacks - FEXCore::Threads::Thread::CleanupAfterFork(); + // Clean up dead stacks + FEXCore::Threads::Thread::CleanupAfterFork(); - // We now only have one thread. - IdleWaitRefCount = 1; - ThreadCreationMutex.StealAndDropActiveLocks(); - } + // We now only have one thread. 
+ IdleWaitRefCount = 1; + ThreadCreationMutex.StealAndDropActiveLocks(); +} - void ThreadManager::WaitForIdle() { - std::unique_lock lk(IdleWaitMutex); - IdleWaitCV.wait(lk, [this] { - return IdleWaitRefCount.load() == 0; - }); +void ThreadManager::WaitForIdle() { + std::unique_lock lk(IdleWaitMutex); + IdleWaitCV.wait(lk, [this] { return IdleWaitRefCount.load() == 0; }); - Running = false; - } + Running = false; +} - ThreadManager::~ThreadManager() { - std::lock_guard lk(ThreadCreationMutex); +ThreadManager::~ThreadManager() { + std::lock_guard lk(ThreadCreationMutex); - for (auto &Thread : Threads) { - HandleThreadDeletion(Thread); - } - Threads.clear(); + for (auto& Thread : Threads) { + HandleThreadDeletion(Thread); } + Threads.clear(); } +} // namespace FEX::HLE diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Utils/Threads.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Utils/Threads.cpp index 2574a30a9f..a803530ed7 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Utils/Threads.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Utils/Threads.cpp @@ -7,200 +7,196 @@ #include namespace FEX::LinuxEmulation::Threads { - // Stack pool handling - struct StackPoolItem { - void *Ptr; - size_t Size; - }; - std::mutex DeadStackPoolMutex{}; - std::mutex LiveStackPoolMutex{}; - - static fextl::deque DeadStackPool{}; - static fextl::deque LiveStackPool{}; - - void *AllocateStackObject() { - std::lock_guard lk{DeadStackPoolMutex}; - if (DeadStackPool.size() == 0) { - // Nothing in the pool, just allocate - return FEXCore::Allocator::mmap(nullptr, FEX::LinuxEmulation::Threads::STACK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - } +// Stack pool handling +struct StackPoolItem { + void* Ptr; + size_t Size; +}; +std::mutex DeadStackPoolMutex {}; +std::mutex LiveStackPoolMutex {}; + +static fextl::deque DeadStackPool {}; +static fextl::deque LiveStackPool {}; + +void* AllocateStackObject() { + std::lock_guard lk {DeadStackPoolMutex}; + if 
(DeadStackPool.size() == 0) { + // Nothing in the pool, just allocate + return FEXCore::Allocator::mmap(nullptr, FEX::LinuxEmulation::Threads::STACK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + } - // Keep the first item in the stack pool - auto Result = DeadStackPool.front().Ptr; - DeadStackPool.pop_front(); + // Keep the first item in the stack pool + auto Result = DeadStackPool.front().Ptr; + DeadStackPool.pop_front(); - // Erase the rest as a garbage collection step - for (auto &Item : DeadStackPool) { - FEXCore::Allocator::munmap(Item.Ptr, Item.Size); - } - return Result; + // Erase the rest as a garbage collection step + for (auto& Item : DeadStackPool) { + FEXCore::Allocator::munmap(Item.Ptr, Item.Size); } + return Result; +} - void AddStackToDeadPool(void *Ptr) { - std::lock_guard lk{DeadStackPoolMutex}; - DeadStackPool.emplace_back(StackPoolItem{Ptr, FEX::LinuxEmulation::Threads::STACK_SIZE}); - } +void AddStackToDeadPool(void* Ptr) { + std::lock_guard lk {DeadStackPoolMutex}; + DeadStackPool.emplace_back(StackPoolItem {Ptr, FEX::LinuxEmulation::Threads::STACK_SIZE}); +} - void AddStackToLivePool(void *Ptr) { - std::lock_guard lk{LiveStackPoolMutex}; - LiveStackPool.emplace_back(StackPoolItem{Ptr, FEX::LinuxEmulation::Threads::STACK_SIZE}); - } +void AddStackToLivePool(void* Ptr) { + std::lock_guard lk {LiveStackPoolMutex}; + LiveStackPool.emplace_back(StackPoolItem {Ptr, FEX::LinuxEmulation::Threads::STACK_SIZE}); +} - void RemoveStackFromLivePool(void *Ptr) { - std::lock_guard lk{LiveStackPoolMutex}; - for (auto it = LiveStackPool.begin(); it != LiveStackPool.end(); ++it) { - if (it->Ptr == Ptr) { - LiveStackPool.erase(it); - return; - } +void RemoveStackFromLivePool(void* Ptr) { + std::lock_guard lk {LiveStackPoolMutex}; + for (auto it = LiveStackPool.begin(); it != LiveStackPool.end(); ++it) { + if (it->Ptr == Ptr) { + LiveStackPool.erase(it); + return; } } +} - void DeallocateStackObject(void *Ptr) { - 
RemoveStackFromLivePool(Ptr); - AddStackToDeadPool(Ptr); - } +void DeallocateStackObject(void* Ptr) { + RemoveStackFromLivePool(Ptr); + AddStackToDeadPool(Ptr); +} - namespace PThreads { - void *InitializeThread(void *Ptr); - - class PThread final : public FEXCore::Threads::Thread { - public: - PThread(FEXCore::Threads::ThreadFunc Func, void *Arg) - : UserFunc {Func} - , UserArg {Arg} { - pthread_attr_t Attr{}; - Stack = AllocateStackObject(); - // pthreads allocates its dtv region behind our back and there is nothing we can do about it. - FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator glibc; - AddStackToLivePool(Stack); - pthread_attr_init(&Attr); - pthread_attr_setstack(&Attr, Stack, FEX::LinuxEmulation::Threads::STACK_SIZE); - // TODO: Thread creation should be using this instead. - // Causes Steam to crash early though. - // pthread_create(&Thread, &Attr, InitializeThread, this); - pthread_create(&Thread, &Attr, Func, Arg); - - pthread_attr_destroy(&Attr); - } +namespace PThreads { + void* InitializeThread(void* Ptr); + + class PThread final : public FEXCore::Threads::Thread { + public: + PThread(FEXCore::Threads::ThreadFunc Func, void* Arg) + : UserFunc {Func} + , UserArg {Arg} { + pthread_attr_t Attr {}; + Stack = AllocateStackObject(); + // pthreads allocates its dtv region behind our back and there is nothing we can do about it. + FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator glibc; + AddStackToLivePool(Stack); + pthread_attr_init(&Attr); + pthread_attr_setstack(&Attr, Stack, FEX::LinuxEmulation::Threads::STACK_SIZE); + // TODO: Thread creation should be using this instead. + // Causes Steam to crash early though. 
+ // pthread_create(&Thread, &Attr, InitializeThread, this); + pthread_create(&Thread, &Attr, Func, Arg); + + pthread_attr_destroy(&Attr); + } - bool joinable() override { - pthread_attr_t Attr{}; - if (pthread_getattr_np(Thread, &Attr) == 0) { - int AttachState{}; - if (pthread_attr_getdetachstate(&Attr, &AttachState) == 0) { - if (AttachState == PTHREAD_CREATE_JOINABLE) { - return true; - } + bool joinable() override { + pthread_attr_t Attr {}; + if (pthread_getattr_np(Thread, &Attr) == 0) { + int AttachState {}; + if (pthread_attr_getdetachstate(&Attr, &AttachState) == 0) { + if (AttachState == PTHREAD_CREATE_JOINABLE) { + return true; } } - return false; } + return false; + } - bool join(void **ret) override { - return pthread_join(Thread, ret) == 0; - } + bool join(void** ret) override { + return pthread_join(Thread, ret) == 0; + } - bool detach() override { - return pthread_detach(Thread) == 0; - } + bool detach() override { + return pthread_detach(Thread) == 0; + } - bool IsSelf() override { - auto self = pthread_self(); - return self == Thread; - } + bool IsSelf() override { + auto self = pthread_self(); + return self == Thread; + } - void *Execute() { - return UserFunc(UserArg); - } + void* Execute() { + return UserFunc(UserArg); + } - void FreeStack() { - DeallocateStackObject(Stack); - } + void FreeStack() { + DeallocateStackObject(Stack); + } - private: - pthread_t Thread; - FEXCore::Threads::ThreadFunc UserFunc; - void *UserArg; - void *Stack{}; - }; + private: + pthread_t Thread; + FEXCore::Threads::ThreadFunc UserFunc; + void* UserArg; + void* Stack {}; + }; - void *InitializeThread(void *Ptr) { - PThread *Thread{reinterpret_cast(Ptr)}; + void* InitializeThread(void* Ptr) { + PThread* Thread {reinterpret_cast(Ptr)}; - // Run the user function - void *Result = Thread->Execute(); + // Run the user function + void* Result = Thread->Execute(); - // Put the stack back in to the stack pool - Thread->FreeStack(); + // Put the stack back in to the stack pool 
+ Thread->FreeStack(); - // TLS/DTV teardown is something FEX can't control. Disable glibc checking when we leave a pthread. - FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator::HardDisable(); + // TLS/DTV teardown is something FEX can't control. Disable glibc checking when we leave a pthread. + FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator::HardDisable(); - return Result; - } + return Result; + } - fextl::unique_ptr CreateThread_PThread( - FEXCore::Threads::ThreadFunc Func, - void* Arg) { - return fextl::make_unique(Func, Arg); - } + fextl::unique_ptr CreateThread_PThread(FEXCore::Threads::ThreadFunc Func, void* Arg) { + return fextl::make_unique(Func, Arg); + } - void CleanupAfterFork_PThread() { - // We don't need to pull the mutex here - // After a fork we are the only thread running - // Just need to make sure not to delete our own stack - uintptr_t StackLocation = reinterpret_cast(alloca(0)); - - auto ClearStackPool = [&](auto &StackPool) { - for (auto it = StackPool.begin(); it != StackPool.end(); ) { - StackPoolItem &Item = *it; - uintptr_t ItemStack = reinterpret_cast(Item.Ptr); - if (ItemStack <= StackLocation && (ItemStack + Item.Size) > StackLocation) { - // This is our stack item, skip it - ++it; - } - else { - // Untracked stack. Clean it up - FEXCore::Allocator::munmap(Item.Ptr, Item.Size); - it = StackPool.erase(it); - } + void CleanupAfterFork_PThread() { + // We don't need to pull the mutex here + // After a fork we are the only thread running + // Just need to make sure not to delete our own stack + uintptr_t StackLocation = reinterpret_cast(alloca(0)); + + auto ClearStackPool = [&](auto& StackPool) { + for (auto it = StackPool.begin(); it != StackPool.end();) { + StackPoolItem& Item = *it; + uintptr_t ItemStack = reinterpret_cast(Item.Ptr); + if (ItemStack <= StackLocation && (ItemStack + Item.Size) > StackLocation) { + // This is our stack item, skip it + ++it; + } else { + // Untracked stack. 
Clean it up + FEXCore::Allocator::munmap(Item.Ptr, Item.Size); + it = StackPool.erase(it); } - }; + } + }; - // Clear both dead stacks and live stacks - ClearStackPool(DeadStackPool); - ClearStackPool(LiveStackPool); + // Clear both dead stacks and live stacks + ClearStackPool(DeadStackPool); + ClearStackPool(LiveStackPool); - LogMan::Throw::AFmt((DeadStackPool.size() + LiveStackPool.size()) <= 1, - "After fork we should only have zero or one tracked stacks!"); - } + LogMan::Throw::AFmt((DeadStackPool.size() + LiveStackPool.size()) <= 1, "After fork we should only have zero or one tracked stacks!"); + } +}; // namespace PThreads + +void SetupThreadHandlers() { + FEXCore::Threads::Pointers Ptrs = { + .CreateThread = PThreads::CreateThread_PThread, + .CleanupAfterFork = PThreads::CleanupAfterFork_PThread, }; - void SetupThreadHandlers() { - FEXCore::Threads::Pointers Ptrs = { - .CreateThread = PThreads::CreateThread_PThread, - .CleanupAfterFork = PThreads::CleanupAfterFork_PThread, - }; + FEXCore::Threads::Thread::SetInternalPointers(Ptrs); +} - FEXCore::Threads::Thread::SetInternalPointers(Ptrs); +void Shutdown() { + std::lock_guard lk {DeadStackPoolMutex}; + std::lock_guard lk2 {LiveStackPoolMutex}; + // Erase all the dead stack pools + for (auto& Item : DeadStackPool) { + FEXCore::Allocator::munmap(Item.Ptr, Item.Size); } - void Shutdown() { - std::lock_guard lk{DeadStackPoolMutex}; - std::lock_guard lk2{LiveStackPoolMutex}; - // Erase all the dead stack pools - for (auto &Item : DeadStackPool) { - FEXCore::Allocator::munmap(Item.Ptr, Item.Size); - } - - // Now clean up any that are considered to still be live - // We are in shutdown phase, everything in the process is dead - for (auto &Item : LiveStackPool) { - FEXCore::Allocator::munmap(Item.Ptr, Item.Size); - } - - DeadStackPool.clear(); - LiveStackPool.clear(); + // Now clean up any that are considered to still be live + // We are in shutdown phase, everything in the process is dead + for (auto& Item : 
LiveStackPool) { + FEXCore::Allocator::munmap(Item.Ptr, Item.Size); } + + DeadStackPool.clear(); + LiveStackPool.clear(); } +} // namespace FEX::LinuxEmulation::Threads diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/EPoll.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/EPoll.cpp index 569bff88a7..40ddc99e91 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/EPoll.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/EPoll.cpp @@ -25,39 +25,62 @@ ARG_TO_STR(FEX::HLE::x32::compat_ptr, "%lx") ARG_TO_STR(FEX::HLE::x32::compat_ptr, "%lx") namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x32 { - void RegisterEpoll(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X32(epoll_wait, [](FEXCore::Core::CpuStateFrame *Frame, int epfd, compat_ptr events, int maxevents, int timeout) -> uint64_t { - fextl::vector Events(std::max(0, maxevents)); - uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait), epfd, Events.data(), maxevents, timeout, nullptr, 8); - - if (Result != -1) { - for (size_t i = 0; i < Result; ++i) { - events[i] = Events[i]; - } +void RegisterEpoll(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X32( + epoll_wait, + [](FEXCore::Core::CpuStateFrame* Frame, int epfd, compat_ptr events, int maxevents, int timeout) -> uint64_t { + fextl::vector Events(std::max(0, maxevents)); + uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait), epfd, Events.data(), maxevents, timeout, nullptr, 8); + + if (Result != -1) { + for (size_t i = 0; i < Result; ++i) { + events[i] = Events[i]; } - SYSCALL_ERRNO(); - }); + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + epoll_ctl, [](FEXCore::Core::CpuStateFrame* Frame, int epfd, int op, int fd, compat_ptr event) -> uint64_t { + struct epoll_event Event = *event; + uint64_t Result = ::syscall(SYSCALL_DEF(epoll_ctl), epfd, op, fd, &Event); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(epoll_pwait, + 
[](FEXCore::Core::CpuStateFrame* Frame, int epfd, compat_ptr events, int maxevent, + int timeout, const uint64_t* sigmask, size_t sigsetsize) -> uint64_t { + fextl::vector Events(std::max(0, maxevent)); + + uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait), epfd, Events.data(), maxevent, timeout, sigmask, sigsetsize); + + if (Result != -1) { + for (size_t i = 0; i < Result; ++i) { + events[i] = Events[i]; + } + } - REGISTER_SYSCALL_IMPL_X32(epoll_ctl, [](FEXCore::Core::CpuStateFrame *Frame, int epfd, int op, int fd, compat_ptr event) -> uint64_t { - struct epoll_event Event = *event; - uint64_t Result = ::syscall(SYSCALL_DEF(epoll_ctl), epfd, op, fd, &Event); - SYSCALL_ERRNO(); - }); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32(epoll_pwait, [](FEXCore::Core::CpuStateFrame *Frame, int epfd, compat_ptr events, int maxevent, int timeout, const uint64_t* sigmask, size_t sigsetsize) -> uint64_t { + if (Handler->IsHostKernelVersionAtLeast(5, 11, 0)) { + REGISTER_SYSCALL_IMPL_X32(epoll_pwait2, + [](FEXCore::Core::CpuStateFrame* Frame, int epfd, compat_ptr events, + int maxevent, compat_ptr timeout, const uint64_t* sigmask, size_t sigsetsize) -> uint64_t { fextl::vector Events(std::max(0, maxevent)); - uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait), - epfd, - Events.data(), - maxevent, - timeout, - sigmask, - sigsetsize); + struct timespec tp64 {}; + struct timespec* timed_ptr {}; + if (timeout) { + tp64 = *timeout; + timed_ptr = &tp64; + } + + uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait2), epfd, Events.data(), maxevent, timed_ptr, sigmask, sigsetsize); if (Result != -1) { for (size_t i = 0; i < Result; ++i) { @@ -67,38 +90,8 @@ namespace FEX::HLE::x32 { SYSCALL_ERRNO(); }); - - if (Handler->IsHostKernelVersionAtLeast(5, 11, 0)) { - REGISTER_SYSCALL_IMPL_X32(epoll_pwait2, [](FEXCore::Core::CpuStateFrame *Frame, int epfd, compat_ptr events, int maxevent, compat_ptr timeout, const uint64_t* sigmask, size_t sigsetsize) -> uint64_t { - fextl::vector 
Events(std::max(0, maxevent)); - - struct timespec tp64{}; - struct timespec *timed_ptr{}; - if (timeout) { - tp64 = *timeout; - timed_ptr = &tp64; - } - - uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait2), - epfd, - Events.data(), - maxevent, - timed_ptr, - sigmask, - sigsetsize); - - if (Result != -1) { - for (size_t i = 0; i < Result; ++i) { - events[i] = Events[i]; - } - } - - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL_X32(epoll_pwait2, UnimplementedSyscallSafe); - } - + } else { + REGISTER_SYSCALL_IMPL_X32(epoll_pwait2, UnimplementedSyscallSafe); } } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/FD.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/FD.cpp index 640e6aa788..ef2220a584 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/FD.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/FD.cpp @@ -42,125 +42,555 @@ tags: LinuxSyscalls|syscalls-x86-32 ARG_TO_STR(FEX::HLE::x32::compat_ptr, "%lx") namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x32 { - // Used to ensure no bogus values are passed into readv/writev family syscalls. - // This is mainly to sanitize vector sizing. It's fine for the bogus value - // itself to pass into the syscall, since the kernel will handle it. - static constexpr int SanitizeIOCount(int count) { - return std::max(0, count); - } +// Used to ensure no bogus values are passed into readv/writev family syscalls. +// This is mainly to sanitize vector sizing. It's fine for the bogus value +// itself to pass into the syscall, since the kernel will handle it. 
+static constexpr int SanitizeIOCount(int count) { + return std::max(0, count); +} #ifdef _M_X86_64 - uint32_t ioctl_32(FEXCore::Core::CpuStateFrame*, int fd, uint32_t cmd, uint32_t args) { - uint32_t Result{}; - __asm volatile("int $0x80;" - : "=a" (Result) - : "a" (SYSCALL_x86_ioctl) - , "b" (fd) - , "c" (cmd) - , "d" (args) - : "memory"); - return Result; - } +uint32_t ioctl_32(FEXCore::Core::CpuStateFrame*, int fd, uint32_t cmd, uint32_t args) { + uint32_t Result {}; + __asm volatile("int $0x80;" : "=a"(Result) : "a"(SYSCALL_x86_ioctl), "b"(fd), "c"(cmd), "d"(args) : "memory"); + return Result; +} #endif - auto fcntlHandler = [](FEXCore::Core::CpuStateFrame *Frame, int fd, int cmd, uint64_t arg) -> uint64_t { - // fcntl64 struct directly matches the 64bit fcntl op - // cmd just needs to be fixed up - // These are redefined to be their non-64bit tagged value on x86-64 - constexpr int OP_GETLK64_32 = 12; - constexpr int OP_SETLK64_32 = 13; - constexpr int OP_SETLKW64_32 = 14; - - void *lock_arg = (void*)arg; - struct flock tmp{}; - int old_cmd = cmd; +auto fcntlHandler = [](FEXCore::Core::CpuStateFrame* Frame, int fd, int cmd, uint64_t arg) -> uint64_t { + // fcntl64 struct directly matches the 64bit fcntl op + // cmd just needs to be fixed up + // These are redefined to be their non-64bit tagged value on x86-64 + constexpr int OP_GETLK64_32 = 12; + constexpr int OP_SETLK64_32 = 13; + constexpr int OP_SETLKW64_32 = 14; + + void* lock_arg = (void*)arg; + struct flock tmp {}; + int old_cmd = cmd; + + switch (old_cmd) { + case OP_GETLK64_32: { + cmd = F_GETLK; + lock_arg = (void*)&tmp; + tmp = *reinterpret_cast(arg); + break; + } + case OP_SETLK64_32: { + cmd = F_SETLK; + lock_arg = (void*)&tmp; + tmp = *reinterpret_cast(arg); + break; + } + case OP_SETLKW64_32: { + cmd = F_SETLKW; + lock_arg = (void*)&tmp; + tmp = *reinterpret_cast(arg); + break; + } + case F_OFD_SETLK: + case F_OFD_GETLK: + case F_OFD_SETLKW: { + lock_arg = (void*)&tmp; + tmp = 
*reinterpret_cast(arg); + break; + } + case F_GETLK: + case F_SETLK: + case F_SETLKW: { + lock_arg = (void*)&tmp; + tmp = *reinterpret_cast(arg); + break; + } + + case F_SETFL: lock_arg = reinterpret_cast(FEX::HLE::RemapFromX86Flags(arg)); break; + // Maps directly + case F_DUPFD: + case F_DUPFD_CLOEXEC: + case F_GETFD: + case F_SETFD: + case F_GETFL: break; + default: LOGMAN_MSG_A_FMT("Unhandled fcntl64: 0x{:x}", cmd); break; + } + + uint64_t Result = ::fcntl(fd, cmd, lock_arg); + + if (Result != -1) { switch (old_cmd) { - case OP_GETLK64_32: { - cmd = F_GETLK; - lock_arg = (void*)&tmp; - tmp = *reinterpret_cast(arg); - break; + case OP_GETLK64_32: { + *reinterpret_cast(arg) = tmp; + break; + } + case F_OFD_GETLK: { + *reinterpret_cast(arg) = tmp; + break; + } + case F_GETLK: { + *reinterpret_cast(arg) = tmp; + break; + } break; + case F_DUPFD: + case F_DUPFD_CLOEXEC: FEX::HLE::x32::CheckAndAddFDDuplication(fd, Result); break; + case F_GETFL: { + Result = FEX::HLE::RemapToX86Flags(Result); + break; + } + default: break; + } + } + SYSCALL_ERRNO(); +}; + +auto selectHandler = [](FEXCore::Core::CpuStateFrame* Frame, int nfds, fd_set32* readfds, fd_set32* writefds, fd_set32* exceptfds, + struct timeval32* timeout) -> uint64_t { + struct timeval tp64 {}; + if (timeout) { + tp64 = *timeout; + } + + fd_set Host_readfds; + fd_set Host_writefds; + fd_set Host_exceptfds; + FD_ZERO(&Host_readfds); + FD_ZERO(&Host_writefds); + FD_ZERO(&Host_exceptfds); + + // Round up to the full 32bit word + uint32_t NumWords = FEXCore::AlignUp(nfds, 32) / 4; + + if (readfds) { + for (int i = 0; i < NumWords; ++i) { + uint32_t FD = readfds[i]; + int32_t Rem = nfds - (i * 32); + for (int j = 0; j < 32 && j < Rem; ++j) { + if ((FD >> j) & 1) { + FD_SET(i * 32 + j, &Host_readfds); + } } - case OP_SETLK64_32: { - cmd = F_SETLK; - lock_arg = (void*)&tmp; - tmp = *reinterpret_cast(arg); - break; + } + } + + if (writefds) { + for (int i = 0; i < NumWords; ++i) { + uint32_t FD = writefds[i]; + 
int32_t Rem = nfds - (i * 32); + for (int j = 0; j < 32 && j < Rem; ++j) { + if ((FD >> j) & 1) { + FD_SET(i * 32 + j, &Host_writefds); + } } - case OP_SETLKW64_32: { - cmd = F_SETLKW; - lock_arg = (void*)&tmp; - tmp = *reinterpret_cast(arg); - break; + } + } + + if (exceptfds) { + for (int i = 0; i < NumWords; ++i) { + uint32_t FD = exceptfds[i]; + int32_t Rem = nfds - (i * 32); + for (int j = 0; j < 32 && j < Rem; ++j) { + if ((FD >> j) & 1) { + FD_SET(i * 32 + j, &Host_exceptfds); + } } - case F_OFD_SETLK: - case F_OFD_GETLK: - case F_OFD_SETLKW: { - lock_arg = (void*)&tmp; - tmp = *reinterpret_cast(arg); - break; + } + } + + uint64_t Result = ::select(nfds, readfds ? &Host_readfds : nullptr, writefds ? &Host_writefds : nullptr, + exceptfds ? &Host_exceptfds : nullptr, timeout ? &tp64 : nullptr); + if (readfds) { + for (int i = 0; i < nfds; ++i) { + if (FD_ISSET(i, &Host_readfds)) { + readfds[i / 32] |= 1 << (i & 31); + } else { + readfds[i / 32] &= ~(1 << (i & 31)); } - case F_GETLK: - case F_SETLK: - case F_SETLKW: { - lock_arg = (void*)&tmp; - tmp = *reinterpret_cast(arg); - break; + } + } + + if (writefds) { + for (int i = 0; i < nfds; ++i) { + if (FD_ISSET(i, &Host_writefds)) { + writefds[i / 32] |= 1 << (i & 31); + } else { + writefds[i / 32] &= ~(1 << (i & 31)); } + } + } - case F_SETFL: - lock_arg = reinterpret_cast(FEX::HLE::RemapFromX86Flags(arg)); - break; - // Maps directly - case F_DUPFD: - case F_DUPFD_CLOEXEC: - case F_GETFD: - case F_SETFD: - case F_GETFL: - break; + if (exceptfds) { + for (int i = 0; i < nfds; ++i) { + if (FD_ISSET(i, &Host_exceptfds)) { + exceptfds[i / 32] |= 1 << (i & 31); + } else { + exceptfds[i / 32] &= ~(1 << (i & 31)); + } + } + } + + if (timeout) { + *timeout = tp64; + } + SYSCALL_ERRNO(); +}; + +void RegisterFD(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X32_PASS(poll, [](FEXCore::Core::CpuStateFrame* Frame, struct pollfd* fds, nfds_t nfds, int timeout) -> uint64_t { + uint64_t Result = ::poll(fds, 
nfds, timeout); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(ppoll, + [](FEXCore::Core::CpuStateFrame* Frame, struct pollfd* fds, nfds_t nfds, timespec32* timeout_ts, + const uint64_t* sigmask, size_t sigsetsize) -> uint64_t { + // sigsetsize is unused here since it is currently a constant and not exposed through glibc + struct timespec tp64 {}; + struct timespec* timed_ptr {}; + if (timeout_ts) { + struct timespec32 timeout {}; + if (FaultSafeMemcpy::CopyFromUser(&timeout, timeout_ts, sizeof(timeout)) == EFAULT) { + return -EFAULT; + } + + tp64 = timeout; + timed_ptr = &tp64; + } + + uint64_t Result = ::syscall(SYSCALL_DEF(ppoll), fds, nfds, timed_ptr, sigmask, sigsetsize); - default: - LOGMAN_MSG_A_FMT("Unhandled fcntl64: 0x{:x}", cmd); - break; + if (timeout_ts) { + struct timespec32 timeout {}; + timeout = tp64; + + if (FaultSafeMemcpy::CopyToUser(timeout_ts, &timeout, sizeof(timeout)) == EFAULT) { + // Write to user memory failed, this can occur if the timeout is defined in read-only memory. + // This is okay to happen, kernel continues happily. 
+ } } - uint64_t Result = ::fcntl(fd, cmd, lock_arg); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(ppoll_time64, ppoll, + [](FEXCore::Core::CpuStateFrame* Frame, struct pollfd* fds, nfds_t nfds, + struct timespec* timeout_ts, const uint64_t* sigmask, size_t sigsetsize) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(ppoll), fds, nfds, timeout_ts, sigmask, sigsetsize); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + _llseek, [](FEXCore::Core::CpuStateFrame* Frame, uint32_t fd, uint32_t offset_high, uint32_t offset_low, loff_t* result, uint32_t whence) -> uint64_t { + uint64_t Offset = offset_high; + Offset <<= 32; + Offset |= offset_low; + uint64_t Result = lseek(fd, Offset, whence); if (Result != -1) { - switch (old_cmd) { - case OP_GETLK64_32: { - *reinterpret_cast(arg) = tmp; - break; - } - case F_OFD_GETLK: { - *reinterpret_cast(arg) = tmp; - break; - } - case F_GETLK: { - *reinterpret_cast(arg) = tmp; - break; - } - break; - case F_DUPFD: - case F_DUPFD_CLOEXEC: - FEX::HLE::x32::CheckAndAddFDDuplication(fd, Result); - break; - case F_GETFL: { - Result = FEX::HLE::RemapToX86Flags(Result); - break; - } - default: break; + *result = Result; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(readv, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, int iovcnt) -> uint64_t { + fextl::vector Host_iovec(iov, iov + SanitizeIOCount(iovcnt)); + uint64_t Result = ::readv(fd, Host_iovec.data(), iovcnt); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(writev, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, int iovcnt) -> uint64_t { + fextl::vector Host_iovec(iov, iov + SanitizeIOCount(iovcnt)); + uint64_t Result = ::writev(fd, Host_iovec.data(), iovcnt); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL( + chown32, chown, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, uid_t owner, gid_t group) -> uint64_t { + uint64_t Result = 
::chown(pathname, owner, group); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(fchown32, fchown, [](FEXCore::Core::CpuStateFrame* Frame, int fd, uid_t owner, gid_t group) -> uint64_t { + uint64_t Result = ::fchown(fd, owner, group); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL( + lchown32, lchown, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, uid_t owner, gid_t group) -> uint64_t { + uint64_t Result = ::lchown(pathname, owner, group); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(oldstat, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, oldstat32* buf) -> uint64_t { + struct stat host_stat; + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Stat(pathname, &host_stat); + if (Result != -1) { + if (host_stat.st_ino > std::numeric_limitsst_ino)>::max()) { + return -EOVERFLOW; + } + if (host_stat.st_nlink > std::numeric_limitsst_nlink)>::max()) { + return -EOVERFLOW; + } + + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(oldfstat, [](FEXCore::Core::CpuStateFrame* Frame, int fd, oldstat32* buf) -> uint64_t { + struct stat host_stat; + uint64_t Result = ::fstat(fd, &host_stat); + if (Result != -1) { + if (host_stat.st_ino > std::numeric_limitsst_ino)>::max()) { + return -EOVERFLOW; + } + if (host_stat.st_nlink > std::numeric_limitsst_nlink)>::max()) { + return -EOVERFLOW; } + + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(oldlstat, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, oldstat32* buf) -> uint64_t { + struct stat host_stat; + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Lstat(path, &host_stat); + if (Result != -1) { + if (host_stat.st_ino > std::numeric_limitsst_ino)>::max()) { + return -EOVERFLOW; + } + if (host_stat.st_nlink > std::numeric_limitsst_nlink)>::max()) { + return -EOVERFLOW; + } + + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(stat, [](FEXCore::Core::CpuStateFrame* 
Frame, const char* pathname, stat32* buf) -> uint64_t { + struct stat host_stat; + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Stat(pathname, &host_stat); + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(fstat, [](FEXCore::Core::CpuStateFrame* Frame, int fd, stat32* buf) -> uint64_t { + struct stat host_stat; + uint64_t Result = ::fstat(fd, &host_stat); + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(lstat, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, stat32* buf) -> uint64_t { + struct stat host_stat; + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Lstat(path, &host_stat); + if (Result != -1) { + *buf = host_stat; } SYSCALL_ERRNO(); - }; + }); - auto selectHandler = [](FEXCore::Core::CpuStateFrame *Frame, int nfds, fd_set32 *readfds, fd_set32 *writefds, fd_set32 *exceptfds, struct timeval32 *timeout) -> uint64_t { - struct timeval tp64{}; + REGISTER_SYSCALL_IMPL_X32(stat64, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, stat64_32* buf) -> uint64_t { + struct stat host_stat; + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Stat(pathname, &host_stat); + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(lstat64, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, stat64_32* buf) -> uint64_t { + struct stat host_stat; + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Lstat(path, &host_stat); + + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(fstat64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, stat64_32* buf) -> uint64_t { + struct stat64 host_stat; + uint64_t Result = ::fstat64(fd, &host_stat); + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(statfs, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, statfs32_32* buf) -> uint64_t { + struct statfs host_stat; + 
uint64_t Result = FEX::HLE::_SyscallHandler->FM.Statfs(path, &host_stat); + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(fstatfs, [](FEXCore::Core::CpuStateFrame* Frame, int fd, statfs32_32* buf) -> uint64_t { + struct statfs host_stat; + uint64_t Result = ::fstatfs(fd, &host_stat); + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(fstatfs64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, size_t sz, struct statfs64_32* buf) -> uint64_t { + LOGMAN_THROW_AA_FMT(sz == sizeof(struct statfs64_32), "This needs to match"); + + struct statfs64 host_stat; + uint64_t Result = ::fstatfs64(fd, &host_stat); + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(statfs64, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, size_t sz, struct statfs64_32* buf) -> uint64_t { + LOGMAN_THROW_AA_FMT(sz == sizeof(struct statfs64_32), "This needs to match"); + + struct statfs host_stat; + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Statfs(path, &host_stat); + if (Result != -1) { + *buf = host_stat; + } + + SYSCALL_ERRNO(); + }); + + // x86 32-bit fcntl syscall has a historical quirk that it uses the same handler as fcntl64 + // This is in direct opposition to all other 32-bit architectures that use the compat_fcntl handler + // This quirk goes back to the start of the Linux 2.6.12-rc2 git history. 
Seeing history before + // that point to see when this quirk happened would be difficult + // + // For more reference, the compat_fcntl handler blocks a few commands: + // - F_GETLK64 + // - F_SETLK64 + // - F_SETLKW64 + // - F_OFD_GETLK + // - F_OFD_SETLK + // - F_OFD_SETLKW + + REGISTER_SYSCALL_IMPL_X32(fcntl, fcntlHandler); + REGISTER_SYSCALL_IMPL_X32(fcntl64, fcntlHandler); + + REGISTER_SYSCALL_IMPL_X32(dup, [](FEXCore::Core::CpuStateFrame* Frame, int oldfd) -> uint64_t { + uint64_t Result = ::dup(oldfd); + if (Result != -1) { + CheckAndAddFDDuplication(oldfd, Result); + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(dup2, [](FEXCore::Core::CpuStateFrame* Frame, int oldfd, int newfd) -> uint64_t { + uint64_t Result = ::dup2(oldfd, newfd); + if (Result != -1) { + CheckAndAddFDDuplication(oldfd, newfd); + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + preadv, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, uint32_t iovcnt, uint32_t pos_low, uint32_t pos_high) -> uint64_t { + fextl::vector Host_iovec(iov, iov + SanitizeIOCount(iovcnt)); + + uint64_t Result = ::syscall(SYSCALL_DEF(preadv), fd, Host_iovec.data(), iovcnt, pos_low, pos_high); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + pwritev, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, uint32_t iovcnt, uint32_t pos_low, uint32_t pos_high) -> uint64_t { + fextl::vector Host_iovec(iov, iov + SanitizeIOCount(iovcnt)); + + uint64_t Result = ::syscall(SYSCALL_DEF(pwritev), fd, Host_iovec.data(), iovcnt, pos_low, pos_high); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(process_vm_readv, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, const struct iovec32* local_iov, unsigned long liovcnt, + const struct iovec32* remote_iov, unsigned long riovcnt, unsigned long flags) -> uint64_t { + fextl::vector Host_local_iovec(local_iov, local_iov + SanitizeIOCount(liovcnt)); + fextl::vector Host_remote_iovec(remote_iov, 
remote_iov + SanitizeIOCount(riovcnt)); + + uint64_t Result = ::process_vm_readv(pid, Host_local_iovec.data(), liovcnt, Host_remote_iovec.data(), riovcnt, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(process_vm_writev, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, const struct iovec32* local_iov, unsigned long liovcnt, + const struct iovec32* remote_iov, unsigned long riovcnt, unsigned long flags) -> uint64_t { + fextl::vector Host_local_iovec(local_iov, local_iov + SanitizeIOCount(liovcnt)); + fextl::vector Host_remote_iovec(remote_iov, remote_iov + SanitizeIOCount(riovcnt)); + + uint64_t Result = ::process_vm_writev(pid, Host_local_iovec.data(), liovcnt, Host_remote_iovec.data(), riovcnt, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(preadv2, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, uint32_t iovcnt, uint32_t pos_low, + uint32_t pos_high, int flags) -> uint64_t { + fextl::vector Host_iovec(iov, iov + SanitizeIOCount(iovcnt)); + + uint64_t Result = ::syscall(SYSCALL_DEF(preadv2), fd, Host_iovec.data(), iovcnt, pos_low, pos_high, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(pwritev2, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, uint32_t iovcnt, uint32_t pos_low, + uint32_t pos_high, int flags) -> uint64_t { + fextl::vector Host_iovec(iov, iov + SanitizeIOCount(iovcnt)); + + uint64_t Result = ::syscall(SYSCALL_DEF(pwritev2), fd, Host_iovec.data(), iovcnt, pos_low, pos_high, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(fstatat_64, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, stat64_32* buf, int flag) -> uint64_t { + struct stat64 host_stat; + uint64_t Result = FEX::HLE::_SyscallHandler->FM.NewFSStatAt64(dirfd, pathname, &host_stat, flag); + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(ioctl, ioctl32); + + REGISTER_SYSCALL_IMPL_X32(getdents, 
[](FEXCore::Core::CpuStateFrame* Frame, int fd, void* dirp, uint32_t count) -> uint64_t { + return GetDentsEmulation(fd, reinterpret_cast(dirp), count); + }); + + REGISTER_SYSCALL_IMPL_X32(getdents64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* dirp, uint32_t count) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(getdents64), static_cast(fd), dirp, static_cast(count)); + if (Result != -1) { + // Walk each offset + // if we are passing the full d_off to the 32bit application then it seems to break things? + for (size_t i = 0, num = 0; i < Result; ++num) { + linux_dirent_64* Incoming = (linux_dirent_64*)(reinterpret_cast(dirp) + i); + Incoming->d_off = num; + i += Incoming->d_reclen; + } + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(select, [](FEXCore::Core::CpuStateFrame* Frame, compat_select_args* arg) -> uint64_t { + return selectHandler(Frame, arg->nfds, arg->readfds, arg->writefds, arg->exceptfds, arg->timeout); + }); + + REGISTER_SYSCALL_IMPL_X32(_newselect, selectHandler); + + REGISTER_SYSCALL_IMPL_X32(pselect6, + [](FEXCore::Core::CpuStateFrame* Frame, int nfds, fd_set32* readfds, fd_set32* writefds, fd_set32* exceptfds, + timespec32* timeout, compat_ptr sigmaskpack) -> uint64_t { + struct timespec tp64 {}; if (timeout) { tp64 = *timeout; } @@ -168,9 +598,12 @@ namespace FEX::HLE::x32 { fd_set Host_readfds; fd_set Host_writefds; fd_set Host_exceptfds; + sigset_t HostSet {}; + FD_ZERO(&Host_readfds); FD_ZERO(&Host_writefds); FD_ZERO(&Host_exceptfds); + sigemptyset(&HostSet); // Round up to the full 32bit word uint32_t NumWords = FEXCore::AlignUp(nfds, 32) / 4; @@ -211,17 +644,25 @@ namespace FEX::HLE::x32 { } } - uint64_t Result = ::select(nfds, - readfds ? &Host_readfds : nullptr, - writefds ? &Host_writefds : nullptr, - exceptfds ? &Host_exceptfds : nullptr, - timeout ? 
&tp64 : nullptr); + if (sigmaskpack && sigmaskpack->sigset) { + uint64_t* sigmask = sigmaskpack->sigset; + size_t sigsetsize = sigmaskpack->size; + for (int32_t i = 0; i < (sigsetsize * 8); ++i) { + if (*sigmask & (1ULL << i)) { + sigaddset(&HostSet, i + 1); + } + } + } + + uint64_t Result = ::pselect(nfds, readfds ? &Host_readfds : nullptr, writefds ? &Host_writefds : nullptr, + exceptfds ? &Host_exceptfds : nullptr, timeout ? &tp64 : nullptr, &HostSet); + if (readfds) { for (int i = 0; i < nfds; ++i) { if (FD_ISSET(i, &Host_readfds)) { - readfds[i/32] |= 1 << (i & 31); + readfds[i / 32] |= 1 << (i & 31); } else { - readfds[i/32] &= ~(1 << (i & 31)); + readfds[i / 32] &= ~(1 << (i & 31)); } } } @@ -229,9 +670,9 @@ namespace FEX::HLE::x32 { if (writefds) { for (int i = 0; i < nfds; ++i) { if (FD_ISSET(i, &Host_writefds)) { - writefds[i/32] |= 1 << (i & 31); + writefds[i / 32] |= 1 << (i & 31); } else { - writefds[i/32] &= ~(1 << (i & 31)); + writefds[i / 32] &= ~(1 << (i & 31)); } } } @@ -239,9 +680,9 @@ namespace FEX::HLE::x32 { if (exceptfds) { for (int i = 0; i < nfds; ++i) { if (FD_ISSET(i, &Host_exceptfds)) { - exceptfds[i/32] |= 1 << (i & 31); + exceptfds[i / 32] |= 1 << (i & 31); } else { - exceptfds[i/32] &= ~(1 << (i & 31)); + exceptfds[i / 32] &= ~(1 << (i & 31)); } } } @@ -250,730 +691,251 @@ namespace FEX::HLE::x32 { *timeout = tp64; } SYSCALL_ERRNO(); - }; - - void RegisterFD(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X32_PASS(poll, [](FEXCore::Core::CpuStateFrame *Frame, struct pollfd *fds, nfds_t nfds, int timeout) -> uint64_t { - uint64_t Result = ::poll(fds, nfds, timeout); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(ppoll, [](FEXCore::Core::CpuStateFrame *Frame, struct pollfd *fds, nfds_t nfds, timespec32 *timeout_ts, const uint64_t *sigmask, size_t sigsetsize) -> uint64_t { - // sigsetsize is unused here since it is currently a constant and not exposed through glibc - struct timespec tp64{}; - struct timespec 
*timed_ptr{}; - if (timeout_ts) { - struct timespec32 timeout{}; - if (FaultSafeMemcpy::CopyFromUser(&timeout, timeout_ts, sizeof(timeout)) == EFAULT) { - return -EFAULT; - } - - tp64 = timeout; - timed_ptr = &tp64; - } - - uint64_t Result = ::syscall(SYSCALL_DEF(ppoll), - fds, - nfds, - timed_ptr, - sigmask, - sigsetsize); - - if (timeout_ts) { - struct timespec32 timeout{}; - timeout = tp64; - - if (FaultSafeMemcpy::CopyToUser(timeout_ts, &timeout, sizeof(timeout)) == EFAULT) { - // Write to user memory failed, this can occur if the timeout is defined in read-only memory. - // This is okay to happen, kernel continues happily. - } - } - - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(ppoll_time64, ppoll, [](FEXCore::Core::CpuStateFrame *Frame, struct pollfd *fds, nfds_t nfds, struct timespec *timeout_ts, const uint64_t *sigmask, size_t sigsetsize) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(ppoll), - fds, - nfds, - timeout_ts, - sigmask, - sigsetsize); - - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(_llseek, [](FEXCore::Core::CpuStateFrame *Frame, uint32_t fd, uint32_t offset_high, uint32_t offset_low, loff_t *result, uint32_t whence) -> uint64_t { - uint64_t Offset = offset_high; - Offset <<= 32; - Offset |= offset_low; - uint64_t Result = lseek(fd, Offset, whence); - if (Result != -1) { - *result = Result; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(readv, [](FEXCore::Core::CpuStateFrame *Frame, int fd, const struct iovec32 *iov, int iovcnt) -> uint64_t { - fextl::vector Host_iovec(iov, iov + SanitizeIOCount(iovcnt)); - uint64_t Result = ::readv(fd, Host_iovec.data(), iovcnt); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(writev, [](FEXCore::Core::CpuStateFrame *Frame, int fd, const struct iovec32 *iov, int iovcnt) -> uint64_t { - fextl::vector Host_iovec(iov, iov + SanitizeIOCount(iovcnt)); - uint64_t Result = ::writev(fd, Host_iovec.data(), iovcnt); - SYSCALL_ERRNO(); - }); - - 
REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(chown32, chown, [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, uid_t owner, gid_t group) -> uint64_t { - uint64_t Result = ::chown(pathname, owner, group); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(fchown32, fchown, [](FEXCore::Core::CpuStateFrame *Frame, int fd, uid_t owner, gid_t group) -> uint64_t { - uint64_t Result = ::fchown(fd, owner, group); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(lchown32, lchown, [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, uid_t owner, gid_t group) -> uint64_t { - uint64_t Result = ::lchown(pathname, owner, group); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(oldstat, [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, oldstat32 *buf) -> uint64_t { - struct stat host_stat; - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Stat(pathname, &host_stat); - if (Result != -1) { - if (host_stat.st_ino > std::numeric_limitsst_ino)>::max()) { - return -EOVERFLOW; - } - if (host_stat.st_nlink > std::numeric_limitsst_nlink)>::max()) { - return -EOVERFLOW; - } - - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(oldfstat, [](FEXCore::Core::CpuStateFrame *Frame, int fd, oldstat32 *buf) -> uint64_t { - struct stat host_stat; - uint64_t Result = ::fstat(fd, &host_stat); - if (Result != -1) { - if (host_stat.st_ino > std::numeric_limitsst_ino)>::max()) { - return -EOVERFLOW; - } - if (host_stat.st_nlink > std::numeric_limitsst_nlink)>::max()) { - return -EOVERFLOW; - } - - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(oldlstat, [](FEXCore::Core::CpuStateFrame *Frame, const char *path, oldstat32 *buf) -> uint64_t { - struct stat host_stat; - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Lstat(path, &host_stat); - if (Result != -1) { - if (host_stat.st_ino > std::numeric_limitsst_ino)>::max()) { - return -EOVERFLOW; - } - if (host_stat.st_nlink > 
std::numeric_limitsst_nlink)>::max()) { - return -EOVERFLOW; - } - - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(stat, [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, stat32 *buf) -> uint64_t { - struct stat host_stat; - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Stat(pathname, &host_stat); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(fstat, [](FEXCore::Core::CpuStateFrame *Frame, int fd, stat32 *buf) -> uint64_t { - struct stat host_stat; - uint64_t Result = ::fstat(fd, &host_stat); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(lstat, [](FEXCore::Core::CpuStateFrame *Frame, const char *path, stat32 *buf) -> uint64_t { - struct stat host_stat; - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Lstat(path, &host_stat); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(stat64, [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, stat64_32 *buf) -> uint64_t { - struct stat host_stat; - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Stat(pathname, &host_stat); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); + }); + + REGISTER_SYSCALL_IMPL_X32(fadvise64_64, + [](FEXCore::Core::CpuStateFrame* Frame, int32_t fd, uint32_t offset_low, uint32_t offset_high, uint32_t len_low, + uint32_t len_high, int advice) -> uint64_t { + uint64_t Offset = offset_high; + Offset <<= 32; + Offset |= offset_low; + uint64_t Len = len_high; + Len <<= 32; + Len |= len_low; + uint64_t Result = ::posix_fadvise64(fd, Offset, Len, advice); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32(lstat64, [](FEXCore::Core::CpuStateFrame *Frame, const char *path, stat64_32 *buf) -> uint64_t { - struct stat host_stat; - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Lstat(path, &host_stat); + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL( + timerfd_settime64, 
timerfd_settime, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, int flags, const struct itimerspec* new_value, struct itimerspec* old_value) -> uint64_t { + uint64_t Result = ::timerfd_settime(fd, flags, new_value, old_value); + SYSCALL_ERRNO(); + }); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(fstat64, [](FEXCore::Core::CpuStateFrame *Frame, int fd, stat64_32 *buf) -> uint64_t { - struct stat64 host_stat; - uint64_t Result = ::fstat64(fd, &host_stat); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(statfs, [](FEXCore::Core::CpuStateFrame *Frame, const char *path, statfs32_32 *buf) -> uint64_t { - struct statfs host_stat; - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Statfs(path, &host_stat); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(fstatfs, [](FEXCore::Core::CpuStateFrame *Frame, int fd, statfs32_32 *buf) -> uint64_t { - struct statfs host_stat; - uint64_t Result = ::fstatfs(fd, &host_stat); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(timerfd_gettime64, timerfd_gettime, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, struct itimerspec* curr_value) -> uint64_t { + uint64_t Result = ::timerfd_gettime(fd, curr_value); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(timerfd_settime, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, int flags, const FEX::HLE::x32::old_itimerspec32* new_value, + FEX::HLE::x32::old_itimerspec32* old_value) -> uint64_t { + struct itimerspec new_value_host {}; + struct itimerspec old_value_host {}; + struct itimerspec* old_value_host_p {}; + + new_value_host = *new_value; + if (old_value) { + old_value_host_p = &old_value_host; + } - REGISTER_SYSCALL_IMPL_X32(fstatfs64, [](FEXCore::Core::CpuStateFrame *Frame, int fd, size_t sz, struct statfs64_32 *buf) -> uint64_t { - LOGMAN_THROW_AA_FMT(sz 
== sizeof(struct statfs64_32), "This needs to match"); + // Flags don't need remapped + uint64_t Result = ::timerfd_settime(fd, flags, &new_value_host, old_value_host_p); - struct statfs64 host_stat; - uint64_t Result = ::fstatfs64(fd, &host_stat); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); + if (Result != -1 && old_value) { + *old_value = old_value_host; + } + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32(statfs64, [](FEXCore::Core::CpuStateFrame *Frame, const char *path, size_t sz, struct statfs64_32 *buf) -> uint64_t { - LOGMAN_THROW_AA_FMT(sz == sizeof(struct statfs64_32), "This needs to match"); + REGISTER_SYSCALL_IMPL_X32(timerfd_gettime, [](FEXCore::Core::CpuStateFrame* Frame, int fd, FEX::HLE::x32::old_itimerspec32* curr_value) -> uint64_t { + struct itimerspec Host {}; - struct statfs host_stat; - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Statfs(path, &host_stat); - if (Result != -1) { - *buf = host_stat; - } + uint64_t Result = ::timerfd_gettime(fd, &Host); - SYSCALL_ERRNO(); - }); - - // x86 32-bit fcntl syscall has a historical quirk that it uses the same handler as fcntl64 - // This is in direct opposition to all other 32-bit architectures that use the compat_fcntl handler - // This quirk goes back to the start of the Linux 2.6.12-rc2 git history. 
Seeing history before - // that point to see when this quirk happened would be difficult - // - // For more reference, the compat_fcntl handler blocks a few commands: - // - F_GETLK64 - // - F_SETLK64 - // - F_SETLKW64 - // - F_OFD_GETLK - // - F_OFD_SETLK - // - F_OFD_SETLKW - - REGISTER_SYSCALL_IMPL_X32(fcntl, fcntlHandler); - REGISTER_SYSCALL_IMPL_X32(fcntl64, fcntlHandler); - - REGISTER_SYSCALL_IMPL_X32(dup, [](FEXCore::Core::CpuStateFrame *Frame, int oldfd) -> uint64_t { - uint64_t Result = ::dup(oldfd); - if (Result != -1) { - CheckAndAddFDDuplication(oldfd, Result); - } - SYSCALL_ERRNO(); - }); + if (Result != -1) { + *curr_value = Host; + } - REGISTER_SYSCALL_IMPL_X32(dup2, [](FEXCore::Core::CpuStateFrame *Frame, int oldfd, int newfd) -> uint64_t { - uint64_t Result = ::dup2(oldfd, newfd); - if (Result != -1) { - CheckAndAddFDDuplication(oldfd, newfd); - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(preadv, [](FEXCore::Core::CpuStateFrame *Frame, - int fd, - const struct iovec32 *iov, - uint32_t iovcnt, - uint32_t pos_low, - uint32_t pos_high) -> uint64_t { - fextl::vector Host_iovec(iov, iov + SanitizeIOCount(iovcnt)); - - uint64_t Result = ::syscall(SYSCALL_DEF(preadv), fd, Host_iovec.data(), iovcnt, pos_low, pos_high); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(pwritev, [](FEXCore::Core::CpuStateFrame *Frame, - int fd, - const struct iovec32 *iov, - uint32_t iovcnt, - uint32_t pos_low, - uint32_t pos_high) -> uint64_t { - fextl::vector Host_iovec(iov, iov + SanitizeIOCount(iovcnt)); - - uint64_t Result = ::syscall(SYSCALL_DEF(pwritev), fd, Host_iovec.data(), iovcnt, pos_low, pos_high); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(process_vm_readv, [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, const struct iovec32 *local_iov, unsigned long liovcnt, const struct iovec32 *remote_iov, unsigned long riovcnt, unsigned long flags) -> uint64_t { - fextl::vector Host_local_iovec(local_iov, local_iov + 
SanitizeIOCount(liovcnt)); - fextl::vector Host_remote_iovec(remote_iov, remote_iov + SanitizeIOCount(riovcnt)); - - uint64_t Result = ::process_vm_readv(pid, Host_local_iovec.data(), liovcnt, Host_remote_iovec.data(), riovcnt, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(process_vm_writev, [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, const struct iovec32 *local_iov, unsigned long liovcnt, const struct iovec32 *remote_iov, unsigned long riovcnt, unsigned long flags) -> uint64_t { - fextl::vector Host_local_iovec(local_iov, local_iov + SanitizeIOCount(liovcnt)); - fextl::vector Host_remote_iovec(remote_iov, remote_iov + SanitizeIOCount(riovcnt)); - - uint64_t Result = ::process_vm_writev(pid, Host_local_iovec.data(), liovcnt, Host_remote_iovec.data(), riovcnt, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(preadv2, [](FEXCore::Core::CpuStateFrame *Frame, - int fd, - const struct iovec32 *iov, - uint32_t iovcnt, - uint32_t pos_low, - uint32_t pos_high, - int flags) -> uint64_t { - fextl::vector Host_iovec(iov, iov + SanitizeIOCount(iovcnt)); - - uint64_t Result = ::syscall(SYSCALL_DEF(preadv2), fd, Host_iovec.data(), iovcnt, pos_low, pos_high, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(pwritev2, [](FEXCore::Core::CpuStateFrame *Frame, - int fd, - const struct iovec32 *iov, - uint32_t iovcnt, - uint32_t pos_low, - uint32_t pos_high, - int flags) -> uint64_t { - fextl::vector Host_iovec(iov, iov + SanitizeIOCount(iovcnt)); - - uint64_t Result = ::syscall(SYSCALL_DEF(pwritev2), fd, Host_iovec.data(),iovcnt, pos_low, pos_high, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(fstatat_64, [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, stat64_32 *buf, int flag) -> uint64_t { - struct stat64 host_stat; - uint64_t Result = FEX::HLE::_SyscallHandler->FM.NewFSStatAt64(dirfd, pathname, &host_stat, flag); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - 
REGISTER_SYSCALL_IMPL_X32(ioctl, ioctl32); - - REGISTER_SYSCALL_IMPL_X32(getdents, [](FEXCore::Core::CpuStateFrame *Frame, int fd, void *dirp, uint32_t count) -> uint64_t { - return GetDentsEmulation(fd, reinterpret_cast(dirp), count); - }); - - REGISTER_SYSCALL_IMPL_X32(getdents64, [](FEXCore::Core::CpuStateFrame *Frame, int fd, void *dirp, uint32_t count) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(getdents64), - static_cast(fd), - dirp, - static_cast(count)); - if (Result != -1) { - // Walk each offset - // if we are passing the full d_off to the 32bit application then it seems to break things? - for (size_t i = 0, num = 0; i < Result; ++num) { - linux_dirent_64 *Incoming = (linux_dirent_64*)(reinterpret_cast(dirp) + i); - Incoming->d_off = num; - i += Incoming->d_reclen; - } - } - SYSCALL_ERRNO(); - }); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32(select, [](FEXCore::Core::CpuStateFrame *Frame, compat_select_args *arg) -> uint64_t { - return selectHandler(Frame, arg->nfds, arg->readfds, arg->writefds, arg->exceptfds, arg->timeout); - }); + REGISTER_SYSCALL_IMPL_X32(pselect6_time64, + [](FEXCore::Core::CpuStateFrame* Frame, int nfds, fd_set32* readfds, fd_set32* writefds, fd_set32* exceptfds, + struct timespec* timeout, compat_ptr sigmaskpack) -> uint64_t { + fd_set Host_readfds; + fd_set Host_writefds; + fd_set Host_exceptfds; + sigset_t HostSet {}; - REGISTER_SYSCALL_IMPL_X32(_newselect, selectHandler); + FD_ZERO(&Host_readfds); + FD_ZERO(&Host_writefds); + FD_ZERO(&Host_exceptfds); + sigemptyset(&HostSet); - REGISTER_SYSCALL_IMPL_X32(pselect6, [](FEXCore::Core::CpuStateFrame *Frame, int nfds, fd_set32 *readfds, fd_set32 *writefds, fd_set32 *exceptfds, timespec32 *timeout, compat_ptr sigmaskpack) -> uint64_t { - struct timespec tp64{}; - if (timeout) { - tp64 = *timeout; - } + // Round up to the full 32bit word + uint32_t NumWords = FEXCore::AlignUp(nfds, 32) / 4; - fd_set Host_readfds; - fd_set Host_writefds; - fd_set Host_exceptfds; - 
sigset_t HostSet{}; - - FD_ZERO(&Host_readfds); - FD_ZERO(&Host_writefds); - FD_ZERO(&Host_exceptfds); - sigemptyset(&HostSet); - - // Round up to the full 32bit word - uint32_t NumWords = FEXCore::AlignUp(nfds, 32) / 4; - - if (readfds) { - for (int i = 0; i < NumWords; ++i) { - uint32_t FD = readfds[i]; - int32_t Rem = nfds - (i * 32); - for (int j = 0; j < 32 && j < Rem; ++j) { - if ((FD >> j) & 1) { - FD_SET(i * 32 + j, &Host_readfds); - } + if (readfds) { + for (int i = 0; i < NumWords; ++i) { + uint32_t FD = readfds[i]; + int32_t Rem = nfds - (i * 32); + for (int j = 0; j < 32 && j < Rem; ++j) { + if ((FD >> j) & 1) { + FD_SET(i * 32 + j, &Host_readfds); } } } + } - if (writefds) { - for (int i = 0; i < NumWords; ++i) { - uint32_t FD = writefds[i]; - int32_t Rem = nfds - (i * 32); - for (int j = 0; j < 32 && j < Rem; ++j) { - if ((FD >> j) & 1) { - FD_SET(i * 32 + j, &Host_writefds); - } + if (writefds) { + for (int i = 0; i < NumWords; ++i) { + uint32_t FD = writefds[i]; + int32_t Rem = nfds - (i * 32); + for (int j = 0; j < 32 && j < Rem; ++j) { + if ((FD >> j) & 1) { + FD_SET(i * 32 + j, &Host_writefds); } } } + } - if (exceptfds) { - for (int i = 0; i < NumWords; ++i) { - uint32_t FD = exceptfds[i]; - int32_t Rem = nfds - (i * 32); - for (int j = 0; j < 32 && j < Rem; ++j) { - if ((FD >> j) & 1) { - FD_SET(i * 32 + j, &Host_exceptfds); - } + if (exceptfds) { + for (int i = 0; i < NumWords; ++i) { + uint32_t FD = exceptfds[i]; + int32_t Rem = nfds - (i * 32); + for (int j = 0; j < 32 && j < Rem; ++j) { + if ((FD >> j) & 1) { + FD_SET(i * 32 + j, &Host_exceptfds); } } } + } - if (sigmaskpack && sigmaskpack->sigset) { - uint64_t *sigmask = sigmaskpack->sigset; - size_t sigsetsize = sigmaskpack->size; - for (int32_t i = 0; i < (sigsetsize * 8); ++i) { - if (*sigmask & (1ULL << i)) { - sigaddset(&HostSet, i + 1); - } + if (sigmaskpack && sigmaskpack->sigset) { + uint64_t* sigmask = sigmaskpack->sigset; + size_t sigsetsize = sigmaskpack->size; + for (int32_t i 
= 0; i < (sigsetsize * 8); ++i) { + if (*sigmask & (1ULL << i)) { + sigaddset(&HostSet, i + 1); } } + } - uint64_t Result = ::pselect(nfds, - readfds ? &Host_readfds : nullptr, - writefds ? &Host_writefds : nullptr, - exceptfds ? &Host_exceptfds : nullptr, - timeout ? &tp64 : nullptr, - &HostSet); - - if (readfds) { - for (int i = 0; i < nfds; ++i) { - if (FD_ISSET(i, &Host_readfds)) { - readfds[i/32] |= 1 << (i & 31); - } else { - readfds[i/32] &= ~(1 << (i & 31)); - } - } - } + uint64_t Result = ::pselect(nfds, readfds ? &Host_readfds : nullptr, writefds ? &Host_writefds : nullptr, + exceptfds ? &Host_exceptfds : nullptr, timeout, &HostSet); - if (writefds) { - for (int i = 0; i < nfds; ++i) { - if (FD_ISSET(i, &Host_writefds)) { - writefds[i/32] |= 1 << (i & 31); - } else { - writefds[i/32] &= ~(1 << (i & 31)); - } + if (readfds) { + for (int i = 0; i < nfds; ++i) { + if (FD_ISSET(i, &Host_readfds)) { + readfds[i / 32] |= 1 << (i & 31); + } else { + readfds[i / 32] &= ~(1 << (i & 31)); } } + } - if (exceptfds) { - for (int i = 0; i < nfds; ++i) { - if (FD_ISSET(i, &Host_exceptfds)) { - exceptfds[i/32] |= 1 << (i & 31); - } else { - exceptfds[i/32] &= ~(1 << (i & 31)); - } + if (writefds) { + for (int i = 0; i < nfds; ++i) { + if (FD_ISSET(i, &Host_writefds)) { + writefds[i / 32] |= 1 << (i & 31); + } else { + writefds[i / 32] &= ~(1 << (i & 31)); } } + } - if (timeout) { - *timeout = tp64; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(fadvise64_64, [](FEXCore::Core::CpuStateFrame *Frame, int32_t fd, uint32_t offset_low, uint32_t offset_high, uint32_t len_low, uint32_t len_high, int advice) -> uint64_t { - uint64_t Offset = offset_high; - Offset <<= 32; - Offset |= offset_low; - uint64_t Len = len_high; - Len <<= 32; - Len |= len_low; - uint64_t Result = ::posix_fadvise64(fd, Offset, Len, advice); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(timerfd_settime64, timerfd_settime, [](FEXCore::Core::CpuStateFrame *Frame, int fd, int 
flags, const struct itimerspec *new_value, struct itimerspec *old_value) -> uint64_t { - uint64_t Result = ::timerfd_settime(fd, flags, new_value, old_value); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(timerfd_gettime64, timerfd_gettime, [](FEXCore::Core::CpuStateFrame *Frame, int fd, struct itimerspec *curr_value) -> uint64_t { - uint64_t Result = ::timerfd_gettime(fd, curr_value); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(timerfd_settime, [](FEXCore::Core::CpuStateFrame *Frame, - int fd, - int flags, - const FEX::HLE::x32::old_itimerspec32 *new_value, - FEX::HLE::x32::old_itimerspec32 *old_value) -> uint64_t { - struct itimerspec new_value_host{}; - struct itimerspec old_value_host{}; - struct itimerspec *old_value_host_p{}; - - new_value_host = *new_value; - if (old_value) { - old_value_host_p = &old_value_host; + if (exceptfds) { + for (int i = 0; i < nfds; ++i) { + if (FD_ISSET(i, &Host_exceptfds)) { + exceptfds[i / 32] |= 1 << (i & 31); + } else { + exceptfds[i / 32] &= ~(1 << (i & 31)); + } } + } - // Flags don't need remapped - uint64_t Result = ::timerfd_settime(fd, flags, &new_value_host, old_value_host_p); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(sendfile, [](FEXCore::Core::CpuStateFrame* Frame, int out_fd, int in_fd, compat_off_t* offset, size_t count) -> uint64_t { + off_t Local {}; + off_t* Local_p {}; + if (offset) { + Local_p = &Local; + Local = *offset; + } + uint64_t Result = ::sendfile(out_fd, in_fd, Local_p, count); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL( + sendfile64, sendfile, [](FEXCore::Core::CpuStateFrame* Frame, int out_fd, int in_fd, off_t* offset, compat_size_t count) -> uint64_t { + // Linux definition for this is a bit confusing + // Defines offset as compat_loff_t* but loads loff_t worth of data + // count is defined as compat_size_t still + uint64_t Result = ::sendfile(out_fd, in_fd, offset, count); + SYSCALL_ERRNO(); + }); - if (Result != -1 && old_value) 
{ - *old_value = old_value_host; - } - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X32( + pread_64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* buf, uint32_t count, uint32_t offset_low, uint32_t offset_high) -> uint64_t { + uint64_t Offset = offset_high; + Offset <<= 32; + Offset |= offset_low; - REGISTER_SYSCALL_IMPL_X32(timerfd_gettime, [](FEXCore::Core::CpuStateFrame *Frame, int fd, FEX::HLE::x32::old_itimerspec32 *curr_value) -> uint64_t { - struct itimerspec Host{}; + uint64_t Result = ::pread64(fd, buf, count, Offset); + SYSCALL_ERRNO(); + }); - uint64_t Result = ::timerfd_gettime(fd, &Host); + REGISTER_SYSCALL_IMPL_X32( + pwrite_64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* buf, uint32_t count, uint32_t offset_low, uint32_t offset_high) -> uint64_t { + uint64_t Offset = offset_high; + Offset <<= 32; + Offset |= offset_low; - if (Result != -1) { - *curr_value = Host; - } + uint64_t Result = ::pwrite64(fd, buf, count, Offset); + SYSCALL_ERRNO(); + }); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(pselect6_time64, [](FEXCore::Core::CpuStateFrame *Frame, int nfds, fd_set32 *readfds, fd_set32 *writefds, fd_set32 *exceptfds, struct timespec *timeout, compat_ptr sigmaskpack) -> uint64_t { - fd_set Host_readfds; - fd_set Host_writefds; - fd_set Host_exceptfds; - sigset_t HostSet{}; - - FD_ZERO(&Host_readfds); - FD_ZERO(&Host_writefds); - FD_ZERO(&Host_exceptfds); - sigemptyset(&HostSet); - - // Round up to the full 32bit word - uint32_t NumWords = FEXCore::AlignUp(nfds, 32) / 4; - - if (readfds) { - for (int i = 0; i < NumWords; ++i) { - uint32_t FD = readfds[i]; - int32_t Rem = nfds - (i * 32); - for (int j = 0; j < 32 && j < Rem; ++j) { - if ((FD >> j) & 1) { - FD_SET(i * 32 + j, &Host_readfds); - } - } - } - } + REGISTER_SYSCALL_IMPL_X32( + readahead, [](FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t offset_low, uint64_t offset_high, size_t count) -> uint64_t { + uint64_t Offset = offset_high; + Offset <<= 32; + Offset |= 
offset_low; - if (writefds) { - for (int i = 0; i < NumWords; ++i) { - uint32_t FD = writefds[i]; - int32_t Rem = nfds - (i * 32); - for (int j = 0; j < 32 && j < Rem; ++j) { - if ((FD >> j) & 1) { - FD_SET(i * 32 + j, &Host_writefds); - } - } - } - } + uint64_t Result = ::readahead(fd, Offset, count); + SYSCALL_ERRNO(); + }); - if (exceptfds) { - for (int i = 0; i < NumWords; ++i) { - uint32_t FD = exceptfds[i]; - int32_t Rem = nfds - (i * 32); - for (int j = 0; j < 32 && j < Rem; ++j) { - if ((FD >> j) & 1) { - FD_SET(i * 32 + j, &Host_exceptfds); - } - } - } - } + REGISTER_SYSCALL_IMPL_X32(sync_file_range, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t offset_low, uint32_t offset_high, uint32_t len_low, + uint32_t len_high, unsigned int flags) -> uint64_t { + // Flags don't need remapped + uint64_t Offset = offset_high; + Offset <<= 32; + Offset |= offset_low; - if (sigmaskpack && sigmaskpack->sigset) { - uint64_t *sigmask = sigmaskpack->sigset; - size_t sigsetsize = sigmaskpack->size; - for (int32_t i = 0; i < (sigsetsize * 8); ++i) { - if (*sigmask & (1ULL << i)) { - sigaddset(&HostSet, i + 1); - } - } - } + uint64_t Len = len_high; + Len <<= 32; + Len |= len_low; - uint64_t Result = ::pselect(nfds, - readfds ? &Host_readfds : nullptr, - writefds ? &Host_writefds : nullptr, - exceptfds ? 
&Host_exceptfds : nullptr, - timeout, - &HostSet); - - if (readfds) { - for (int i = 0; i < nfds; ++i) { - if (FD_ISSET(i, &Host_readfds)) { - readfds[i/32] |= 1 << (i & 31); - } else { - readfds[i/32] &= ~(1 << (i & 31)); - } - } - } + uint64_t Result = ::syscall(SYSCALL_DEF(sync_file_range), fd, Offset, Len, flags); + SYSCALL_ERRNO(); + }); - if (writefds) { - for (int i = 0; i < nfds; ++i) { - if (FD_ISSET(i, &Host_writefds)) { - writefds[i/32] |= 1 << (i & 31); - } else { - writefds[i/32] &= ~(1 << (i & 31)); - } - } - } + REGISTER_SYSCALL_IMPL_X32(fallocate, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, int mode, uint32_t offset_low, uint32_t offset_high, + uint32_t len_low, uint32_t len_high) -> uint64_t { + uint64_t Offset = offset_high; + Offset <<= 32; + Offset |= offset_low; - if (exceptfds) { - for (int i = 0; i < nfds; ++i) { - if (FD_ISSET(i, &Host_exceptfds)) { - exceptfds[i/32] |= 1 << (i & 31); - } else { - exceptfds[i/32] &= ~(1 << (i & 31)); - } - } - } + uint64_t Len = len_high; + Len <<= 32; + Len |= len_low; - SYSCALL_ERRNO(); - }); + uint64_t Result = ::fallocate(fd, mode, Offset, Len); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32(sendfile, [](FEXCore::Core::CpuStateFrame *Frame, int out_fd, int in_fd, compat_off_t *offset, size_t count) -> uint64_t { - off_t Local{}; - off_t *Local_p{}; - if (offset) { - Local_p = &Local; - Local = *offset; - } - uint64_t Result = ::sendfile(out_fd, in_fd, Local_p, count); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(sendfile64, sendfile, [](FEXCore::Core::CpuStateFrame *Frame, int out_fd, int in_fd, off_t *offset, compat_size_t count) -> uint64_t { - // Linux definition for this is a bit confusing - // Defines offset as compat_loff_t* but loads loff_t worth of data - // count is defined as compat_size_t still - uint64_t Result = ::sendfile(out_fd, in_fd, offset, count); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(pread_64, [](FEXCore::Core::CpuStateFrame *Frame, 
int fd, void *buf, uint32_t count, uint32_t offset_low, uint32_t offset_high) -> uint64_t { - uint64_t Offset = offset_high; - Offset <<= 32; - Offset |= offset_low; - - uint64_t Result = ::pread64(fd, buf, count, Offset); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(pwrite_64, [](FEXCore::Core::CpuStateFrame *Frame, int fd, void *buf, uint32_t count, uint32_t offset_low, uint32_t offset_high) -> uint64_t { - uint64_t Offset = offset_high; - Offset <<= 32; - Offset |= offset_low; - - uint64_t Result = ::pwrite64(fd, buf, count, Offset); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(readahead, [](FEXCore::Core::CpuStateFrame *Frame, int fd, uint32_t offset_low, uint64_t offset_high, size_t count) -> uint64_t { - uint64_t Offset = offset_high; - Offset <<= 32; - Offset |= offset_low; - - uint64_t Result = ::readahead(fd, Offset, count); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(sync_file_range, [](FEXCore::Core::CpuStateFrame *Frame, - int fd, - uint32_t offset_low, - uint32_t offset_high, - uint32_t len_low, - uint32_t len_high, - unsigned int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Offset = offset_high; - Offset <<= 32; - Offset |= offset_low; - - uint64_t Len = len_high; - Len <<= 32; - Len |= len_low; - - uint64_t Result = ::syscall(SYSCALL_DEF(sync_file_range), fd, Offset, Len, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(fallocate, [](FEXCore::Core::CpuStateFrame *Frame, - int fd, - int mode, - uint32_t offset_low, - uint32_t offset_high, - uint32_t len_low, - uint32_t len_high) -> uint64_t { - uint64_t Offset = offset_high; - Offset <<= 32; - Offset |= offset_low; - - uint64_t Len = len_high; - Len <<= 32; - Len |= len_low; - - uint64_t Result = ::fallocate(fd, mode, Offset, Len); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(vmsplice, [](FEXCore::Core::CpuStateFrame *Frame, int fd, const struct iovec32 *iov, unsigned long nr_segs, unsigned int flags) -> uint64_t { - 
fextl::vector Host_iovec(iov, iov + nr_segs); - uint64_t Result = ::vmsplice(fd, Host_iovec.data(), nr_segs, flags); - SYSCALL_ERRNO(); - }); - } + REGISTER_SYSCALL_IMPL_X32( + vmsplice, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, unsigned long nr_segs, unsigned int flags) -> uint64_t { + fextl::vector Host_iovec(iov, iov + nr_segs); + uint64_t Result = ::vmsplice(fd, Host_iovec.data(), nr_segs, flags); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/FS.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/FS.cpp index dab1539b72..905f0c7936 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/FS.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/FS.cpp @@ -15,30 +15,32 @@ tags: LinuxSyscalls|syscalls-x86-32 #include namespace FEX::HLE::x32 { - void RegisterFS(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X32(umount, [](FEXCore::Core::CpuStateFrame *Frame, const char *target) -> uint64_t { - uint64_t Result = ::umount(target); - SYSCALL_ERRNO(); - }); +void RegisterFS(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X32(umount, [](FEXCore::Core::CpuStateFrame* Frame, const char* target) -> uint64_t { + uint64_t Result = ::umount(target); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32(truncate64, [](FEXCore::Core::CpuStateFrame *Frame, const char *path, uint32_t offset_low, uint32_t offset_high) -> uint64_t { - uint64_t Offset = offset_high; - Offset <<= 32; - Offset |= offset_low; - uint64_t Result = ::truncate(path, Offset); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X32( + truncate64, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, uint32_t offset_low, uint32_t offset_high) -> uint64_t { + uint64_t Offset = offset_high; + Offset <<= 32; + Offset |= offset_low; + uint64_t Result = ::truncate(path, Offset); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32(ftruncate64, [](FEXCore::Core::CpuStateFrame *Frame, 
int fd, uint32_t offset_low, uint32_t offset_high) -> uint64_t { - uint64_t Offset = offset_high; - Offset <<= 32; - Offset |= offset_low; - uint64_t Result = ::ftruncate(fd, Offset); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X32(ftruncate64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t offset_low, uint32_t offset_high) -> uint64_t { + uint64_t Offset = offset_high; + Offset <<= 32; + Offset |= offset_low; + uint64_t Result = ::ftruncate(fd, Offset); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32(sigprocmask, [](FEXCore::Core::CpuStateFrame *Frame, int how, const uint64_t *set, uint64_t *oldset, size_t sigsetsize) -> uint64_t { - return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigProcMask(how, set, oldset); - }); - } + REGISTER_SYSCALL_IMPL_X32( + sigprocmask, [](FEXCore::Core::CpuStateFrame* Frame, int how, const uint64_t* set, uint64_t* oldset, size_t sigsetsize) -> uint64_t { + return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigProcMask(how, set, oldset); + }); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/IO.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/IO.cpp index df9d936334..85b27943df 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/IO.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/IO.cpp @@ -16,41 +16,40 @@ tags: LinuxSyscalls|syscalls-x86-32 #include namespace FEX::HLE::x32 { - void RegisterIO(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X32(io_getevents, [](FEXCore::Core::CpuStateFrame *Frame, aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec32 *timeout) -> uint64_t { - struct timespec* timeout_ptr{}; - struct timespec tp64{}; - if (timeout) { - tp64 = *timeout; - timeout_ptr = &tp64; - } +void RegisterIO(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X32(io_getevents, + [](FEXCore::Core::CpuStateFrame* Frame, aio_context_t ctx_id, long min_nr, long nr, struct io_event* 
events, + struct timespec32* timeout) -> uint64_t { + struct timespec* timeout_ptr {}; + struct timespec tp64 {}; + if (timeout) { + tp64 = *timeout; + timeout_ptr = &tp64; + } - uint64_t Result = ::syscall(SYSCALL_DEF(io_getevents), ctx_id, min_nr, nr, events, timeout_ptr); - SYSCALL_ERRNO(); - }); + uint64_t Result = ::syscall(SYSCALL_DEF(io_getevents), ctx_id, min_nr, nr, events, timeout_ptr); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32(io_pgetevents, [](FEXCore::Core::CpuStateFrame *Frame, aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec32 *timeout, const struct io_sigset *usig) -> uint64_t { - struct timespec* timeout_ptr{}; - struct timespec tp64{}; - if (timeout) { - tp64 = *timeout; - timeout_ptr = &tp64; - } + REGISTER_SYSCALL_IMPL_X32(io_pgetevents, + [](FEXCore::Core::CpuStateFrame* Frame, aio_context_t ctx_id, long min_nr, long nr, struct io_event* events, + struct timespec32* timeout, const struct io_sigset* usig) -> uint64_t { + struct timespec* timeout_ptr {}; + struct timespec tp64 {}; + if (timeout) { + tp64 = *timeout; + timeout_ptr = &tp64; + } - uint64_t Result = ::syscall(SYSCALL_DEF(io_pgetevents), ctx_id, min_nr, nr, events, timeout_ptr, usig); - SYSCALL_ERRNO(); - }); + uint64_t Result = ::syscall(SYSCALL_DEF(io_pgetevents), ctx_id, min_nr, nr, events, timeout_ptr, usig); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32_PASS(io_pgetevents_time64, - [](FEXCore::Core::CpuStateFrame *Frame, - aio_context_t ctx_id, - long min_nr, - long nr, - struct io_event *events, - struct timespec *timeout, - const struct io_sigset *usig) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(io_pgetevents), ctx_id, min_nr, nr, events, timeout, usig); - SYSCALL_ERRNO(); - }); - } + REGISTER_SYSCALL_IMPL_X32_PASS(io_pgetevents_time64, + [](FEXCore::Core::CpuStateFrame* Frame, aio_context_t ctx_id, long min_nr, long nr, + struct io_event* events, struct timespec* timeout, const struct io_sigset* usig) -> 
uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(io_pgetevents), ctx_id, min_nr, nr, events, timeout, usig); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Info.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Info.cpp index 27256304bb..ded76362c7 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Info.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Info.cpp @@ -21,145 +21,142 @@ tags: LinuxSyscalls|syscalls-x86-32 #include namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } ARG_TO_STR(FEX::HLE::x32::compat_ptr>, "%lx") ARG_TO_STR(FEX::HLE::x32::compat_ptr>, "%lx") namespace FEX::HLE::x32 { - struct sysinfo32 { - int32_t uptime; - uint32_t loads[3]; - uint32_t totalram; - uint32_t freeram; - uint32_t sharedram; - uint32_t bufferram; - uint32_t totalswap; - uint32_t freeswap; - uint16_t procs; - uint32_t totalhigh; - uint32_t freehigh; - uint32_t mem_unit; - char _pad[8]; - }; - - static_assert(sizeof(sysinfo32) == 64, "Needs to be 64bytes"); - - void RegisterInfo(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X32(oldolduname, [](FEXCore::Core::CpuStateFrame *Frame, struct oldold_utsname *buf) -> uint64_t { - struct utsname Local{}; - - memset(buf, 0, sizeof(*buf)); - if (::uname(&Local) == 0) { - memcpy(buf->nodename, Local.nodename, __OLD_UTS_LEN); - } - else { - strncpy(buf->nodename, "FEXCore", __OLD_UTS_LEN); - LogMan::Msg::EFmt("Couldn't determine host nodename. 
Defaulting to '{}'", buf->nodename); - } - strncpy(buf->sysname, "Linux", __OLD_UTS_LEN); - uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion(); - snprintf(buf->release, __OLD_UTS_LEN, "%d.%d.%d", - FEX::HLE::SyscallHandler::KernelMajor(GuestVersion), - FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), - FEX::HLE::SyscallHandler::KernelPatch(GuestVersion)); - - const char version[] = "#" GIT_DESCRIBE_STRING " SMP " __DATE__ " " __TIME__; - strncpy(buf->version, version, __OLD_UTS_LEN); - // Tell the guest that we are a 64bit kernel - strncpy(buf->machine, "x86_64", __OLD_UTS_LEN); - return 0; - }); - - REGISTER_SYSCALL_IMPL_X32(olduname, [](FEXCore::Core::CpuStateFrame *Frame, struct old_utsname *buf) -> uint64_t { - struct utsname Local{}; - - memset(buf, 0, sizeof(*buf)); - if (::uname(&Local) == 0) { - memcpy(buf->nodename, Local.nodename, __NEW_UTS_LEN); - } - else { - strncpy(buf->nodename, "FEXCore", __NEW_UTS_LEN); - LogMan::Msg::EFmt("Couldn't determine host nodename. 
Defaulting to '{}'", buf->nodename); - } - strncpy(buf->sysname, "Linux", __NEW_UTS_LEN); - uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion(); - snprintf(buf->release, __NEW_UTS_LEN, "%d.%d.%d", - FEX::HLE::SyscallHandler::KernelMajor(GuestVersion), - FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), - FEX::HLE::SyscallHandler::KernelPatch(GuestVersion)); - - const char version[] = "#" GIT_DESCRIBE_STRING " SMP " __DATE__ " " __TIME__; - strncpy(buf->version, version, __NEW_UTS_LEN); - // Tell the guest that we are a 64bit kernel - strncpy(buf->machine, "x86_64", __NEW_UTS_LEN); - return 0; - }); - - REGISTER_SYSCALL_IMPL_X32(getrlimit, [](FEXCore::Core::CpuStateFrame *Frame, int resource, compat_ptr> rlim) -> uint64_t { - struct rlimit rlim64{}; - uint64_t Result = ::getrlimit(resource, &rlim64); - *rlim = rlim64; - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(ugetrlimit, [](FEXCore::Core::CpuStateFrame *Frame, int resource, compat_ptr> rlim) -> uint64_t { - struct rlimit rlim64{}; - uint64_t Result = ::getrlimit(resource, &rlim64); - *rlim = rlim64; - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(setrlimit, [](FEXCore::Core::CpuStateFrame *Frame, int resource, const compat_ptr> rlim) -> uint64_t { - struct rlimit rlim64{}; - rlim64 = *rlim; - uint64_t Result = ::setrlimit(resource, &rlim64); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(sysinfo, [](FEXCore::Core::CpuStateFrame *Frame, struct sysinfo32 *info) -> uint64_t { - struct sysinfo Host{}; - uint64_t Result = ::sysinfo(&Host); - if (Result != -1) { -#define Copy(x) info->x = static_castx)>(std::min(Host.x, static_cast(std::numeric_limitsx)>::max()))); - Copy(uptime); - Copy(procs); +struct sysinfo32 { + int32_t uptime; + uint32_t loads[3]; + uint32_t totalram; + uint32_t freeram; + uint32_t sharedram; + uint32_t bufferram; + uint32_t totalswap; + uint32_t freeswap; + uint16_t procs; + uint32_t totalhigh; + uint32_t freehigh; + uint32_t mem_unit; + char 
_pad[8]; +}; + +static_assert(sizeof(sysinfo32) == 64, "Needs to be 64bytes"); + +void RegisterInfo(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X32(oldolduname, [](FEXCore::Core::CpuStateFrame* Frame, struct oldold_utsname* buf) -> uint64_t { + struct utsname Local {}; + + memset(buf, 0, sizeof(*buf)); + if (::uname(&Local) == 0) { + memcpy(buf->nodename, Local.nodename, __OLD_UTS_LEN); + } else { + strncpy(buf->nodename, "FEXCore", __OLD_UTS_LEN); + LogMan::Msg::EFmt("Couldn't determine host nodename. Defaulting to '{}'", buf->nodename); + } + strncpy(buf->sysname, "Linux", __OLD_UTS_LEN); + uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion(); + snprintf(buf->release, __OLD_UTS_LEN, "%d.%d.%d", FEX::HLE::SyscallHandler::KernelMajor(GuestVersion), + FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), FEX::HLE::SyscallHandler::KernelPatch(GuestVersion)); + + const char version[] = "#" GIT_DESCRIBE_STRING " SMP " __DATE__ " " __TIME__; + strncpy(buf->version, version, __OLD_UTS_LEN); + // Tell the guest that we are a 64bit kernel + strncpy(buf->machine, "x86_64", __OLD_UTS_LEN); + return 0; + }); + + REGISTER_SYSCALL_IMPL_X32(olduname, [](FEXCore::Core::CpuStateFrame* Frame, struct old_utsname* buf) -> uint64_t { + struct utsname Local {}; + + memset(buf, 0, sizeof(*buf)); + if (::uname(&Local) == 0) { + memcpy(buf->nodename, Local.nodename, __NEW_UTS_LEN); + } else { + strncpy(buf->nodename, "FEXCore", __NEW_UTS_LEN); + LogMan::Msg::EFmt("Couldn't determine host nodename. 
Defaulting to '{}'", buf->nodename); + } + strncpy(buf->sysname, "Linux", __NEW_UTS_LEN); + uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion(); + snprintf(buf->release, __NEW_UTS_LEN, "%d.%d.%d", FEX::HLE::SyscallHandler::KernelMajor(GuestVersion), + FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), FEX::HLE::SyscallHandler::KernelPatch(GuestVersion)); + + const char version[] = "#" GIT_DESCRIBE_STRING " SMP " __DATE__ " " __TIME__; + strncpy(buf->version, version, __NEW_UTS_LEN); + // Tell the guest that we are a 64bit kernel + strncpy(buf->machine, "x86_64", __NEW_UTS_LEN); + return 0; + }); + + REGISTER_SYSCALL_IMPL_X32( + getrlimit, [](FEXCore::Core::CpuStateFrame* Frame, int resource, compat_ptr> rlim) -> uint64_t { + struct rlimit rlim64 {}; + uint64_t Result = ::getrlimit(resource, &rlim64); + *rlim = rlim64; + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + ugetrlimit, [](FEXCore::Core::CpuStateFrame* Frame, int resource, compat_ptr> rlim) -> uint64_t { + struct rlimit rlim64 {}; + uint64_t Result = ::getrlimit(resource, &rlim64); + *rlim = rlim64; + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + setrlimit, [](FEXCore::Core::CpuStateFrame* Frame, int resource, const compat_ptr> rlim) -> uint64_t { + struct rlimit rlim64 {}; + rlim64 = *rlim; + uint64_t Result = ::setrlimit(resource, &rlim64); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(sysinfo, [](FEXCore::Core::CpuStateFrame* Frame, struct sysinfo32* info) -> uint64_t { + struct sysinfo Host {}; + uint64_t Result = ::sysinfo(&Host); + if (Result != -1) { +#define Copy(x) \ + info->x = static_castx)>(std::min(Host.x, static_cast(std::numeric_limitsx)>::max()))); + Copy(uptime); + Copy(procs); #define CopyShift(x) info->x = static_castx)>(Host.x >> ShiftAmount); - info->loads[0] = std::min(Host.loads[0], static_cast(std::numeric_limits::max())); - info->loads[1] = std::min(Host.loads[1], static_cast(std::numeric_limits::max())); - info->loads[2] = 
std::min(Host.loads[2], static_cast(std::numeric_limits::max())); + info->loads[0] = std::min(Host.loads[0], static_cast(std::numeric_limits::max())); + info->loads[1] = std::min(Host.loads[1], static_cast(std::numeric_limits::max())); + info->loads[2] = std::min(Host.loads[2], static_cast(std::numeric_limits::max())); - // If any result can't fit in to a uint32_t then we need to shift the mem_unit and all the members - // Set the mem_unit to the pagesize - uint32_t ShiftAmount{}; - if ((Host.totalram >> 32) != 0 || - (Host.totalswap >> 32) != 0) { + // If any result can't fit in to a uint32_t then we need to shift the mem_unit and all the members + // Set the mem_unit to the pagesize + uint32_t ShiftAmount {}; + if ((Host.totalram >> 32) != 0 || (Host.totalswap >> 32) != 0) { - while (Host.mem_unit < 4096) { - Host.mem_unit <<= 1; - ++ShiftAmount; - } + while (Host.mem_unit < 4096) { + Host.mem_unit <<= 1; + ++ShiftAmount; } - - CopyShift(totalram); - CopyShift(sharedram); - CopyShift(bufferram); - CopyShift(totalswap); - CopyShift(freeswap); - CopyShift(totalhigh); - CopyShift(freehigh); - Copy(mem_unit); } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(getrusage, [](FEXCore::Core::CpuStateFrame *Frame, int who, rusage_32 *usage) -> uint64_t { - struct rusage usage64 = *usage; - uint64_t Result = ::getrusage(who, &usage64); - *usage = usage64; - SYSCALL_ERRNO(); - }); - } + + CopyShift(totalram); + CopyShift(sharedram); + CopyShift(bufferram); + CopyShift(totalswap); + CopyShift(freeswap); + CopyShift(totalhigh); + CopyShift(freehigh); + Copy(mem_unit); + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(getrusage, [](FEXCore::Core::CpuStateFrame* Frame, int who, rusage_32* usage) -> uint64_t { + struct rusage usage64 = *usage; + uint64_t Result = ::getrusage(who, &usage64); + *usage = usage64; + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/IoctlEmulation.cpp 
b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/IoctlEmulation.cpp index 5e4a6364f6..776f7d7c3e 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/IoctlEmulation.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/IoctlEmulation.cpp @@ -26,376 +26,377 @@ #include namespace FEX::HLE::x32 { - static void UnhandledIoctl(const char *Type, int fd, uint32_t cmd, uint32_t args) { - LogMan::Msg::EFmt("@@@@@@@@@@@@@@@@@@@@@@@@@"); - LogMan::Msg::EFmt("Unhandled {} ioctl({}, 0x{:08x}, 0x{:08x})", Type, fd, cmd, args); - LogMan::Msg::EFmt("\tDir : 0x{:x}", _IOC_DIR(cmd)); - LogMan::Msg::EFmt("\tType : 0x{:x}", _IOC_TYPE(cmd)); - LogMan::Msg::EFmt("\tNR : 0x{:x}", _IOC_NR(cmd)); - LogMan::Msg::EFmt("\tSIZE : 0x{:x}", _IOC_SIZE(cmd)); - LogMan::Msg::AFmt("@@@@@@@@@@@@@@@@@@@@@@@@@"); +static void UnhandledIoctl(const char* Type, int fd, uint32_t cmd, uint32_t args) { + LogMan::Msg::EFmt("@@@@@@@@@@@@@@@@@@@@@@@@@"); + LogMan::Msg::EFmt("Unhandled {} ioctl({}, 0x{:08x}, 0x{:08x})", Type, fd, cmd, args); + LogMan::Msg::EFmt("\tDir : 0x{:x}", _IOC_DIR(cmd)); + LogMan::Msg::EFmt("\tType : 0x{:x}", _IOC_TYPE(cmd)); + LogMan::Msg::EFmt("\tNR : 0x{:x}", _IOC_NR(cmd)); + LogMan::Msg::EFmt("\tSIZE : 0x{:x}", _IOC_SIZE(cmd)); + LogMan::Msg::AFmt("@@@@@@@@@@@@@@@@@@@@@@@@@"); +} + +namespace BasicHandler { + uint64_t BasicHandler(int fd, uint32_t cmd, uint32_t args) { + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); } +} // namespace BasicHandler + +namespace DRM { + uint32_t AddAndRunHandler(int fd, uint32_t cmd, uint32_t args); + void AssignDeviceTypeToFD(int fd, const drm_version& Version); + + template + class LRUCacheFDCache { + public: + LRUCacheFDCache() { + // Set the last element to our handler + // This element will always be the last one + LRUCache[LRUSize] = std::make_pair(0, AddAndRunHandler); + } - namespace BasicHandler { - uint64_t BasicHandler(int fd, uint32_t cmd, uint32_t args) { - uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); + 
using HandlerType = uint32_t (*)(int fd, uint32_t cmd, uint32_t args); + void SetFDHandler(uint32_t FD, HandlerType Handler) { + FDToHandler[FD] = Handler; } - } - namespace DRM { - uint32_t AddAndRunHandler(int fd, uint32_t cmd, uint32_t args); - void AssignDeviceTypeToFD(int fd, drm_version const &Version); + void DuplicateFD(int fd, int NewFD) { + auto it = FDToHandler.find(fd); + if (it != FDToHandler.end()) { + FDToHandler[NewFD] = it->second; + } + } - template - class LRUCacheFDCache { - public: - LRUCacheFDCache() { - // Set the last element to our handler - // This element will always be the last one - LRUCache[LRUSize] = std::make_pair(0, AddAndRunHandler); + HandlerType FindHandler(uint32_t FD) { + HandlerType Handler {}; + for (size_t i = 0; i < LRUSize; ++i) { + auto& it = LRUCache[i]; + if (it.first == FD) { + if (i == 0) { + // If we are the first in the queue then just return it + return it.second; + } + Handler = it.second; + break; + } } - using HandlerType = uint32_t(*)(int fd, uint32_t cmd, uint32_t args); - void SetFDHandler(uint32_t FD, HandlerType Handler) { - FDToHandler[FD] = Handler; + if (Handler) { + AddToFront(FD, Handler); + return Handler; } + return LRUCache[LRUSize].second; + } - void DuplicateFD(int fd, int NewFD) { + uint32_t AddAndRunMapHandler(int fd, uint32_t cmd, uint32_t args) { + // Couldn't find in cache, check map + { auto it = FDToHandler.find(fd); if (it != FDToHandler.end()) { - FDToHandler[NewFD] = it->second; + // Found, add to the cache + AddToFront(fd, it->second); + return it->second(fd, cmd, args); } } - HandlerType FindHandler(uint32_t FD) { - HandlerType Handler{}; - for (size_t i = 0; i < LRUSize; ++i) { - auto &it = LRUCache[i]; - if (it.first == FD) { - if (i == 0) { - // If we are the first in the queue then just return it - return it.second; - } - Handler = it.second; - break; - } - } + // Wasn't found in map, query it + drm_version Host_Version {}; + Host_Version.name = reinterpret_cast(alloca(128)); + 
Host_Version.name_len = 128; + uint64_t Result = ioctl(fd, DRM_IOCTL_VERSION, &Host_Version); - if (Handler) { - AddToFront(FD, Handler); - return Handler; - } - return LRUCache[LRUSize].second; + // Add it to the map and double check that it was added + // Next time around when the ioctl is used then it will be added to cache + if (Result != -1) { + AssignDeviceTypeToFD(fd, Host_Version); } - uint32_t AddAndRunMapHandler(int fd, uint32_t cmd, uint32_t args) { - // Couldn't find in cache, check map - { - auto it = FDToHandler.find(fd); - if (it != FDToHandler.end()) { - // Found, add to the cache - AddToFront(fd, it->second); - return it->second(fd, cmd, args); - } - } - - // Wasn't found in map, query it - drm_version Host_Version{}; - Host_Version.name = reinterpret_cast(alloca(128)); - Host_Version.name_len = 128; - uint64_t Result = ioctl(fd, DRM_IOCTL_VERSION, &Host_Version); - - // Add it to the map and double check that it was added - // Next time around when the ioctl is used then it will be added to cache - if (Result != -1) { - AssignDeviceTypeToFD(fd, Host_Version); - } - - auto it = FDToHandler.find(fd); + auto it = FDToHandler.find(fd); - if (it == FDToHandler.end()) { - // We don't understand this DRM ioctl - return -EPERM; - } - Result = it->second(fd, cmd, args); - SYSCALL_ERRNO(); + if (it == FDToHandler.end()) { + // We don't understand this DRM ioctl + return -EPERM; } + Result = it->second(fd, cmd, args); + SYSCALL_ERRNO(); + } - private: - void AddToFront(uint32_t FD, HandlerType Handler) { - // Push the element to the front if we found one - // First copy all the other elements back one - // Ensuring the final element isn't written over - memmove(&LRUCache[1], &LRUCache[0], (LRUSize - 1) * sizeof(LRUCache[0])); - // Now set the first element to the one we just found - LRUCache[0] = std::make_pair(FD, Handler); - } - // With four elements total (3 + 1) then this is a single cacheline in size - std::pair LRUCache[LRUSize + 1]; - fextl::map 
FDToHandler; - }; + private: + void AddToFront(uint32_t FD, HandlerType Handler) { + // Push the element to the front if we found one + // First copy all the other elements back one + // Ensuring the final element isn't written over + memmove(&LRUCache[1], &LRUCache[0], (LRUSize - 1) * sizeof(LRUCache[0])); + // Now set the first element to the one we just found + LRUCache[0] = std::make_pair(FD, Handler); + } + // With four elements total (3 + 1) then this is a single cacheline in size + std::pair LRUCache[LRUSize + 1]; + fextl::map FDToHandler; + }; - static LRUCacheFDCache<3> FDToHandler; + static LRUCacheFDCache<3> FDToHandler; - uint32_t AddAndRunHandler(int fd, uint32_t cmd, uint32_t args) { - return FDToHandler.AddAndRunMapHandler(fd, cmd, args); - } + uint32_t AddAndRunHandler(int fd, uint32_t cmd, uint32_t args) { + return FDToHandler.AddAndRunMapHandler(fd, cmd, args); + } - void CheckAndAddFDDuplication(int fd, int NewFD) { - FDToHandler.DuplicateFD(fd, NewFD); - } + void CheckAndAddFDDuplication(int fd, int NewFD) { + FDToHandler.DuplicateFD(fd, NewFD); + } - uint32_t AMDGPU_Handler(int fd, uint32_t cmd, uint32_t args) { - switch (_IOC_NR(cmd)) { - case _IOC_NR(FEX_DRM_IOCTL_AMDGPU_GEM_METADATA): { - AMDGPU::fex_drm_amdgpu_gem_metadata *val = reinterpret_cast(args); - drm_amdgpu_gem_metadata Host_val = *val; - uint64_t Result = ioctl(fd, DRM_IOCTL_AMDGPU_GEM_METADATA, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } + uint32_t AMDGPU_Handler(int fd, uint32_t cmd, uint32_t args) { + switch (_IOC_NR(cmd)) { + case _IOC_NR(FEX_DRM_IOCTL_AMDGPU_GEM_METADATA): { + AMDGPU::fex_drm_amdgpu_gem_metadata* val = reinterpret_cast(args); + drm_amdgpu_gem_metadata Host_val = *val; + uint64_t Result = ioctl(fd, DRM_IOCTL_AMDGPU_GEM_METADATA, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } #define _BASIC_META(x) case _IOC_NR(x): #define _BASIC_META_VAR(x, args...) 
case _IOC_NR(x): #define _CUSTOM_META(name, ioctl_num) #define _CUSTOM_META_OFFSET(name, ioctl_num, offset) - // DRM + // DRM #include "LinuxSyscalls/x32/Ioctl/amdgpu_drm.inl" - { - uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); - break; - } - default: - UnhandledIoctl("AMDGPU", fd, cmd, args); - return -EPERM; - break; + { + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); + break; } + default: + UnhandledIoctl("AMDGPU", fd, cmd, args); + return -EPERM; + break; + } #undef _BASIC_META #undef _BASIC_META_VAR #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET - return -EPERM; - } + return -EPERM; + } - uint32_t RADEON_Handler(int fd, uint32_t cmd, uint32_t args) { - switch (_IOC_NR(cmd)) { - case _IOC_NR(FEX_DRM_IOCTL_RADEON_CP_INIT): { - RADEON::fex_drm_radeon_init_t *val = reinterpret_cast(args); - drm_radeon_init_t Host_val = *val; - uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_CP_INIT, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } - case _IOC_NR(FEX_DRM_IOCTL_RADEON_CLEAR): { - RADEON::fex_drm_radeon_clear_t *val = reinterpret_cast(args); - drm_radeon_clear_t Host_val = *val; - uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_CLEAR, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } - case _IOC_NR(FEX_DRM_IOCTL_RADEON_STIPPLE): { - RADEON::fex_drm_radeon_stipple_t *val = reinterpret_cast(args); - drm_radeon_stipple_t Host_val = *val; - uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_STIPPLE, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } - case _IOC_NR(FEX_DRM_IOCTL_RADEON_TEXTURE): { - RADEON::fex_drm_radeon_texture_t *val = reinterpret_cast(args); - drm_radeon_texture_t Host_val = *val; - uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_TEXTURE, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } - case _IOC_NR(FEX_DRM_IOCTL_RADEON_VERTEX2): { - RADEON::fex_drm_radeon_vertex2_t *val = 
reinterpret_cast(args); - drm_radeon_vertex2_t Host_val = *val; - uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_VERTEX2, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } - case _IOC_NR(FEX_DRM_IOCTL_RADEON_CMDBUF): { - RADEON::fex_drm_radeon_cmd_buffer_t *val = reinterpret_cast(args); - drm_radeon_cmd_buffer_t Host_val = *val; - uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_CMDBUF, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } - case _IOC_NR(FEX_DRM_IOCTL_RADEON_GETPARAM): { - RADEON::fex_drm_radeon_getparam_t *val = reinterpret_cast(args); - drm_radeon_getparam_t Host_val = *val; - uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_GETPARAM, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } - case _IOC_NR(FEX_DRM_IOCTL_RADEON_ALLOC): { - RADEON::fex_drm_radeon_mem_alloc_t *val = reinterpret_cast(args); - drm_radeon_mem_alloc_t Host_val = *val; - uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_ALLOC, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } - case _IOC_NR(FEX_DRM_IOCTL_RADEON_IRQ_EMIT): { - RADEON::fex_drm_radeon_irq_emit_t *val = reinterpret_cast(args); - drm_radeon_irq_emit_t Host_val = *val; - uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_IRQ_EMIT, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } - case _IOC_NR(FEX_DRM_IOCTL_RADEON_SETPARAM): { - RADEON::fex_drm_radeon_setparam_t *val = reinterpret_cast(args); - drm_radeon_setparam_t Host_val = *val; - uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_SETPARAM, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } - case _IOC_NR(FEX_DRM_IOCTL_RADEON_GEM_CREATE): { - RADEON::fex_drm_radeon_gem_create *val = reinterpret_cast(args); - drm_radeon_gem_create Host_val = *val; - uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_GEM_CREATE, &Host_val); - if (Result != -1) { - *val = Host_val; - } - 
SYSCALL_ERRNO(); - break; - } + uint32_t RADEON_Handler(int fd, uint32_t cmd, uint32_t args) { + switch (_IOC_NR(cmd)) { + case _IOC_NR(FEX_DRM_IOCTL_RADEON_CP_INIT): { + RADEON::fex_drm_radeon_init_t* val = reinterpret_cast(args); + drm_radeon_init_t Host_val = *val; + uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_CP_INIT, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } + case _IOC_NR(FEX_DRM_IOCTL_RADEON_CLEAR): { + RADEON::fex_drm_radeon_clear_t* val = reinterpret_cast(args); + drm_radeon_clear_t Host_val = *val; + uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_CLEAR, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } + case _IOC_NR(FEX_DRM_IOCTL_RADEON_STIPPLE): { + RADEON::fex_drm_radeon_stipple_t* val = reinterpret_cast(args); + drm_radeon_stipple_t Host_val = *val; + uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_STIPPLE, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } + case _IOC_NR(FEX_DRM_IOCTL_RADEON_TEXTURE): { + RADEON::fex_drm_radeon_texture_t* val = reinterpret_cast(args); + drm_radeon_texture_t Host_val = *val; + uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_TEXTURE, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } + case _IOC_NR(FEX_DRM_IOCTL_RADEON_VERTEX2): { + RADEON::fex_drm_radeon_vertex2_t* val = reinterpret_cast(args); + drm_radeon_vertex2_t Host_val = *val; + uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_VERTEX2, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } + case _IOC_NR(FEX_DRM_IOCTL_RADEON_CMDBUF): { + RADEON::fex_drm_radeon_cmd_buffer_t* val = reinterpret_cast(args); + drm_radeon_cmd_buffer_t Host_val = *val; + uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_CMDBUF, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } + case _IOC_NR(FEX_DRM_IOCTL_RADEON_GETPARAM): { + RADEON::fex_drm_radeon_getparam_t* val = 
reinterpret_cast(args); + drm_radeon_getparam_t Host_val = *val; + uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_GETPARAM, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } + case _IOC_NR(FEX_DRM_IOCTL_RADEON_ALLOC): { + RADEON::fex_drm_radeon_mem_alloc_t* val = reinterpret_cast(args); + drm_radeon_mem_alloc_t Host_val = *val; + uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_ALLOC, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } + case _IOC_NR(FEX_DRM_IOCTL_RADEON_IRQ_EMIT): { + RADEON::fex_drm_radeon_irq_emit_t* val = reinterpret_cast(args); + drm_radeon_irq_emit_t Host_val = *val; + uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_IRQ_EMIT, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } + case _IOC_NR(FEX_DRM_IOCTL_RADEON_SETPARAM): { + RADEON::fex_drm_radeon_setparam_t* val = reinterpret_cast(args); + drm_radeon_setparam_t Host_val = *val; + uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_SETPARAM, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } + case _IOC_NR(FEX_DRM_IOCTL_RADEON_GEM_CREATE): { + RADEON::fex_drm_radeon_gem_create* val = reinterpret_cast(args); + drm_radeon_gem_create Host_val = *val; + uint64_t Result = ioctl(fd, DRM_IOCTL_RADEON_GEM_CREATE, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } #define _BASIC_META(x) case _IOC_NR(x): #define _BASIC_META_VAR(x, args...) 
case _IOC_NR(x): #define _CUSTOM_META(name, ioctl_num) #define _CUSTOM_META_OFFSET(name, ioctl_num, offset) - // DRM + // DRM #include "LinuxSyscalls/x32/Ioctl/radeon_drm.inl" - { - uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); - break; - } - default: - UnhandledIoctl("RADEON", fd, cmd, args); - return -EPERM; - break; + { + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); + break; } + default: + UnhandledIoctl("RADEON", fd, cmd, args); + return -EPERM; + break; + } #undef _BASIC_META #undef _BASIC_META_VAR #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET - return -EPERM; - } + return -EPERM; + } - uint32_t MSM_Handler(int fd, uint32_t cmd, uint32_t args) { - switch (_IOC_NR(cmd)) { - case _IOC_NR(FEX_DRM_IOCTL_MSM_WAIT_FENCE): { - MSM::fex_drm_msm_wait_fence *val = reinterpret_cast(args); - drm_msm_wait_fence Host_val = *val; - uint64_t Result = ::ioctl(fd, DRM_IOCTL_MSM_WAIT_FENCE, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } + uint32_t MSM_Handler(int fd, uint32_t cmd, uint32_t args) { + switch (_IOC_NR(cmd)) { + case _IOC_NR(FEX_DRM_IOCTL_MSM_WAIT_FENCE): { + MSM::fex_drm_msm_wait_fence* val = reinterpret_cast(args); + drm_msm_wait_fence Host_val = *val; + uint64_t Result = ::ioctl(fd, DRM_IOCTL_MSM_WAIT_FENCE, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } #define _BASIC_META(x) case _IOC_NR(x): #define _BASIC_META_VAR(x, args...) 
case _IOC_NR(x): #define _CUSTOM_META(name, ioctl_num) #define _CUSTOM_META_OFFSET(name, ioctl_num, offset) - // DRM + // DRM #include "LinuxSyscalls/x32/Ioctl/msm_drm.inl" - { - uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); - break; - } - default: - UnhandledIoctl("MSM", fd, cmd, args); - return -EPERM; - break; + { + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); + break; } + default: + UnhandledIoctl("MSM", fd, cmd, args); + return -EPERM; + break; + } #undef _BASIC_META #undef _BASIC_META_VAR #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET - return -EPERM; - } + return -EPERM; + } - uint32_t Nouveau_Handler(int fd, uint32_t cmd, uint32_t args) { - switch (_IOC_NR(cmd)) { + uint32_t Nouveau_Handler(int fd, uint32_t cmd, uint32_t args) { + switch (_IOC_NR(cmd)) { #define _BASIC_META(x) case _IOC_NR(x): #define _BASIC_META_VAR(x, args...) case _IOC_NR(x): #define _CUSTOM_META(name, ioctl_num) #define _CUSTOM_META_OFFSET(name, ioctl_num, offset) // DRM #include "LinuxSyscalls/x32/Ioctl/nouveau_drm.inl" - { - uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); - break; - } - default: - UnhandledIoctl("Nouveau", fd, cmd, args); - return -EPERM; - break; + { + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); + break; } + default: + UnhandledIoctl("Nouveau", fd, cmd, args); + return -EPERM; + break; + } #undef _BASIC_META #undef _BASIC_META_VAR #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET - return -EPERM; - } + return -EPERM; + } - uint32_t I915_Handler(int fd, uint32_t cmd, uint32_t args) { -#define SIMPLE(enum, type) case _IOC_NR(FEX_##enum): { \ - I915::fex_##type *guest = reinterpret_cast(args); \ - type host = *guest; \ - uint64_t Result = ::ioctl(fd, enum, &host); \ - if (Result != -1) { \ - *guest = host; \ - } \ - SYSCALL_ERRNO(); \ - break; \ - } + uint32_t I915_Handler(int fd, uint32_t cmd, uint32_t args) { +#define SIMPLE(enum, type) \ + case _IOC_NR(FEX_##enum): { \ + I915::fex_##type* guest = 
reinterpret_cast(args); \ + type host = *guest; \ + uint64_t Result = ::ioctl(fd, enum, &host); \ + if (Result != -1) { \ + *guest = host; \ + } \ + SYSCALL_ERRNO(); \ + break; \ + } - switch (_IOC_NR(cmd)) { - SIMPLE(DRM_IOCTL_I915_BATCHBUFFER, drm_i915_batchbuffer_t) - SIMPLE(DRM_IOCTL_I915_IRQ_EMIT, drm_i915_irq_emit_t) - SIMPLE(DRM_IOCTL_I915_GETPARAM, drm_i915_getparam_t) - SIMPLE(DRM_IOCTL_I915_ALLOC, drm_i915_mem_alloc_t) - SIMPLE(DRM_IOCTL_I915_CMDBUFFER, drm_i915_cmdbuffer_t) + switch (_IOC_NR(cmd)) { + SIMPLE(DRM_IOCTL_I915_BATCHBUFFER, drm_i915_batchbuffer_t) + SIMPLE(DRM_IOCTL_I915_IRQ_EMIT, drm_i915_irq_emit_t) + SIMPLE(DRM_IOCTL_I915_GETPARAM, drm_i915_getparam_t) + SIMPLE(DRM_IOCTL_I915_ALLOC, drm_i915_mem_alloc_t) + SIMPLE(DRM_IOCTL_I915_CMDBUFFER, drm_i915_cmdbuffer_t) #define _BASIC_META(x) case _IOC_NR(x): #define _BASIC_META_VAR(x, args...) case _IOC_NR(x): @@ -403,342 +404,333 @@ namespace FEX::HLE::x32 { #define _CUSTOM_META_OFFSET(name, ioctl_num, offset) // DRM #include "LinuxSyscalls/x32/Ioctl/i915_drm.inl" - { - uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); - break; - } - default: - UnhandledIoctl("I915", fd, cmd, args); - return -EPERM; - break; + { + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); + break; } + default: + UnhandledIoctl("I915", fd, cmd, args); + return -EPERM; + break; + } #undef SIMPLE #undef _BASIC_META #undef _BASIC_META_VAR #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET - return -EPERM; - } + return -EPERM; + } - uint32_t Panfrost_Handler(int fd, uint32_t cmd, uint32_t args) { - switch (_IOC_NR(cmd)) { + uint32_t Panfrost_Handler(int fd, uint32_t cmd, uint32_t args) { + switch (_IOC_NR(cmd)) { #define _BASIC_META(x) case _IOC_NR(x): #define _BASIC_META_VAR(x, args...) 
case _IOC_NR(x): #define _CUSTOM_META(name, ioctl_num) #define _CUSTOM_META_OFFSET(name, ioctl_num, offset) // DRM #include "LinuxSyscalls/x32/Ioctl/panfrost_drm.inl" - { - uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); - break; - } - default: - UnhandledIoctl("Panfrost", fd, cmd, args); - return -EPERM; - break; + { + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); + break; } + default: + UnhandledIoctl("Panfrost", fd, cmd, args); + return -EPERM; + break; + } #undef _BASIC_META #undef _BASIC_META_VAR #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET - return -EPERM; - } + return -EPERM; + } - uint32_t Lima_Handler(int fd, uint32_t cmd, uint32_t args) { - switch (_IOC_NR(cmd)) { + uint32_t Lima_Handler(int fd, uint32_t cmd, uint32_t args) { + switch (_IOC_NR(cmd)) { #define _BASIC_META(x) case _IOC_NR(x): #define _BASIC_META_VAR(x, args...) case _IOC_NR(x): #define _CUSTOM_META(name, ioctl_num) #define _CUSTOM_META_OFFSET(name, ioctl_num, offset) // DRM #include "LinuxSyscalls/x32/Ioctl/lima_drm.inl" - { - uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); - break; - } - default: - UnhandledIoctl("Lima", fd, cmd, args); - return -EPERM; - break; + { + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); + break; } + default: + UnhandledIoctl("Lima", fd, cmd, args); + return -EPERM; + break; + } #undef _BASIC_META #undef _BASIC_META_VAR #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET - return -EPERM; - } + return -EPERM; + } - uint32_t VC4_Handler(int fd, uint32_t cmd, uint32_t args) { - switch (_IOC_NR(cmd)) { - case _IOC_NR(FEX_DRM_IOCTL_VC4_PERFMON_GET_VALUES): { - FEX::HLE::x32::VC4::fex_drm_vc4_perfmon_get_values *val = reinterpret_cast(args); - drm_vc4_perfmon_get_values Host_val = *val; - uint64_t Result = ::ioctl(fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &Host_val); - if (Result != -1) { - *val = Host_val; - } - SYSCALL_ERRNO(); - break; - } + uint32_t VC4_Handler(int fd, uint32_t cmd, uint32_t args) { + switch 
(_IOC_NR(cmd)) { + case _IOC_NR(FEX_DRM_IOCTL_VC4_PERFMON_GET_VALUES): { + FEX::HLE::x32::VC4::fex_drm_vc4_perfmon_get_values* val = reinterpret_cast(args); + drm_vc4_perfmon_get_values Host_val = *val; + uint64_t Result = ::ioctl(fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &Host_val); + if (Result != -1) { + *val = Host_val; + } + SYSCALL_ERRNO(); + break; + } #define _BASIC_META(x) case _IOC_NR(x): #define _BASIC_META_VAR(x, args...) case _IOC_NR(x): #define _CUSTOM_META(name, ioctl_num) #define _CUSTOM_META_OFFSET(name, ioctl_num, offset) - // DRM + // DRM #include "LinuxSyscalls/x32/Ioctl/vc4_drm.inl" - { - uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); - break; - } - default: - UnhandledIoctl("VC4", fd, cmd, args); - return -EPERM; - break; + { + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); + break; } + default: + UnhandledIoctl("VC4", fd, cmd, args); + return -EPERM; + break; + } #undef _BASIC_META #undef _BASIC_META_VAR #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET - return -EPERM; - } + return -EPERM; + } - uint32_t V3D_Handler(int fd, uint32_t cmd, uint32_t args) { - switch (_IOC_NR(cmd)) { - case _IOC_NR(FEX_DRM_IOCTL_V3D_SUBMIT_CSD): { - FEX::HLE::x32::V3D::fex_drm_v3d_submit_csd *val = reinterpret_cast(args); - drm_v3d_submit_csd Host_val = FEX::HLE::x32::V3D::fex_drm_v3d_submit_csd::SafeConvertToHost(val, _IOC_SIZE(cmd)); - uint64_t Result = ::ioctl(fd, DRM_IOCTL_V3D_SUBMIT_CSD, &Host_val); - if (Result != -1) { - FEX::HLE::x32::V3D::fex_drm_v3d_submit_csd::SafeConvertToGuest(val, Host_val, _IOC_SIZE(cmd)); - } - SYSCALL_ERRNO(); - break; - } + uint32_t V3D_Handler(int fd, uint32_t cmd, uint32_t args) { + switch (_IOC_NR(cmd)) { + case _IOC_NR(FEX_DRM_IOCTL_V3D_SUBMIT_CSD): { + FEX::HLE::x32::V3D::fex_drm_v3d_submit_csd* val = reinterpret_cast(args); + drm_v3d_submit_csd Host_val = FEX::HLE::x32::V3D::fex_drm_v3d_submit_csd::SafeConvertToHost(val, _IOC_SIZE(cmd)); + uint64_t Result = ::ioctl(fd, DRM_IOCTL_V3D_SUBMIT_CSD, 
&Host_val); + if (Result != -1) { + FEX::HLE::x32::V3D::fex_drm_v3d_submit_csd::SafeConvertToGuest(val, Host_val, _IOC_SIZE(cmd)); + } + SYSCALL_ERRNO(); + break; + } #define _BASIC_META(x) case _IOC_NR(x): #define _BASIC_META_VAR(x, args...) case _IOC_NR(x): #define _CUSTOM_META(name, ioctl_num) #define _CUSTOM_META_OFFSET(name, ioctl_num, offset) - // DRM + // DRM #include "LinuxSyscalls/x32/Ioctl/v3d_drm.inl" - { - uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); - break; - } - default: - UnhandledIoctl("V3D", fd, cmd, args); - return -EPERM; - break; + { + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); + break; } + default: + UnhandledIoctl("V3D", fd, cmd, args); + return -EPERM; + break; + } #undef _BASIC_META #undef _BASIC_META_VAR #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET - return -EPERM; - } + return -EPERM; + } - uint32_t Virtio_Handler(int fd, uint32_t cmd, uint32_t args) { - switch (_IOC_NR(cmd)) { + uint32_t Virtio_Handler(int fd, uint32_t cmd, uint32_t args) { + switch (_IOC_NR(cmd)) { #define _BASIC_META(x) case _IOC_NR(x): #define _BASIC_META_VAR(x, args...) case _IOC_NR(x): #define _CUSTOM_META(name, ioctl_num) #define _CUSTOM_META_OFFSET(name, ioctl_num, offset) // DRM #include "LinuxSyscalls/x32/Ioctl/virtio_drm.inl" - { - uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); - break; - } - default: - UnhandledIoctl("Virtio", fd, cmd, args); - return -EPERM; - break; + { + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); + break; } + default: + UnhandledIoctl("Virtio", fd, cmd, args); + return -EPERM; + break; + } #undef _BASIC_META #undef _BASIC_META_VAR #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET - return -EPERM; - } + return -EPERM; + } - uint32_t Default_Handler(int fd, uint32_t cmd, uint32_t args) { - // Default handler assumes everything is correct and doesn't need to do any work. 
- uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); - } + uint32_t Default_Handler(int fd, uint32_t cmd, uint32_t args) { + // Default handler assumes everything is correct and doesn't need to do any work. + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); + } - void AssignDeviceTypeToFD(int fd, drm_version const &Version) { - if (Version.name) { - if (strncmp(Version.name, "amdgpu", Version.name_len) == 0) { - FDToHandler.SetFDHandler(fd, AMDGPU_Handler); - } - else if (strncmp(Version.name, "radeon", Version.name_len) == 0) { - FDToHandler.SetFDHandler(fd, RADEON_Handler); - } - else if (strncmp(Version.name, "msm", Version.name_len) == 0) { - FDToHandler.SetFDHandler(fd, MSM_Handler); - } - else if (strncmp(Version.name, "nouveau", Version.name_len) == 0) { - FDToHandler.SetFDHandler(fd, Nouveau_Handler); - } - else if (strncmp(Version.name, "i915", Version.name_len) == 0) { - FDToHandler.SetFDHandler(fd, I915_Handler); - } - else if (strncmp(Version.name, "panfrost", Version.name_len) == 0) { - FDToHandler.SetFDHandler(fd, Panfrost_Handler); - } - else if (strncmp(Version.name, "lima", Version.name_len) == 0) { - FDToHandler.SetFDHandler(fd, Lima_Handler); - } - else if (strncmp(Version.name, "vc4", Version.name_len) == 0) { - FDToHandler.SetFDHandler(fd, VC4_Handler); - } - else if (strncmp(Version.name, "v3d", Version.name_len) == 0) { - FDToHandler.SetFDHandler(fd, V3D_Handler); - } - else if (strncmp(Version.name, "virtio_gpu", Version.name_len) == 0) { - FDToHandler.SetFDHandler(fd, Virtio_Handler); - } - else { - LogMan::Msg::IFmt("Unknown DRM device: '{}'. 
Using default passthrough", Version.name); - FDToHandler.SetFDHandler(fd, Default_Handler); - } + void AssignDeviceTypeToFD(int fd, const drm_version& Version) { + if (Version.name) { + if (strncmp(Version.name, "amdgpu", Version.name_len) == 0) { + FDToHandler.SetFDHandler(fd, AMDGPU_Handler); + } else if (strncmp(Version.name, "radeon", Version.name_len) == 0) { + FDToHandler.SetFDHandler(fd, RADEON_Handler); + } else if (strncmp(Version.name, "msm", Version.name_len) == 0) { + FDToHandler.SetFDHandler(fd, MSM_Handler); + } else if (strncmp(Version.name, "nouveau", Version.name_len) == 0) { + FDToHandler.SetFDHandler(fd, Nouveau_Handler); + } else if (strncmp(Version.name, "i915", Version.name_len) == 0) { + FDToHandler.SetFDHandler(fd, I915_Handler); + } else if (strncmp(Version.name, "panfrost", Version.name_len) == 0) { + FDToHandler.SetFDHandler(fd, Panfrost_Handler); + } else if (strncmp(Version.name, "lima", Version.name_len) == 0) { + FDToHandler.SetFDHandler(fd, Lima_Handler); + } else if (strncmp(Version.name, "vc4", Version.name_len) == 0) { + FDToHandler.SetFDHandler(fd, VC4_Handler); + } else if (strncmp(Version.name, "v3d", Version.name_len) == 0) { + FDToHandler.SetFDHandler(fd, V3D_Handler); + } else if (strncmp(Version.name, "virtio_gpu", Version.name_len) == 0) { + FDToHandler.SetFDHandler(fd, Virtio_Handler); + } else { + LogMan::Msg::IFmt("Unknown DRM device: '{}'. 
Using default passthrough", Version.name); + FDToHandler.SetFDHandler(fd, Default_Handler); } } + } - uint32_t Handler(int fd, uint32_t cmd, uint32_t args) { -#define SIMPLE(enum, type) case _IOC_NR(FEX_##enum): { \ - DRM::fex_##type *guest = reinterpret_cast(args); \ - type host = *guest; \ - uint64_t Result = ::ioctl(fd, enum, &host); \ - if (Result != -1) { \ - *guest = host; \ - } \ - SYSCALL_ERRNO(); \ - break; \ - } + uint32_t Handler(int fd, uint32_t cmd, uint32_t args) { +#define SIMPLE(enum, type) \ + case _IOC_NR(FEX_##enum): { \ + DRM::fex_##type* guest = reinterpret_cast(args); \ + type host = *guest; \ + uint64_t Result = ::ioctl(fd, enum, &host); \ + if (Result != -1) { \ + *guest = host; \ + } \ + SYSCALL_ERRNO(); \ + break; \ + } - switch (_IOC_NR(cmd)) { - case _IOC_NR(FEX_DRM_IOCTL_VERSION): { - fex_drm_version *version = reinterpret_cast(args); - drm_version Host_Version = *version; - uint64_t Result = ::ioctl(fd, DRM_IOCTL_VERSION, &Host_Version); - if (Result != -1) { - *version = Host_Version; - AssignDeviceTypeToFD(fd, Host_Version); - } - SYSCALL_ERRNO(); - break; - } + switch (_IOC_NR(cmd)) { + case _IOC_NR(FEX_DRM_IOCTL_VERSION): { + fex_drm_version* version = reinterpret_cast(args); + drm_version Host_Version = *version; + uint64_t Result = ::ioctl(fd, DRM_IOCTL_VERSION, &Host_Version); + if (Result != -1) { + *version = Host_Version; + AssignDeviceTypeToFD(fd, Host_Version); + } + SYSCALL_ERRNO(); + break; + } - SIMPLE(DRM_IOCTL_GET_UNIQUE, drm_unique) - SIMPLE(DRM_IOCTL_GET_CLIENT, drm_client) - SIMPLE(DRM_IOCTL_GET_STATS, drm_stats) - SIMPLE(DRM_IOCTL_SET_UNIQUE, drm_unique) - - SIMPLE(DRM_IOCTL_ADD_MAP, drm_map) - SIMPLE(DRM_IOCTL_ADD_BUFS, drm_buf_desc) - SIMPLE(DRM_IOCTL_MARK_BUFS, drm_buf_desc) - SIMPLE(DRM_IOCTL_INFO_BUFS, drm_buf_info) - SIMPLE(DRM_IOCTL_MAP_BUFS, drm_buf_map) - SIMPLE(DRM_IOCTL_FREE_BUFS, drm_buf_free) - SIMPLE(DRM_IOCTL_RM_MAP, drm_map) - SIMPLE(DRM_IOCTL_SET_SAREA_CTX, drm_ctx_priv_map) - 
SIMPLE(DRM_IOCTL_GET_SAREA_CTX, drm_ctx_priv_map) - - SIMPLE(DRM_IOCTL_RES_CTX, drm_ctx_res) - SIMPLE(DRM_IOCTL_DMA, drm_dma) - SIMPLE(DRM_IOCTL_SG_ALLOC, drm_scatter_gather) - SIMPLE(DRM_IOCTL_SG_FREE, drm_scatter_gather) - SIMPLE(DRM_IOCTL_UPDATE_DRAW, drm_update_draw) - SIMPLE(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_get_plane_res) - SIMPLE(DRM_IOCTL_MODE_ADDFB2, drm_mode_fb_cmd2) - SIMPLE(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties) - SIMPLE(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property) - SIMPLE(DRM_IOCTL_MODE_GETFB2, drm_mode_fb_cmd2) - - case _IOC_NR(FEX_DRM_IOCTL_WAIT_VBLANK): { - fex_drm_wait_vblank *guest = reinterpret_cast(args); - drm_wait_vblank Host{}; - Host.request = guest->request; - uint64_t Result = ::ioctl(fd, FEX_DRM_IOCTL_WAIT_VBLANK, &Host); - if (Result != -1) { - guest->reply = Host.reply; - } - SYSCALL_ERRNO(); - break; - } - // Passthrough + SIMPLE(DRM_IOCTL_GET_UNIQUE, drm_unique) + SIMPLE(DRM_IOCTL_GET_CLIENT, drm_client) + SIMPLE(DRM_IOCTL_GET_STATS, drm_stats) + SIMPLE(DRM_IOCTL_SET_UNIQUE, drm_unique) + + SIMPLE(DRM_IOCTL_ADD_MAP, drm_map) + SIMPLE(DRM_IOCTL_ADD_BUFS, drm_buf_desc) + SIMPLE(DRM_IOCTL_MARK_BUFS, drm_buf_desc) + SIMPLE(DRM_IOCTL_INFO_BUFS, drm_buf_info) + SIMPLE(DRM_IOCTL_MAP_BUFS, drm_buf_map) + SIMPLE(DRM_IOCTL_FREE_BUFS, drm_buf_free) + SIMPLE(DRM_IOCTL_RM_MAP, drm_map) + SIMPLE(DRM_IOCTL_SET_SAREA_CTX, drm_ctx_priv_map) + SIMPLE(DRM_IOCTL_GET_SAREA_CTX, drm_ctx_priv_map) + + SIMPLE(DRM_IOCTL_RES_CTX, drm_ctx_res) + SIMPLE(DRM_IOCTL_DMA, drm_dma) + SIMPLE(DRM_IOCTL_SG_ALLOC, drm_scatter_gather) + SIMPLE(DRM_IOCTL_SG_FREE, drm_scatter_gather) + SIMPLE(DRM_IOCTL_UPDATE_DRAW, drm_update_draw) + SIMPLE(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_get_plane_res) + SIMPLE(DRM_IOCTL_MODE_ADDFB2, drm_mode_fb_cmd2) + SIMPLE(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties) + SIMPLE(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property) + SIMPLE(DRM_IOCTL_MODE_GETFB2, 
drm_mode_fb_cmd2) + + case _IOC_NR(FEX_DRM_IOCTL_WAIT_VBLANK): { + fex_drm_wait_vblank* guest = reinterpret_cast(args); + drm_wait_vblank Host {}; + Host.request = guest->request; + uint64_t Result = ::ioctl(fd, FEX_DRM_IOCTL_WAIT_VBLANK, &Host); + if (Result != -1) { + guest->reply = Host.reply; + } + SYSCALL_ERRNO(); + break; + } + // Passthrough #define _BASIC_META(x) case _IOC_NR(x): #define _BASIC_META_VAR(x, args...) case _IOC_NR(x): #define _CUSTOM_META(name, ioctl_num) #define _CUSTOM_META_OFFSET(name, ioctl_num, offset) - // DRM + // DRM #include "LinuxSyscalls/x32/Ioctl/drm.inl" - { - uint64_t Result = ::ioctl(fd, cmd, args); - SYSCALL_ERRNO(); - break; - } - - case DRM_COMMAND_BASE ... (DRM_COMMAND_END - 1): { - // This is the space of the DRM device commands - auto it = FDToHandler.FindHandler(fd); - return it(fd, cmd, args); + { + uint64_t Result = ::ioctl(fd, cmd, args); + SYSCALL_ERRNO(); break; - } - default: - UnhandledIoctl("DRM", fd, cmd, args); - return -EPERM; - break; } + + case DRM_COMMAND_BASE ...(DRM_COMMAND_END - 1): { + // This is the space of the DRM device commands + auto it = FDToHandler.FindHandler(fd); + return it(fd, cmd, args); + break; + } + default: + UnhandledIoctl("DRM", fd, cmd, args); + return -EPERM; + break; + } #undef SIMPLE #undef _BASIC_META #undef _BASIC_META_VAR #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET - return -EPERM; - } + return -EPERM; } +} // namespace DRM - struct IoctlHandler { - uint32_t Command; - std::function Handler; - }; +struct IoctlHandler { + uint32_t Command; + std::function Handler; +}; - static fextl::vector> Handlers; +static fextl::vector> Handlers; - void InitializeStaticIoctlHandlers() { - using namespace DRM; - using namespace sockios; +void InitializeStaticIoctlHandlers() { + using namespace DRM; + using namespace sockios; - const fextl::vector LocalHandlers = {{ -#define _BASIC_META(x) IoctlHandler{_IOC_TYPE(x), FEX::HLE::x32::BasicHandler::BasicHandler}, -#define _BASIC_META_VAR(x, 
args...) IoctlHandler{_IOC_TYPE(x(args)), FEX::HLE::x32::BasicHandler::BasicHandler}, -#define _CUSTOM_META(name, ioctl_num) IoctlHandler{_IOC_TYPE(FEX_##name), FEX::HLE::x32::BasicHandler::BasicHandler}, -#define _CUSTOM_META_OFFSET(name, ioctl_num, offset) IoctlHandler{_IOC_TYPE(FEX_##name), FEX::HLE::x32::BasicHandler::BasicHandler}, + const fextl::vector LocalHandlers = {{ +#define _BASIC_META(x) IoctlHandler {_IOC_TYPE(x), FEX::HLE::x32::BasicHandler::BasicHandler}, +#define _BASIC_META_VAR(x, args...) IoctlHandler {_IOC_TYPE(x(args)), FEX::HLE::x32::BasicHandler::BasicHandler}, +#define _CUSTOM_META(name, ioctl_num) IoctlHandler {_IOC_TYPE(FEX_##name), FEX::HLE::x32::BasicHandler::BasicHandler}, +#define _CUSTOM_META_OFFSET(name, ioctl_num, offset) IoctlHandler {_IOC_TYPE(FEX_##name), FEX::HLE::x32::BasicHandler::BasicHandler}, - // Asound + // Asound #include "LinuxSyscalls/x32/Ioctl/asound.inl" - // Streams + // Streams #include "LinuxSyscalls/x32/Ioctl/streams.inl" - // USB Dev + // USB Dev #include "LinuxSyscalls/x32/Ioctl/usbdev.inl" - // Input + // Input #include "LinuxSyscalls/x32/Ioctl/input.inl" - // SOCKIOS + // SOCKIOS #include "LinuxSyscalls/x32/Ioctl/sockios.inl" - // Joystick + // Joystick #include "LinuxSyscalls/x32/Ioctl/joystick.inl" - // Wireless + // Wireless #include "LinuxSyscalls/x32/Ioctl/wireless.inl" #undef _BASIC_META @@ -746,11 +738,11 @@ namespace FEX::HLE::x32 { #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET -#define _BASIC_META(x) IoctlHandler{_IOC_TYPE(x), FEX::HLE::x32::DRM::Handler}, -#define _BASIC_META_VAR(x, args...) 
IoctlHandler{_IOC_TYPE(x(args)), FEX::HLE::x32::DRM::Handler}, -#define _CUSTOM_META(name, ioctl_num) IoctlHandler{_IOC_TYPE(FEX_##name), FEX::HLE::x32::DRM::Handler}, -#define _CUSTOM_META_OFFSET(name, ioctl_num, offset) IoctlHandler{_IOC_TYPE(FEX_##name), FEX::HLE::x32::DRM::Handler}, - // DRM +#define _BASIC_META(x) IoctlHandler {_IOC_TYPE(x), FEX::HLE::x32::DRM::Handler}, +#define _BASIC_META_VAR(x, args...) IoctlHandler {_IOC_TYPE(x(args)), FEX::HLE::x32::DRM::Handler}, +#define _CUSTOM_META(name, ioctl_num) IoctlHandler {_IOC_TYPE(FEX_##name), FEX::HLE::x32::DRM::Handler}, +#define _CUSTOM_META_OFFSET(name, ioctl_num, offset) IoctlHandler {_IOC_TYPE(FEX_##name), FEX::HLE::x32::DRM::Handler}, + // DRM #include "LinuxSyscalls/x32/Ioctl/drm.inl" #include "LinuxSyscalls/x32/Ioctl/amdgpu_drm.inl" @@ -768,21 +760,20 @@ namespace FEX::HLE::x32 { #undef _BASIC_META_VAR #undef _CUSTOM_META #undef _CUSTOM_META_OFFSET - }}; - - Handlers.assign(1U << _IOC_TYPEBITS, FEX::HLE::x32::BasicHandler::BasicHandler); + }}; - for (auto &Arg : LocalHandlers) { - Handlers[Arg.Command] = Arg.Handler; - } - } + Handlers.assign(1U << _IOC_TYPEBITS, FEX::HLE::x32::BasicHandler::BasicHandler); - uint32_t ioctl32(FEXCore::Core::CpuStateFrame *Frame, int fd, uint32_t request, uint32_t args) { - return Handlers[_IOC_TYPE(request)](fd, request, args); + for (auto& Arg : LocalHandlers) { + Handlers[Arg.Command] = Arg.Handler; } +} - void CheckAndAddFDDuplication(int fd, int NewFD) { - DRM::CheckAndAddFDDuplication(fd, NewFD); - } +uint32_t ioctl32(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t request, uint32_t args) { + return Handlers[_IOC_TYPE(request)](fd, request, args); } +void CheckAndAddFDDuplication(int fd, int NewFD) { + DRM::CheckAndAddFDDuplication(fd, NewFD); +} +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Memory.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Memory.cpp index 36973a0538..49503bf01d 100644 --- 
a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Memory.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Memory.cpp @@ -21,123 +21,124 @@ tags: LinuxSyscalls|syscalls-x86-32 namespace FEX::HLE::x32 { - void *x32SyscallHandler::GuestMmap(FEXCore::Core::InternalThreadState *Thread, void *addr, size_t length, int prot, int flags, int fd, off_t offset) { - LOGMAN_THROW_AA_FMT((length >> 32) == 0, "values must fit to 32 bits"); +void* x32SyscallHandler::GuestMmap(FEXCore::Core::InternalThreadState* Thread, void* addr, size_t length, int prot, int flags, int fd, off_t offset) { + LOGMAN_THROW_AA_FMT((length >> 32) == 0, "values must fit to 32 bits"); - auto Result = (uint64_t)GetAllocator()->Mmap((void*)addr, length, prot, flags, fd, offset); + auto Result = (uint64_t)GetAllocator()->Mmap((void*)addr, length, prot, flags, fd, offset); - LOGMAN_THROW_AA_FMT((Result >> 32) == 0|| (Result >> 32) == 0xFFFFFFFF, "values must fit to 32 bits"); + LOGMAN_THROW_AA_FMT((Result >> 32) == 0 || (Result >> 32) == 0xFFFFFFFF, "values must fit to 32 bits"); - if (!FEX::HLE::HasSyscallError(Result)) { - FEX::HLE::_SyscallHandler->TrackMmap(Thread, Result, length, prot, flags, fd, offset); - return (void *)Result; - } else { - errno = -Result; - return MAP_FAILED; - } + if (!FEX::HLE::HasSyscallError(Result)) { + FEX::HLE::_SyscallHandler->TrackMmap(Thread, Result, length, prot, flags, fd, offset); + return (void*)Result; + } else { + errno = -Result; + return MAP_FAILED; } +} - int x32SyscallHandler::GuestMunmap(FEXCore::Core::InternalThreadState *Thread, void *addr, uint64_t length) { - LOGMAN_THROW_AA_FMT((uintptr_t(addr) >> 32) == 0, "values must fit to 32 bits"); - LOGMAN_THROW_AA_FMT((length >> 32) == 0, "values must fit to 32 bits"); +int x32SyscallHandler::GuestMunmap(FEXCore::Core::InternalThreadState* Thread, void* addr, uint64_t length) { + LOGMAN_THROW_AA_FMT((uintptr_t(addr) >> 32) == 0, "values must fit to 32 bits"); + LOGMAN_THROW_AA_FMT((length >> 32) == 0, "values 
must fit to 32 bits"); - auto Result = GetAllocator()->Munmap(addr, length); + auto Result = GetAllocator()->Munmap(addr, length); - if (Result == 0) { - FEX::HLE::_SyscallHandler->TrackMunmap(Thread, (uintptr_t)addr, length); - return Result; - } else { - errno = -Result; - return -1; - } + if (Result == 0) { + FEX::HLE::_SyscallHandler->TrackMunmap(Thread, (uintptr_t)addr, length); + return Result; + } else { + errno = -Result; + return -1; } +} + +void RegisterMemory(FEX::HLE::SyscallHandler* Handler) { + struct old_mmap_struct { + uint32_t addr; + uint32_t len; + uint32_t prot; + uint32_t flags; + uint32_t fd; + uint32_t offset; + }; + REGISTER_SYSCALL_IMPL_X32(mmap, [](FEXCore::Core::CpuStateFrame* Frame, const old_mmap_struct* arg) -> uint64_t { + uint64_t Result = (uint64_t) static_cast(FEX::HLE::_SyscallHandler) + ->GuestMmap(Frame->Thread, reinterpret_cast(arg->addr), arg->len, arg->prot, arg->flags, arg->fd, arg->offset); + + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + mmap2, [](FEXCore::Core::CpuStateFrame* Frame, uint32_t addr, uint32_t length, int prot, int flags, int fd, uint32_t pgoffset) -> uint64_t { + uint64_t Result = (uint64_t) static_cast(FEX::HLE::_SyscallHandler) + ->GuestMmap(Frame->Thread, reinterpret_cast(addr), length, prot, flags, fd, (uint64_t)pgoffset * 0x1000); + + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(munmap, [](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t length) -> uint64_t { + uint64_t Result = + (uint64_t) static_cast(FEX::HLE::_SyscallHandler)->GuestMunmap(Frame->Thread, addr, length); + + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(mprotect, [](FEXCore::Core::CpuStateFrame* Frame, void* addr, uint32_t len, int prot) -> uint64_t { + uint64_t Result = ::mprotect(addr, len, prot); + if (Result != -1) { + FEX::HLE::_SyscallHandler->TrackMprotect(Frame->Thread, (uintptr_t)addr, len, prot); + } - void RegisterMemory(FEX::HLE::SyscallHandler *Handler) { - struct old_mmap_struct { - 
uint32_t addr; - uint32_t len; - uint32_t prot; - uint32_t flags; - uint32_t fd; - uint32_t offset; - }; - REGISTER_SYSCALL_IMPL_X32(mmap, [](FEXCore::Core::CpuStateFrame *Frame, old_mmap_struct const* arg) -> uint64_t { - uint64_t Result = (uint64_t)static_cast(FEX::HLE::_SyscallHandler)-> - GuestMmap(Frame->Thread, reinterpret_cast(arg->addr), arg->len, arg->prot, arg->flags, arg->fd, arg->offset); - - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(mmap2, [](FEXCore::Core::CpuStateFrame *Frame, uint32_t addr, uint32_t length, int prot, int flags, int fd, uint32_t pgoffset) -> uint64_t { - uint64_t Result = (uint64_t)static_cast(FEX::HLE::_SyscallHandler)-> - GuestMmap(Frame->Thread, reinterpret_cast(addr), length, prot,flags, fd, (uint64_t)pgoffset * 0x1000); - - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(munmap, [](FEXCore::Core::CpuStateFrame *Frame, void *addr, size_t length) -> uint64_t { - uint64_t Result = (uint64_t)static_cast(FEX::HLE::_SyscallHandler)-> - GuestMunmap(Frame->Thread, addr, length); - - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(mprotect, [](FEXCore::Core::CpuStateFrame *Frame, void *addr, uint32_t len, int prot) -> uint64_t { - uint64_t Result = ::mprotect(addr, len, prot); - if (Result != -1) { - FEX::HLE::_SyscallHandler->TrackMprotect(Frame->Thread, (uintptr_t)addr, len, prot); - } - - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(mremap, [](FEXCore::Core::CpuStateFrame *Frame, void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) -> uint64_t { - uint64_t Result = reinterpret_cast(static_cast(FEX::HLE::_SyscallHandler)->GetAllocator()-> - Mremap(old_address, old_size, new_size, flags, new_address)); - - if (!FEX::HLE::HasSyscallError(Result)) { - FEX::HLE::_SyscallHandler->TrackMremap(Frame->Thread, (uintptr_t)old_address, old_size, new_size, flags, Result); - } + SYSCALL_ERRNO(); + }); - return Result; - }); - - REGISTER_SYSCALL_IMPL_X32(mlockall, 
[](FEXCore::Core::CpuStateFrame *Frame, int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(mlock2), reinterpret_cast(0x1'0000), 0x1'0000'0000ULL - 0x1'0000, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(munlockall, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::munlock(reinterpret_cast(0x1'0000), 0x1'0000'0000ULL - 0x1'0000); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(_shmat, [](FEXCore::Core::CpuStateFrame *Frame, int shmid, const void *shmaddr, int shmflg) -> uint64_t { - // also implemented in ipc:OP_SHMAT - uint32_t ResultAddr{}; - uint64_t Result = static_cast(FEX::HLE::_SyscallHandler)->GetAllocator()-> - Shmat(shmid, reinterpret_cast(shmaddr), shmflg, &ResultAddr); - - if (!FEX::HLE::HasSyscallError(Result)) { - FEX::HLE::_SyscallHandler->TrackShmat(Frame->Thread, shmid, ResultAddr, shmflg); - return ResultAddr; - } - else { - return Result; - } - }); - - REGISTER_SYSCALL_IMPL_X32(_shmdt, [](FEXCore::Core::CpuStateFrame *Frame, const void *shmaddr) -> uint64_t { - // also implemented in ipc:OP_SHMDT - uint64_t Result = static_cast(FEX::HLE::_SyscallHandler)->GetAllocator()-> - Shmdt(shmaddr); - - if (!FEX::HLE::HasSyscallError(Result)) { - FEX::HLE::_SyscallHandler->TrackShmdt(Frame->Thread, (uintptr_t)shmaddr); - } + REGISTER_SYSCALL_IMPL_X32( + mremap, [](FEXCore::Core::CpuStateFrame* Frame, void* old_address, size_t old_size, size_t new_size, int flags, void* new_address) -> uint64_t { + uint64_t Result = reinterpret_cast( + static_cast(FEX::HLE::_SyscallHandler)->GetAllocator()->Mremap(old_address, old_size, new_size, flags, new_address)); + + if (!FEX::HLE::HasSyscallError(Result)) { + FEX::HLE::_SyscallHandler->TrackMremap(Frame->Thread, (uintptr_t)old_address, old_size, new_size, flags, Result); + } + return Result; + }); + + REGISTER_SYSCALL_IMPL_X32(mlockall, [](FEXCore::Core::CpuStateFrame* Frame, int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(mlock2), 
reinterpret_cast(0x1'0000), 0x1'0000'0000ULL - 0x1'0000, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(munlockall, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::munlock(reinterpret_cast(0x1'0000), 0x1'0000'0000ULL - 0x1'0000); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(_shmat, [](FEXCore::Core::CpuStateFrame* Frame, int shmid, const void* shmaddr, int shmflg) -> uint64_t { + // also implemented in ipc:OP_SHMAT + uint32_t ResultAddr {}; + uint64_t Result = static_cast(FEX::HLE::_SyscallHandler) + ->GetAllocator() + ->Shmat(shmid, reinterpret_cast(shmaddr), shmflg, &ResultAddr); + + if (!FEX::HLE::HasSyscallError(Result)) { + FEX::HLE::_SyscallHandler->TrackShmat(Frame->Thread, shmid, ResultAddr, shmflg); + return ResultAddr; + } else { return Result; - }); - } + } + }); + + REGISTER_SYSCALL_IMPL_X32(_shmdt, [](FEXCore::Core::CpuStateFrame* Frame, const void* shmaddr) -> uint64_t { + // also implemented in ipc:OP_SHMDT + uint64_t Result = static_cast(FEX::HLE::_SyscallHandler)->GetAllocator()->Shmdt(shmaddr); + + if (!FEX::HLE::HasSyscallError(Result)) { + FEX::HLE::_SyscallHandler->TrackShmdt(Frame->Thread, (uintptr_t)shmaddr); + } + return Result; + }); } + +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Msg.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Msg.cpp index 3ba511a7b5..ca890cfdd4 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Msg.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Msg.cpp @@ -20,83 +20,95 @@ ARG_TO_STR(FEX::HLE::x32::compat_ptr, "%lx") ARG_TO_STR(FEX::HLE::x32::compat_ptr, "%lx") namespace FEX::HLE::x32 { - void RegisterMsg(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X32(mq_timedsend, [](FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::mqd_t mqdes, const char *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec32 *abs_timeout) -> uint64_t { - struct timespec tp64{}; - struct timespec 
*timed_ptr{}; - if (abs_timeout) { - tp64 = *abs_timeout; - timed_ptr = &tp64; - } - - uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedsend), mqdes, msg_ptr, msg_len, msg_prio, timed_ptr); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(mq_timedreceive, [](FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::mqd_t mqdes, char *msg_ptr, size_t msg_len, unsigned int *msg_prio, const struct timespec32 *abs_timeout) -> uint64_t { - struct timespec tp64{}; - struct timespec *timed_ptr{}; - if (abs_timeout) { - tp64 = *abs_timeout; - timed_ptr = &tp64; - } - - uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedreceive), mqdes, msg_ptr, msg_len, msg_prio, timed_ptr); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(mq_timedsend_time64, mq_timedsend, [](FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::mqd_t mqdes, const char *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedsend), mqdes, msg_ptr, msg_len, msg_prio, abs_timeout); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(mq_timedreceive_time64, mq_timedreceive, [](FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::mqd_t mqdes, char *msg_ptr, size_t msg_len, unsigned int *msg_prio, const struct timespec *abs_timeout) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedreceive), mqdes, msg_ptr, msg_len, msg_prio, abs_timeout); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(mq_open, [](FEXCore::Core::CpuStateFrame *Frame, const char *name, int oflag, mode_t mode, compat_ptr attr) -> uint64_t { - mq_attr HostAttr{}; - mq_attr *HostAttr_p{}; - if ((oflag & O_CREAT) && attr) { - // attr is optional unless O_CREAT is set - // Then attr can be valid or nullptr - HostAttr = *attr; - HostAttr_p = &HostAttr; - } - uint64_t Result = ::syscall(SYSCALL_DEF(mq_open), name, oflag, mode, HostAttr_p); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(mq_notify, 
[](FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::mqd_t mqdes, const compat_ptr sevp) -> uint64_t { - sigevent Host = *sevp; - uint64_t Result = ::syscall(SYSCALL_DEF(mq_notify), mqdes, &Host); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(mq_getsetattr, [](FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::mqd_t mqdes, compat_ptr newattr, compat_ptr oldattr) -> uint64_t { - mq_attr HostNew{}; - mq_attr *HostNew_p{}; - - mq_attr HostOld{}; - mq_attr *HostOld_p{}; - - if (newattr) { - HostNew = *newattr; - HostNew_p = &HostNew; - } - - if (oldattr) { - HostOld_p = &HostOld; - } - - uint64_t Result = ::syscall(SYSCALL_DEF(mq_getsetattr), mqdes, HostNew_p, HostOld_p); - - if (Result != 1 && oldattr) { - *oldattr = HostOld; - } - - SYSCALL_ERRNO(); - }); - } +void RegisterMsg(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X32(mq_timedsend, + [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, const char* msg_ptr, size_t msg_len, + unsigned int msg_prio, const struct timespec32* abs_timeout) -> uint64_t { + struct timespec tp64 {}; + struct timespec* timed_ptr {}; + if (abs_timeout) { + tp64 = *abs_timeout; + timed_ptr = &tp64; + } + + uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedsend), mqdes, msg_ptr, msg_len, msg_prio, timed_ptr); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(mq_timedreceive, + [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, char* msg_ptr, size_t msg_len, + unsigned int* msg_prio, const struct timespec32* abs_timeout) -> uint64_t { + struct timespec tp64 {}; + struct timespec* timed_ptr {}; + if (abs_timeout) { + tp64 = *abs_timeout; + timed_ptr = &tp64; + } + + uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedreceive), mqdes, msg_ptr, msg_len, msg_prio, timed_ptr); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(mq_timedsend_time64, mq_timedsend, + [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, const char* msg_ptr, size_t msg_len, + unsigned int msg_prio, const 
struct timespec* abs_timeout) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedsend), mqdes, msg_ptr, msg_len, msg_prio, abs_timeout); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(mq_timedreceive_time64, mq_timedreceive, + [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, char* msg_ptr, size_t msg_len, + unsigned int* msg_prio, const struct timespec* abs_timeout) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedreceive), mqdes, msg_ptr, msg_len, msg_prio, abs_timeout); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + mq_open, [](FEXCore::Core::CpuStateFrame* Frame, const char* name, int oflag, mode_t mode, compat_ptr attr) -> uint64_t { + mq_attr HostAttr {}; + mq_attr* HostAttr_p {}; + if ((oflag & O_CREAT) && attr) { + // attr is optional unless O_CREAT is set + // Then attr can be valid or nullptr + HostAttr = *attr; + HostAttr_p = &HostAttr; + } + uint64_t Result = ::syscall(SYSCALL_DEF(mq_open), name, oflag, mode, HostAttr_p); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + mq_notify, [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, const compat_ptr sevp) -> uint64_t { + sigevent Host = *sevp; + uint64_t Result = ::syscall(SYSCALL_DEF(mq_notify), mqdes, &Host); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(mq_getsetattr, + [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, compat_ptr newattr, + compat_ptr oldattr) -> uint64_t { + mq_attr HostNew {}; + mq_attr* HostNew_p {}; + + mq_attr HostOld {}; + mq_attr* HostOld_p {}; + + if (newattr) { + HostNew = *newattr; + HostNew_p = &HostNew; + } + + if (oldattr) { + HostOld_p = &HostOld; + } + + uint64_t Result = ::syscall(SYSCALL_DEF(mq_getsetattr), mqdes, HostNew_p, HostOld_p); + + if (Result != 1 && oldattr) { + *oldattr = HostOld; + } + + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/NotImplemented.cpp 
b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/NotImplemented.cpp index e7f35d72e6..71c810c475 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/NotImplemented.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/NotImplemented.cpp @@ -12,32 +12,32 @@ tags: LinuxSyscalls|syscalls-x86-32 #include namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x32 { -#define REGISTER_SYSCALL_NOT_IMPL_X32(name) REGISTER_SYSCALL_IMPL_X32(name, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { \ - LogMan::Msg::DFmt("Using deprecated/removed syscall: " #name); \ - return -ENOSYS; \ -}); -#define REGISTER_SYSCALL_NO_PERM_X32(name) REGISTER_SYSCALL_IMPL_X32(name, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { \ - return -EPERM; \ -}); +#define REGISTER_SYSCALL_NOT_IMPL_X32(name) \ + REGISTER_SYSCALL_IMPL_X32(name, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { \ + LogMan::Msg::DFmt("Using deprecated/removed syscall: " #name); \ + return -ENOSYS; \ + }); +#define REGISTER_SYSCALL_NO_PERM_X32(name) \ + REGISTER_SYSCALL_IMPL_X32(name, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { return -EPERM; }); - // these are removed/not implemented in the linux kernel we present - void RegisterNotImplemented(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_NOT_IMPL_X32(break); - REGISTER_SYSCALL_NOT_IMPL_X32(stty); - REGISTER_SYSCALL_NOT_IMPL_X32(gtty); - REGISTER_SYSCALL_NOT_IMPL_X32(prof); - REGISTER_SYSCALL_NOT_IMPL_X32(ftime); - REGISTER_SYSCALL_NOT_IMPL_X32(mpx); - REGISTER_SYSCALL_NOT_IMPL_X32(lock); - REGISTER_SYSCALL_NOT_IMPL_X32(ulimit); - REGISTER_SYSCALL_NOT_IMPL_X32(profil); - REGISTER_SYSCALL_NOT_IMPL_X32(idle); +// these are removed/not implemented in the linux kernel we present +void RegisterNotImplemented(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_NOT_IMPL_X32(break); + REGISTER_SYSCALL_NOT_IMPL_X32(stty); + REGISTER_SYSCALL_NOT_IMPL_X32(gtty); + REGISTER_SYSCALL_NOT_IMPL_X32(prof); 
+ REGISTER_SYSCALL_NOT_IMPL_X32(ftime); + REGISTER_SYSCALL_NOT_IMPL_X32(mpx); + REGISTER_SYSCALL_NOT_IMPL_X32(lock); + REGISTER_SYSCALL_NOT_IMPL_X32(ulimit); + REGISTER_SYSCALL_NOT_IMPL_X32(profil); + REGISTER_SYSCALL_NOT_IMPL_X32(idle); - REGISTER_SYSCALL_NO_PERM_X32(stime); - REGISTER_SYSCALL_NO_PERM_X32(bdflush); - } + REGISTER_SYSCALL_NO_PERM_X32(stime); + REGISTER_SYSCALL_NO_PERM_X32(bdflush); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Sched.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Sched.cpp index 359c69f4fd..ab826574e2 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Sched.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Sched.cpp @@ -16,23 +16,23 @@ tags: LinuxSyscalls|syscalls-x86-32 #include namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x32 { - void RegisterSched(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X32(sched_rr_get_interval, [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, struct timespec32 *tp) -> uint64_t { - struct timespec tp64{}; - uint64_t Result = ::sched_rr_get_interval(pid, tp ? &tp64 : nullptr); - if (tp) { - *tp = tp64; - } - SYSCALL_ERRNO(); - }); +void RegisterSched(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X32(sched_rr_get_interval, [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, struct timespec32* tp) -> uint64_t { + struct timespec tp64 {}; + uint64_t Result = ::sched_rr_get_interval(pid, tp ? 
&tp64 : nullptr); + if (tp) { + *tp = tp64; + } + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32_PASS(sched_rr_get_interval_time64, [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, struct timespec *tp) -> uint64_t { - uint64_t Result = ::sched_rr_get_interval(pid, tp); - SYSCALL_ERRNO(); - }); - } + REGISTER_SYSCALL_IMPL_X32_PASS(sched_rr_get_interval_time64, [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, struct timespec* tp) -> uint64_t { + uint64_t Result = ::sched_rr_get_interval(pid, tp); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Semaphore.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Semaphore.cpp index 579fe305cf..6111595bcf 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Semaphore.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Semaphore.cpp @@ -26,442 +26,411 @@ tags: LinuxSyscalls|syscalls-x86-32 #include namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x32 { - // Define the IPC ops - enum IPCOp { - OP_SEMOP = 1, - OP_SEMGET = 2, - OP_SEMCTL = 3, - OP_SEMTIMEDOP = 4, - OP_MSGSND = 11, - OP_MSGRCV = 12, - OP_MSGGET = 13, - OP_MSGCTL = 14, - OP_SHMAT = 21, - OP_SHMDT = 22, - OP_SHMGET = 23, - OP_SHMCTL = 24, - }; +// Define the IPC ops +enum IPCOp { + OP_SEMOP = 1, + OP_SEMGET = 2, + OP_SEMCTL = 3, + OP_SEMTIMEDOP = 4, + OP_MSGSND = 11, + OP_MSGRCV = 12, + OP_MSGGET = 13, + OP_MSGCTL = 14, + OP_SHMAT = 21, + OP_SHMDT = 22, + OP_SHMGET = 23, + OP_SHMCTL = 24, +}; - struct msgbuf_32 { - compat_long_t mtype; - char mtext[1]; - }; +struct msgbuf_32 { + compat_long_t mtype; + char mtext[1]; +}; - union semun_32 { - int32_t val; // Value for SETVAL - compat_ptr buf32; // struct semid_ds* - Buffer ptr for IPC_STAT, IPC_SET - compat_ptr buf64; // struct semid_ds* - Buffer ptr for IPC_STAT, IPC_SET - uint32_t array; // uint16_t array for GETALL, SETALL - compat_ptr __buf; // struct seminfo * - Buffer for IPC_INFO - 
}; +union semun_32 { + int32_t val; // Value for SETVAL + compat_ptr buf32; // struct semid_ds* - Buffer ptr for IPC_STAT, IPC_SET + compat_ptr buf64; // struct semid_ds* - Buffer ptr for IPC_STAT, IPC_SET + uint32_t array; // uint16_t array for GETALL, SETALL + compat_ptr __buf; // struct seminfo * - Buffer for IPC_INFO +}; - union msgun_32 { - int32_t val; // Value for SETVAL - compat_ptr buf32; // struct msgid_ds* - Buffer ptr for IPC_STAT, IPC_SET - compat_ptr buf64; // struct msgid_ds* - Buffer ptr for IPC_STAT, IPC_SET - uint32_t array; // uint16_t array for GETALL, SETALL - compat_ptr __buf; // struct msginfo * - Buffer for IPC_INFO - }; +union msgun_32 { + int32_t val; // Value for SETVAL + compat_ptr buf32; // struct msgid_ds* - Buffer ptr for IPC_STAT, IPC_SET + compat_ptr buf64; // struct msgid_ds* - Buffer ptr for IPC_STAT, IPC_SET + uint32_t array; // uint16_t array for GETALL, SETALL + compat_ptr __buf; // struct msginfo * - Buffer for IPC_INFO +}; - union shmun_32 { - int32_t val; // Value for SETVAL - compat_ptr buf32; // struct shmid_ds* - Buffer ptr for IPC_STAT, IPC_SET - compat_ptr buf64; // struct shmid_ds* - Buffer ptr for IPC_STAT, IPC_SET - uint32_t array; // uint16_t array for GETALL, SETALL - compat_ptr __buf32; // struct shminfo * - Buffer for IPC_INFO - compat_ptr __buf64; // struct shminfo * - Buffer for IPC_INFO +union shmun_32 { + int32_t val; // Value for SETVAL + compat_ptr buf32; // struct shmid_ds* - Buffer ptr for IPC_STAT, IPC_SET + compat_ptr buf64; // struct shmid_ds* - Buffer ptr for IPC_STAT, IPC_SET + uint32_t array; // uint16_t array for GETALL, SETALL + compat_ptr __buf32; // struct shminfo * - Buffer for IPC_INFO + compat_ptr __buf64; // struct shminfo * - Buffer for IPC_INFO - compat_ptr __buf_info_32; // struct shm_info * - Buffer for SHM_INFO - }; + compat_ptr __buf_info_32; // struct shm_info * - Buffer for SHM_INFO +}; - union semun { - int val; /* value for SETVAL */ - struct semid_ds_32 *buf; /* buffer for 
IPC_STAT & IPC_SET */ - unsigned short *array; /* array for GETALL & SETALL */ - struct fex_seminfo *__buf; /* buffer for IPC_INFO */ - void *__pad; - }; +union semun { + int val; /* value for SETVAL */ + struct semid_ds_32* buf; /* buffer for IPC_STAT & IPC_SET */ + unsigned short* array; /* array for GETALL & SETALL */ + struct fex_seminfo* __buf; /* buffer for IPC_INFO */ + void* __pad; +}; - uint64_t _ipc(FEXCore::Core::CpuStateFrame *Frame, uint32_t call, uint32_t first, uint32_t second, uint32_t third, uint32_t ptr, uint32_t fifth) { - uint64_t Result{}; +uint64_t _ipc(FEXCore::Core::CpuStateFrame* Frame, uint32_t call, uint32_t first, uint32_t second, uint32_t third, uint32_t ptr, uint32_t fifth) { + uint64_t Result {}; - switch (static_cast(call)) { - case OP_SEMOP: { - Result = ::syscall(SYSCALL_DEF(semop), first, reinterpret_cast(ptr), second); - break; - } - case OP_SEMGET: { - Result = ::syscall(SYSCALL_DEF(semget), first, second, third); - break; + switch (static_cast(call)) { + case OP_SEMOP: { + Result = ::syscall(SYSCALL_DEF(semop), first, reinterpret_cast(ptr), second); + break; + } + case OP_SEMGET: { + Result = ::syscall(SYSCALL_DEF(semget), first, second, third); + break; + } + case OP_SEMCTL: { + uint32_t semid = first; + uint32_t semnum = second; + // Upper 16bits used for a different flag? + int32_t cmd = third & 0xFF; + compat_ptr semun(ptr); + bool IPC64 = third & 0x100; + switch (cmd) { + case IPC_SET: { + struct semid64_ds buf {}; + if (IPC64) { + buf = *semun->buf64; + } else { + buf = *semun->buf32; } - case OP_SEMCTL: { - uint32_t semid = first; - uint32_t semnum = second; - // Upper 16bits used for a different flag? 
- int32_t cmd = third & 0xFF; - compat_ptr semun(ptr); - bool IPC64 = third & 0x100; - switch (cmd) { - case IPC_SET: { - struct semid64_ds buf{}; - if (IPC64) { - buf = *semun->buf64; - } - else { - buf = *semun->buf32; - } - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf); - if (Result != -1) { - if (IPC64) { - *semun->buf64 = buf; - } - else { - *semun->buf32 = buf; - } - } - break; - } - case SEM_STAT: - case SEM_STAT_ANY: - case IPC_STAT: { - struct semid64_ds buf{}; - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf); - if (Result != -1) { - if (IPC64) { - *semun->buf64 = buf; - } - else { - *semun->buf32 = buf; - } - } - break; - } - case SEM_INFO: - case IPC_INFO: { - struct fex_seminfo si{}; - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &si); - if (Result != -1) { - memcpy(semun->__buf, &si, sizeof(si)); - } - break; - } - case GETALL: - case SETALL: { - // ptr is just a int32_t* in this case - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun->array); - break; - } - case SETVAL: { - // ptr is just a int32_t in this case - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun->val); - break; - } - case IPC_RMID: - case GETPID: - case GETNCNT: - case GETZCNT: - case GETVAL: - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled semctl cmd: {}", cmd); - return -EINVAL; + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf); + if (Result != -1) { + if (IPC64) { + *semun->buf64 = buf; + } else { + *semun->buf32 = buf; } - break; } - case OP_SEMTIMEDOP: { - timespec32 *timeout = reinterpret_cast(fifth); - struct timespec tp64{}; - struct timespec *timed_ptr{}; - if (timeout) { - tp64 = *timeout; - timed_ptr = &tp64; + break; + } + case SEM_STAT: + case SEM_STAT_ANY: + case IPC_STAT: { + struct semid64_ds buf {}; + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf); + if (Result != -1) { + if (IPC64) 
{ + *semun->buf64 = buf; + } else { + *semun->buf32 = buf; } - - Result = ::syscall(SYSCALL_DEF(semtimedop), first, reinterpret_cast(ptr), second, timed_ptr); - break; } - case OP_MSGSND: { - // Requires a temporary buffer - fextl::vector Tmp(second + sizeof(size_t)); - struct msgbuf *TmpMsg = reinterpret_cast(&Tmp.at(0)); - msgbuf_32 *src = reinterpret_cast(ptr); - TmpMsg->mtype = src->mtype; - memcpy(TmpMsg->mtext, src->mtext, second); - - Result = ::syscall(SYSCALL_DEF(msgsnd), first, TmpMsg, second, third); - break; + break; + } + case SEM_INFO: + case IPC_INFO: { + struct fex_seminfo si {}; + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &si); + if (Result != -1) { + memcpy(semun->__buf, &si, sizeof(si)); } - case OP_MSGRCV: { - fextl::vector Tmp(second + sizeof(size_t)); - struct msgbuf *TmpMsg = reinterpret_cast(&Tmp.at(0)); + break; + } + case GETALL: + case SETALL: { + // ptr is just a int32_t* in this case + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun->array); + break; + } + case SETVAL: { + // ptr is just a int32_t in this case + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun->val); + break; + } + case IPC_RMID: + case GETPID: + case GETNCNT: + case GETZCNT: + case GETVAL: Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd); break; + default: LOGMAN_MSG_A_FMT("Unhandled semctl cmd: {}", cmd); return -EINVAL; + } + break; + } + case OP_SEMTIMEDOP: { + timespec32* timeout = reinterpret_cast(fifth); + struct timespec tp64 {}; + struct timespec* timed_ptr {}; + if (timeout) { + tp64 = *timeout; + timed_ptr = &tp64; + } - if (call >> 16) { - Result = ::syscall(SYSCALL_DEF(msgrcv), first, TmpMsg, second, fifth, third); - if (Result != -1) { - msgbuf_32 *src = reinterpret_cast(ptr); - src->mtype = TmpMsg->mtype; - memcpy(src->mtext, TmpMsg->mtext, Result); - } + Result = ::syscall(SYSCALL_DEF(semtimedop), first, reinterpret_cast(ptr), second, timed_ptr); + break; + } + case OP_MSGSND: { + // 
Requires a temporary buffer + fextl::vector Tmp(second + sizeof(size_t)); + struct msgbuf* TmpMsg = reinterpret_cast(&Tmp.at(0)); + msgbuf_32* src = reinterpret_cast(ptr); + TmpMsg->mtype = src->mtype; + memcpy(TmpMsg->mtext, src->mtext, second); - } - else { - struct compat_ipc_kludge { - compat_uptr_t msgp; - compat_long_t msgtyp; - }; - compat_ipc_kludge *ipck = reinterpret_cast(ptr); - Result = ::syscall(SYSCALL_DEF(msgrcv), first, TmpMsg, second, ipck->msgtyp, third); - if (Result != -1) { - msgbuf_32 *src = reinterpret_cast(ipck->msgp); - ipck->msgtyp = TmpMsg->mtype; - memcpy(src->mtext, TmpMsg->mtext, Result); - } - } + Result = ::syscall(SYSCALL_DEF(msgsnd), first, TmpMsg, second, third); + break; + } + case OP_MSGRCV: { + fextl::vector Tmp(second + sizeof(size_t)); + struct msgbuf* TmpMsg = reinterpret_cast(&Tmp.at(0)); - break; - } - case OP_MSGGET: { - Result = ::syscall(SYSCALL_DEF(msgget), first, second); - break; + if (call >> 16) { + Result = ::syscall(SYSCALL_DEF(msgrcv), first, TmpMsg, second, fifth, third); + if (Result != -1) { + msgbuf_32* src = reinterpret_cast(ptr); + src->mtype = TmpMsg->mtype; + memcpy(src->mtext, TmpMsg->mtext, Result); } - case OP_MSGCTL: { - uint32_t msqid = first; - int32_t cmd = second & 0xFF; - msgun_32 msgun{}; - msgun.val = ptr; - bool IPC64 = second & 0x100; - switch (cmd) { - case IPC_SET: { - struct msqid64_ds buf{}; - if (IPC64) { - buf = *msgun.buf64; - } - else { - buf = *msgun.buf32; - } - Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, &buf); - break; - } - case MSG_STAT: - case MSG_STAT_ANY: - case IPC_STAT: { - struct msqid64_ds buf{}; - Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, &buf); - if (Result != -1) { - if (IPC64) { - *msgun.buf64 = buf; - } - else { - *msgun.buf32 = buf; - } - } - break; - } - case MSG_INFO: - case IPC_INFO: { - struct msginfo mi{}; - Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, reinterpret_cast(&mi)); - if (Result != -1) { - memcpy(msgun.__buf, &mi, 
sizeof(mi)); - } - break; - } - case IPC_RMID: - Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, nullptr); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled msgctl cmd: {}", cmd); - return -EINVAL; - } - break; + + } else { + struct compat_ipc_kludge { + compat_uptr_t msgp; + compat_long_t msgtyp; + }; + compat_ipc_kludge* ipck = reinterpret_cast(ptr); + Result = ::syscall(SYSCALL_DEF(msgrcv), first, TmpMsg, second, ipck->msgtyp, third); + if (Result != -1) { + msgbuf_32* src = reinterpret_cast(ipck->msgp); + ipck->msgtyp = TmpMsg->mtype; + memcpy(src->mtext, TmpMsg->mtext, Result); } - case OP_SHMAT: { - // also implemented in memory:shmat - Result = static_cast(FEX::HLE::_SyscallHandler)->GetAllocator()-> - Shmat(first, reinterpret_cast(ptr), second, reinterpret_cast(third)); - if (!FEX::HLE::HasSyscallError(Result)) { - FEX::HLE::_SyscallHandler->TrackShmat(Frame->Thread, first, *reinterpret_cast(third), second); - } - break; + } + + break; + } + case OP_MSGGET: { + Result = ::syscall(SYSCALL_DEF(msgget), first, second); + break; + } + case OP_MSGCTL: { + uint32_t msqid = first; + int32_t cmd = second & 0xFF; + msgun_32 msgun {}; + msgun.val = ptr; + bool IPC64 = second & 0x100; + switch (cmd) { + case IPC_SET: { + struct msqid64_ds buf {}; + if (IPC64) { + buf = *msgun.buf64; + } else { + buf = *msgun.buf32; } - case OP_SHMDT: { - // also implemented in memory:shmdt - Result = static_cast(FEX::HLE::_SyscallHandler)->GetAllocator()-> - Shmdt(reinterpret_cast(ptr)); - if (!FEX::HLE::HasSyscallError(Result)) { - FEX::HLE::_SyscallHandler->TrackShmdt(Frame->Thread, ptr); + Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, &buf); + break; + } + case MSG_STAT: + case MSG_STAT_ANY: + case IPC_STAT: { + struct msqid64_ds buf {}; + Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, &buf); + if (Result != -1) { + if (IPC64) { + *msgun.buf64 = buf; + } else { + *msgun.buf32 = buf; } - break; } - case OP_SHMGET: { - Result = ::shmget(first, second, third); - break; 
+ break; + } + case MSG_INFO: + case IPC_INFO: { + struct msginfo mi {}; + Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, reinterpret_cast(&mi)); + if (Result != -1) { + memcpy(msgun.__buf, &mi, sizeof(mi)); } - case OP_SHMCTL: { - int32_t shmid = first; - int32_t shmcmd = second; - int32_t cmd = shmcmd & 0xFF; - bool IPC64 = shmcmd & 0x100; - shmun_32 shmun{}; - shmun.val = reinterpret_cast(ptr); - - switch (cmd) { - case IPC_SET: { - struct shmid64_ds buf{}; - if (IPC64) { - buf = *shmun.buf64; - } - else { - buf = *shmun.buf32; - } - Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, &buf); - // IPC_SET sets the internal data structure that the kernel uses - // No need to writeback - break; - } - case SHM_STAT: - case SHM_STAT_ANY: - case IPC_STAT: { - struct shmid64_ds buf{}; - Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, &buf); - if (Result != -1) { - if (IPC64) { - *shmun.buf64 = buf; - } - else { - *shmun.buf32 = buf; - } - } - break; - } - case IPC_INFO: { - struct shminfo si{}; - Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, reinterpret_cast(&si)); - if (Result != -1) { - if (IPC64) { - *shmun.__buf64 = si; - } - else { - *shmun.__buf32 = si; - } - } - break; - } - case SHM_INFO: { - struct shm_info si{}; - Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, reinterpret_cast(&si)); - if (Result != -1) { - // SHM_INFO doesn't follow IPC64 behaviour - *shmun.__buf_info_32 = si; - } - break; - } - case SHM_LOCK: - Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, nullptr); - break; - case SHM_UNLOCK: - Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, nullptr); - break; - case IPC_RMID: - Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, nullptr); - break; + break; + } + case IPC_RMID: Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, nullptr); break; + default: LOGMAN_MSG_A_FMT("Unhandled msgctl cmd: {}", cmd); return -EINVAL; + } + break; + } + case OP_SHMAT: { + // also implemented in memory:shmat + Result = 
static_cast(FEX::HLE::_SyscallHandler) + ->GetAllocator() + ->Shmat(first, reinterpret_cast(ptr), second, reinterpret_cast(third)); + if (!FEX::HLE::HasSyscallError(Result)) { + FEX::HLE::_SyscallHandler->TrackShmat(Frame->Thread, first, *reinterpret_cast(third), second); + } + break; + } + case OP_SHMDT: { + // also implemented in memory:shmdt + Result = static_cast(FEX::HLE::_SyscallHandler)->GetAllocator()->Shmdt(reinterpret_cast(ptr)); + if (!FEX::HLE::HasSyscallError(Result)) { + FEX::HLE::_SyscallHandler->TrackShmdt(Frame->Thread, ptr); + } + break; + } + case OP_SHMGET: { + Result = ::shmget(first, second, third); + break; + } + case OP_SHMCTL: { + int32_t shmid = first; + int32_t shmcmd = second; + int32_t cmd = shmcmd & 0xFF; + bool IPC64 = shmcmd & 0x100; + shmun_32 shmun {}; + shmun.val = reinterpret_cast(ptr); - default: - LOGMAN_MSG_A_FMT("Unhandled shmctl cmd: {}", cmd); - return -EINVAL; + switch (cmd) { + case IPC_SET: { + struct shmid64_ds buf {}; + if (IPC64) { + buf = *shmun.buf64; + } else { + buf = *shmun.buf32; + } + Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, &buf); + // IPC_SET sets the internal data structure that the kernel uses + // No need to writeback + break; + } + case SHM_STAT: + case SHM_STAT_ANY: + case IPC_STAT: { + struct shmid64_ds buf {}; + Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, &buf); + if (Result != -1) { + if (IPC64) { + *shmun.buf64 = buf; + } else { + *shmun.buf32 = buf; + } + } + break; + } + case IPC_INFO: { + struct shminfo si {}; + Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, reinterpret_cast(&si)); + if (Result != -1) { + if (IPC64) { + *shmun.__buf64 = si; + } else { + *shmun.__buf32 = si; } - break; } + break; + } + case SHM_INFO: { + struct shm_info si {}; + Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, reinterpret_cast(&si)); + if (Result != -1) { + // SHM_INFO doesn't follow IPC64 behaviour + *shmun.__buf_info_32 = si; + } + break; + } + case SHM_LOCK: Result = 
::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, nullptr); break; + case SHM_UNLOCK: Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, nullptr); break; + case IPC_RMID: Result = ::syscall(SYSCALL_DEF(_shmctl), shmid, cmd, nullptr); break; - default: return -ENOSYS; + default: LOGMAN_MSG_A_FMT("Unhandled shmctl cmd: {}", cmd); return -EINVAL; } - SYSCALL_ERRNO(); + break; + } + + default: return -ENOSYS; } - void RegisterSemaphore(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X32(ipc, _ipc); + SYSCALL_ERRNO(); +} +void RegisterSemaphore(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X32(ipc, _ipc); - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(semtimedop_time64, semtimedop, [](FEXCore::Core::CpuStateFrame *Frame, int semid, struct sembuf *sops, size_t nsops, const struct timespec *timeout) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(semtimedop), semid, sops, nsops, timeout); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL( + semtimedop_time64, semtimedop, + [](FEXCore::Core::CpuStateFrame* Frame, int semid, struct sembuf* sops, size_t nsops, const struct timespec* timeout) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(semtimedop), semid, sops, nsops, timeout); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32(semctl, [](FEXCore::Core::CpuStateFrame *Frame, int semid, int semnum, int cmd, semun_32 *semun) -> uint64_t { - uint64_t Result{}; - bool IPC64 = cmd & 0x100; + REGISTER_SYSCALL_IMPL_X32(semctl, [](FEXCore::Core::CpuStateFrame* Frame, int semid, int semnum, int cmd, semun_32* semun) -> uint64_t { + uint64_t Result {}; + bool IPC64 = cmd & 0x100; - switch (cmd) { - case IPC_SET: { - struct semid64_ds buf{}; - if (IPC64) { - buf = *semun->buf64; - } - else { - buf = *semun->buf32; - } - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf); - if (Result != -1) { - if (IPC64) { - *semun->buf64 = buf; - } - else { - *semun->buf32 = buf; - } - } - break; - } - case SEM_STAT: - case 
SEM_STAT_ANY: - case IPC_STAT: { - struct semid64_ds buf{}; - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf); - if (Result != -1) { - if (IPC64) { - *semun->buf64 = buf; - } - else { - *semun->buf32 = buf; - } - } - break; - } - case SEM_INFO: - case IPC_INFO: { - struct fex_seminfo si{}; - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &si); - if (Result != -1) { - memcpy(semun->__buf, &si, sizeof(si)); - } - break; - } - case GETALL: - case SETALL: { - // ptr is just a int32_t* in this case - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun->array); - break; + switch (cmd) { + case IPC_SET: { + struct semid64_ds buf {}; + if (IPC64) { + buf = *semun->buf64; + } else { + buf = *semun->buf32; + } + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf); + if (Result != -1) { + if (IPC64) { + *semun->buf64 = buf; + } else { + *semun->buf32 = buf; } - case SETVAL: { - // ptr is just a int32_t in this case - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun->val); - break; + } + break; + } + case SEM_STAT: + case SEM_STAT_ANY: + case IPC_STAT: { + struct semid64_ds buf {}; + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf); + if (Result != -1) { + if (IPC64) { + *semun->buf64 = buf; + } else { + *semun->buf32 = buf; } - case IPC_RMID: - case GETPID: - case GETNCNT: - case GETZCNT: - case GETVAL: - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled semctl cmd: {}", cmd); - return -EINVAL; } - SYSCALL_ERRNO(); - }); - - } + break; + } + case SEM_INFO: + case IPC_INFO: { + struct fex_seminfo si {}; + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &si); + if (Result != -1) { + memcpy(semun->__buf, &si, sizeof(si)); + } + break; + } + case GETALL: + case SETALL: { + // ptr is just a int32_t* in this case + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun->array); + break; + } + case 
SETVAL: { + // ptr is just a int32_t in this case + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun->val); + break; + } + case IPC_RMID: + case GETPID: + case GETNCNT: + case GETZCNT: + case GETVAL: Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun); break; + default: LOGMAN_MSG_A_FMT("Unhandled semctl cmd: {}", cmd); return -EINVAL; + } + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Signals.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Signals.cpp index 99443f6a1b..d1333e5dae 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Signals.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Signals.cpp @@ -22,219 +22,226 @@ tags: LinuxSyscalls|syscalls-x86-32 #include namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } ARG_TO_STR(FEX::HLE::x32::compat_ptr, "%lx") namespace FEX::HLE::x32 { - void CopySigInfo(FEXCore::x86::siginfo_t *Info, siginfo_t const &Host) { - // Copy the basic things first - Info->si_signo = Host.si_signo; - Info->si_errno = Host.si_errno; - Info->si_code = Host.si_code; - - // Check si_code to determine how we need to interpret this - if (Info->si_code == SI_TIMER) { - // SI_TIMER means pid, uid, value - Info->_sifields._timer.tid = Host.si_timerid; +void CopySigInfo(FEXCore::x86::siginfo_t* Info, const siginfo_t& Host) { + // Copy the basic things first + Info->si_signo = Host.si_signo; + Info->si_errno = Host.si_errno; + Info->si_code = Host.si_code; + + // Check si_code to determine how we need to interpret this + if (Info->si_code == SI_TIMER) { + // SI_TIMER means pid, uid, value + Info->_sifields._timer.tid = Host.si_timerid; + Info->_sifields._timer.overrun = Host.si_overrun; + Info->_sifields._timer.sigval.sival_int = Host.si_value.sival_int; + } else { + // Now we need to copy over the more complex things + switch (Info->si_signo) { + case SIGSEGV: + case SIGBUS: + // This is the address trying to 
be accessed, not the RIP + Info->_sifields._sigfault.addr = static_cast(reinterpret_cast(Host.si_addr)); + break; + case SIGFPE: + case SIGILL: + // Can't really give a real result here. This is the RIP causing a sigill or sigfpe + // Claim at RIP 0 for now + Info->_sifields._sigfault.addr = 0; + break; + case SIGCHLD: + Info->_sifields._sigchld.pid = Host.si_pid; + Info->_sifields._sigchld.uid = Host.si_uid; + Info->_sifields._sigchld.status = Host.si_status; + Info->_sifields._sigchld.utime = Host.si_utime; + Info->_sifields._sigchld.stime = Host.si_stime; + break; + case SIGALRM: + case SIGVTALRM: + Info->_sifields._timer.tid = Host.si_timerid; Info->_sifields._timer.overrun = Host.si_overrun; - Info->_sifields._timer.sigval.sival_int = Host.si_value.sival_int; - } - else { - // Now we need to copy over the more complex things - switch (Info->si_signo) { - case SIGSEGV: - case SIGBUS: - // This is the address trying to be accessed, not the RIP - Info->_sifields._sigfault.addr = static_cast(reinterpret_cast(Host.si_addr)); - break; - case SIGFPE: - case SIGILL: - // Can't really give a real result here. 
This is the RIP causing a sigill or sigfpe - // Claim at RIP 0 for now - Info->_sifields._sigfault.addr = 0; - break; - case SIGCHLD: - Info->_sifields._sigchld.pid = Host.si_pid; - Info->_sifields._sigchld.uid = Host.si_uid; - Info->_sifields._sigchld.status = Host.si_status; - Info->_sifields._sigchld.utime = Host.si_utime; - Info->_sifields._sigchld.stime = Host.si_stime; - break; - case SIGALRM: - case SIGVTALRM: - Info->_sifields._timer.tid = Host.si_timerid; - Info->_sifields._timer.overrun = Host.si_overrun; - Info->_sifields._timer.sigval.sival_int = Host.si_int; - break; - default: - LogMan::Msg::EFmt("Unhandled siginfo_t for sigtimedwait: {}", Info->si_signo); - break; - } + Info->_sifields._timer.sigval.sival_int = Host.si_int; + break; + default: LogMan::Msg::EFmt("Unhandled siginfo_t for sigtimedwait: {}", Info->si_signo); break; } } +} - void RegisterSignals(FEX::HLE::SyscallHandler *Handler) { - - // Only gets the lower 32-bits of the signal mask - REGISTER_SYSCALL_IMPL_X32(sgetmask, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Set{}; - FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigProcMask(0, nullptr, &Set); - return Set & ~0U; - }); - - // Only controls the lower 32-bits of the signal mask - // Blocks the upper 32-bits - REGISTER_SYSCALL_IMPL_X32(ssetmask, [](FEXCore::Core::CpuStateFrame *Frame, uint32_t New) -> uint64_t { - uint64_t Set{}; - uint64_t NewSet = (~0ULL << 32) | New; - FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigProcMask(SIG_SETMASK, &NewSet, &Set); - return Set & ~0U; - }); - - // Only masks the lower 32-bits of the signal mask - // The upper 32-bits are still active (unmasked) and can signal the program - REGISTER_SYSCALL_IMPL_X32(sigsuspend, [](FEXCore::Core::CpuStateFrame *Frame, uint32_t Mask) -> uint64_t { - uint64_t Mask64 = Mask; - return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigSuspend(&Mask64, 8); - }); - - REGISTER_SYSCALL_IMPL_X32(sigpending, 
[](FEXCore::Core::CpuStateFrame *Frame, compat_old_sigset_t *set) -> uint64_t { - uint64_t HostSet{}; - uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigPending(&HostSet, 8); - if (Result == 0) { - // This old interface only returns the lower signals - *set = HostSet & ~0U; - } - return Result; - }); - - REGISTER_SYSCALL_IMPL_X32(signal, [](FEXCore::Core::CpuStateFrame *Frame, int signum, uint32_t handler) -> uint64_t { - GuestSigAction newact{}; - GuestSigAction oldact{}; - newact.sigaction_handler.handler = reinterpret_cast(handler); - FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSignalHandler(signum, &newact, &oldact); - return static_cast(reinterpret_cast(oldact.sigaction_handler.handler)); - }); - - REGISTER_SYSCALL_IMPL_X32(sigaction, [](FEXCore::Core::CpuStateFrame *Frame, int signum, const OldGuestSigAction_32 *act, OldGuestSigAction_32 *oldact) -> uint64_t { - GuestSigAction *act64_p{}; - GuestSigAction *old64_p{}; - - GuestSigAction act64{}; - if (act) { - act64 = *act; - act64_p = &act64; - } - GuestSigAction old64{}; - - if (oldact) { - old64_p = &old64; - } - - uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSignalHandler(signum, act64_p, old64_p); - if (Result == 0 && oldact) { - *oldact = old64; - } - - return Result; - }); - - REGISTER_SYSCALL_IMPL_X32(rt_sigaction, [](FEXCore::Core::CpuStateFrame *Frame, int signum, const GuestSigAction_32 *act, GuestSigAction_32 *oldact, size_t sigsetsize) -> uint64_t { - if (sigsetsize != 8) { - return -EINVAL; - } +void RegisterSignals(FEX::HLE::SyscallHandler* Handler) { + + // Only gets the lower 32-bits of the signal mask + REGISTER_SYSCALL_IMPL_X32(sgetmask, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Set {}; + FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigProcMask(0, nullptr, &Set); + return Set & ~0U; + }); + + // Only controls the lower 32-bits of the signal mask + // Blocks the upper 32-bits + 
REGISTER_SYSCALL_IMPL_X32(ssetmask, [](FEXCore::Core::CpuStateFrame* Frame, uint32_t New) -> uint64_t { + uint64_t Set {}; + uint64_t NewSet = (~0ULL << 32) | New; + FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigProcMask(SIG_SETMASK, &NewSet, &Set); + return Set & ~0U; + }); + + // Only masks the lower 32-bits of the signal mask + // The upper 32-bits are still active (unmasked) and can signal the program + REGISTER_SYSCALL_IMPL_X32(sigsuspend, [](FEXCore::Core::CpuStateFrame* Frame, uint32_t Mask) -> uint64_t { + uint64_t Mask64 = Mask; + return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigSuspend(&Mask64, 8); + }); + + REGISTER_SYSCALL_IMPL_X32(sigpending, [](FEXCore::Core::CpuStateFrame* Frame, compat_old_sigset_t* set) -> uint64_t { + uint64_t HostSet {}; + uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigPending(&HostSet, 8); + if (Result == 0) { + // This old interface only returns the lower signals + *set = HostSet & ~0U; + } + return Result; + }); + + REGISTER_SYSCALL_IMPL_X32(signal, [](FEXCore::Core::CpuStateFrame* Frame, int signum, uint32_t handler) -> uint64_t { + GuestSigAction newact {}; + GuestSigAction oldact {}; + newact.sigaction_handler.handler = reinterpret_cast(handler); + FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSignalHandler(signum, &newact, &oldact); + return static_cast(reinterpret_cast(oldact.sigaction_handler.handler)); + }); + + REGISTER_SYSCALL_IMPL_X32( + sigaction, [](FEXCore::Core::CpuStateFrame* Frame, int signum, const OldGuestSigAction_32* act, OldGuestSigAction_32* oldact) -> uint64_t { + GuestSigAction* act64_p {}; + GuestSigAction* old64_p {}; + + GuestSigAction act64 {}; + if (act) { + act64 = *act; + act64_p = &act64; + } + GuestSigAction old64 {}; - GuestSigAction *act64_p{}; - GuestSigAction *old64_p{}; + if (oldact) { + old64_p = &old64; + } - GuestSigAction act64{}; - if (act) { - act64 = *act; - act64_p = &act64; - } - GuestSigAction old64{}; + 
uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSignalHandler(signum, act64_p, old64_p); + if (Result == 0 && oldact) { + *oldact = old64; + } - if (oldact) { - old64_p = &old64; - } + return Result; + }); - uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSignalHandler(signum, act64_p, old64_p); - if (Result == 0 && oldact) { - *oldact = old64; - } + REGISTER_SYSCALL_IMPL_X32( + rt_sigaction, + [](FEXCore::Core::CpuStateFrame* Frame, int signum, const GuestSigAction_32* act, GuestSigAction_32* oldact, size_t sigsetsize) -> uint64_t { + if (sigsetsize != 8) { + return -EINVAL; + } - return Result; - }); + GuestSigAction* act64_p {}; + GuestSigAction* old64_p {}; - REGISTER_SYSCALL_IMPL_X32(rt_sigtimedwait, [](FEXCore::Core::CpuStateFrame *Frame, uint64_t *set, compat_ptr info, const struct timespec32* timeout, size_t sigsetsize) -> uint64_t { - struct timespec* timeout_ptr{}; - struct timespec tp64{}; - if (timeout) { - tp64 = *timeout; - timeout_ptr = &tp64; - } + GuestSigAction act64 {}; + if (act) { + act64 = *act; + act64_p = &act64; + } + GuestSigAction old64 {}; - siginfo_t HostInfo{}; - uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigTimedWait(set, &HostInfo, timeout_ptr, sigsetsize); - if (Result != -1) { - // We need to translate the 64-bit siginfo_t to 32-bit siginfo_t - CopySigInfo(info, HostInfo); - } - return Result; - }); + if (oldact) { + old64_p = &old64; + } + uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSignalHandler(signum, act64_p, old64_p); + if (Result == 0 && oldact) { + *oldact = old64; + } - REGISTER_SYSCALL_IMPL_X32(rt_sigtimedwait_time64, [](FEXCore::Core::CpuStateFrame *Frame, uint64_t *set, compat_ptr info, const struct timespec* timeout, size_t sigsetsize) -> uint64_t { - siginfo_t HostInfo{}; - uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigTimedWait(set, &HostInfo, timeout, 
sigsetsize); - if (Result != -1) { - // We need to translate the 64-bit siginfo_t to 32-bit siginfo_t - CopySigInfo(info, HostInfo); - } - return Result; - }); + return Result; + }); + + REGISTER_SYSCALL_IMPL_X32(rt_sigtimedwait, + [](FEXCore::Core::CpuStateFrame* Frame, uint64_t* set, compat_ptr info, + const struct timespec32* timeout, size_t sigsetsize) -> uint64_t { + struct timespec* timeout_ptr {}; + struct timespec tp64 {}; + if (timeout) { + tp64 = *timeout; + timeout_ptr = &tp64; + } - if (Handler->IsHostKernelVersionAtLeast(5, 1, 0)) { - REGISTER_SYSCALL_IMPL_X32(pidfd_send_signal, [](FEXCore::Core::CpuStateFrame *Frame, int pidfd, int sig, compat_ptr info, unsigned int flags) -> uint64_t { - siginfo_t *InfoHost_ptr{}; - siginfo_t InfoHost{}; - if (info) { - InfoHost = *info; - InfoHost_ptr = &InfoHost; - } - - uint64_t Result = ::syscall(SYSCALL_DEF(pidfd_send_signal), pidfd, sig, InfoHost_ptr, flags); - SYSCALL_ERRNO(); - }); + siginfo_t HostInfo {}; + uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigTimedWait(set, &HostInfo, timeout_ptr, sigsetsize); + if (Result != -1) { + // We need to translate the 64-bit siginfo_t to 32-bit siginfo_t + CopySigInfo(info, HostInfo); } - else { - REGISTER_SYSCALL_IMPL_X32(pidfd_send_signal, UnimplementedSyscallSafe); + return Result; + }); + + + REGISTER_SYSCALL_IMPL_X32(rt_sigtimedwait_time64, + [](FEXCore::Core::CpuStateFrame* Frame, uint64_t* set, compat_ptr info, + const struct timespec* timeout, size_t sigsetsize) -> uint64_t { + siginfo_t HostInfo {}; + uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigTimedWait(set, &HostInfo, timeout, sigsetsize); + if (Result != -1) { + // We need to translate the 64-bit siginfo_t to 32-bit siginfo_t + CopySigInfo(info, HostInfo); } - - REGISTER_SYSCALL_IMPL_X32_PASS(rt_sigqueueinfo, [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, int sig, compat_ptr info) -> uint64_t { - siginfo_t info64{}; - siginfo_t *info64_p{}; - + 
return Result; + }); + + if (Handler->IsHostKernelVersionAtLeast(5, 1, 0)) { + REGISTER_SYSCALL_IMPL_X32( + pidfd_send_signal, + [](FEXCore::Core::CpuStateFrame* Frame, int pidfd, int sig, compat_ptr info, unsigned int flags) -> uint64_t { + siginfo_t* InfoHost_ptr {}; + siginfo_t InfoHost {}; if (info) { - info64_p = &info64; + InfoHost = *info; + InfoHost_ptr = &InfoHost; } - uint64_t Result = ::syscall(SYSCALL_DEF(rt_sigqueueinfo), pid, sig, info64_p); + uint64_t Result = ::syscall(SYSCALL_DEF(pidfd_send_signal), pidfd, sig, InfoHost_ptr, flags); SYSCALL_ERRNO(); }); + } else { + REGISTER_SYSCALL_IMPL_X32(pidfd_send_signal, UnimplementedSyscallSafe); + } - REGISTER_SYSCALL_IMPL_X32_PASS(rt_tgsigqueueinfo, [](FEXCore::Core::CpuStateFrame *Frame, pid_t tgid, pid_t tid, int sig, compat_ptr info) -> uint64_t { - siginfo_t info64{}; - siginfo_t *info64_p{}; + REGISTER_SYSCALL_IMPL_X32_PASS( + rt_sigqueueinfo, [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, int sig, compat_ptr info) -> uint64_t { + siginfo_t info64 {}; + siginfo_t* info64_p {}; - if (info) { - info64_p = &info64; - } + if (info) { + info64_p = &info64; + } - uint64_t Result = ::syscall(SYSCALL_DEF(rt_tgsigqueueinfo), tgid, tid, sig, info64_p); - SYSCALL_ERRNO(); - }); - } + uint64_t Result = ::syscall(SYSCALL_DEF(rt_sigqueueinfo), pid, sig, info64_p); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS( + rt_tgsigqueueinfo, [](FEXCore::Core::CpuStateFrame* Frame, pid_t tgid, pid_t tid, int sig, compat_ptr info) -> uint64_t { + siginfo_t info64 {}; + siginfo_t* info64_p {}; + + if (info) { + info64_p = &info64; + } + + uint64_t Result = ::syscall(SYSCALL_DEF(rt_tgsigqueueinfo), tgid, tid, sig, info64_p); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Socket.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Socket.cpp index a4e2d1a8dc..fd2cdcf0d2 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Socket.cpp +++ 
b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Socket.cpp @@ -26,7 +26,7 @@ ARG_TO_STR(FEX::HLE::x32::compat_ptr, "%lx") ARG_TO_STR(FEX::HLE::x32::compat_ptr, "%lx") namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x32 { @@ -111,183 +111,116 @@ namespace FEX::HLE::x32 { #define SO_PEERPIDFD 77 #endif - enum SockOp { - OP_SOCKET = 1, - OP_BIND = 2, - OP_CONNECT = 3, - OP_LISTEN = 4, - OP_ACCEPT = 5, - OP_GETSOCKNAME = 6, - OP_GETPEERNAME = 7, - OP_SOCKETPAIR = 8, - OP_SEND = 9, - OP_RECV = 10, - OP_SENDTO = 11, - OP_RECVFROM = 12, - OP_SHUTDOWN = 13, - OP_SETSOCKOPT = 14, - OP_GETSOCKOPT = 15, - OP_SENDMSG = 16, - OP_RECVMSG = 17, - OP_ACCEPT4 = 18, - OP_RECVMMSG = 19, - OP_SENDMMSG = 20, - }; - - static uint64_t SendMsg(int sockfd, const struct msghdr32 *msg, int flags) { - struct msghdr HostHeader{}; - fextl::vector Host_iovec(msg->msg_iovlen); - for (size_t i = 0; i < msg->msg_iovlen; ++i) { - Host_iovec[i] = msg->msg_iov[i]; - } +enum SockOp { + OP_SOCKET = 1, + OP_BIND = 2, + OP_CONNECT = 3, + OP_LISTEN = 4, + OP_ACCEPT = 5, + OP_GETSOCKNAME = 6, + OP_GETPEERNAME = 7, + OP_SOCKETPAIR = 8, + OP_SEND = 9, + OP_RECV = 10, + OP_SENDTO = 11, + OP_RECVFROM = 12, + OP_SHUTDOWN = 13, + OP_SETSOCKOPT = 14, + OP_GETSOCKOPT = 15, + OP_SENDMSG = 16, + OP_RECVMSG = 17, + OP_ACCEPT4 = 18, + OP_RECVMMSG = 19, + OP_SENDMMSG = 20, +}; + +static uint64_t SendMsg(int sockfd, const struct msghdr32* msg, int flags) { + struct msghdr HostHeader {}; + fextl::vector Host_iovec(msg->msg_iovlen); + for (size_t i = 0; i < msg->msg_iovlen; ++i) { + Host_iovec[i] = msg->msg_iov[i]; + } - HostHeader.msg_name = msg->msg_name; - HostHeader.msg_namelen = msg->msg_namelen; + HostHeader.msg_name = msg->msg_name; + HostHeader.msg_namelen = msg->msg_namelen; - HostHeader.msg_iov = Host_iovec.data(); - HostHeader.msg_iovlen = msg->msg_iovlen; + HostHeader.msg_iov = Host_iovec.data(); + HostHeader.msg_iovlen = msg->msg_iovlen; - HostHeader.msg_control = 
alloca(msg->msg_controllen * 2); - HostHeader.msg_controllen = msg->msg_controllen; + HostHeader.msg_control = alloca(msg->msg_controllen * 2); + HostHeader.msg_controllen = msg->msg_controllen; - HostHeader.msg_flags = msg->msg_flags; - if (HostHeader.msg_controllen) { - void *CurrentGuestPtr = msg->msg_control; - struct cmsghdr *CurrentHost = reinterpret_cast(HostHeader.msg_control); + HostHeader.msg_flags = msg->msg_flags; + if (HostHeader.msg_controllen) { + void* CurrentGuestPtr = msg->msg_control; + struct cmsghdr* CurrentHost = reinterpret_cast(HostHeader.msg_control); - for (cmsghdr32 *msghdr_guest = reinterpret_cast(CurrentGuestPtr); - CurrentGuestPtr != 0; - msghdr_guest = reinterpret_cast(CurrentGuestPtr)) { + for (cmsghdr32* msghdr_guest = reinterpret_cast(CurrentGuestPtr); CurrentGuestPtr != 0; + msghdr_guest = reinterpret_cast(CurrentGuestPtr)) { - CurrentHost->cmsg_level = msghdr_guest->cmsg_level; - CurrentHost->cmsg_type = msghdr_guest->cmsg_type; + CurrentHost->cmsg_level = msghdr_guest->cmsg_level; + CurrentHost->cmsg_type = msghdr_guest->cmsg_type; - if (msghdr_guest->cmsg_len) { - size_t SizeIncrease = (CMSG_LEN(0) - sizeof(cmsghdr32)); - CurrentHost->cmsg_len = msghdr_guest->cmsg_len + SizeIncrease; - HostHeader.msg_controllen += SizeIncrease; - memcpy(CMSG_DATA(CurrentHost), msghdr_guest->cmsg_data, msghdr_guest->cmsg_len - sizeof(cmsghdr32)); - } + if (msghdr_guest->cmsg_len) { + size_t SizeIncrease = (CMSG_LEN(0) - sizeof(cmsghdr32)); + CurrentHost->cmsg_len = msghdr_guest->cmsg_len + SizeIncrease; + HostHeader.msg_controllen += SizeIncrease; + memcpy(CMSG_DATA(CurrentHost), msghdr_guest->cmsg_data, msghdr_guest->cmsg_len - sizeof(cmsghdr32)); + } - // Go to next host - CurrentHost = CMSG_NXTHDR(&HostHeader, CurrentHost); + // Go to next host + CurrentHost = CMSG_NXTHDR(&HostHeader, CurrentHost); - // Go to next msg - if (msghdr_guest->cmsg_len < sizeof(cmsghdr32)) { + // Go to next msg + if (msghdr_guest->cmsg_len < sizeof(cmsghdr32)) { + 
CurrentGuestPtr = nullptr; + } else { + CurrentGuestPtr = reinterpret_cast(reinterpret_cast(CurrentGuestPtr) + msghdr_guest->cmsg_len); + CurrentGuestPtr = reinterpret_cast((reinterpret_cast(CurrentGuestPtr) + 3) & ~3ULL); + if (CurrentGuestPtr >= reinterpret_cast(reinterpret_cast(static_cast(msg->msg_control)) + msg->msg_controllen)) { CurrentGuestPtr = nullptr; } - else { - CurrentGuestPtr = reinterpret_cast(reinterpret_cast(CurrentGuestPtr) + msghdr_guest->cmsg_len); - CurrentGuestPtr = reinterpret_cast((reinterpret_cast(CurrentGuestPtr) + 3) & ~3ULL); - if (CurrentGuestPtr >= reinterpret_cast(reinterpret_cast(static_cast(msg->msg_control)) + msg->msg_controllen)) { - CurrentGuestPtr = nullptr; - } - } } } - - uint64_t Result = ::sendmsg(sockfd, &HostHeader, flags); - SYSCALL_ERRNO(); } - static uint64_t RecvMsg(int sockfd, struct msghdr32 *msg, int flags) { - struct msghdr HostHeader{}; - fextl::vector Host_iovec(msg->msg_iovlen); - for (size_t i = 0; i < msg->msg_iovlen; ++i) { - Host_iovec[i] = msg->msg_iov[i]; - } - - HostHeader.msg_name = msg->msg_name; - HostHeader.msg_namelen = msg->msg_namelen; - - HostHeader.msg_iov = Host_iovec.data(); - HostHeader.msg_iovlen = msg->msg_iovlen; - - HostHeader.msg_control = alloca(msg->msg_controllen*2); - HostHeader.msg_controllen = msg->msg_controllen*2; - - HostHeader.msg_flags = msg->msg_flags; - - uint64_t Result = ::recvmsg(sockfd, &HostHeader, flags); - if (Result != -1) { - for (size_t i = 0; i < msg->msg_iovlen; ++i) { - msg->msg_iov[i] = Host_iovec[i]; - } - - msg->msg_namelen = HostHeader.msg_namelen; - msg->msg_controllen = HostHeader.msg_controllen; - msg->msg_flags = HostHeader.msg_flags; - if (HostHeader.msg_controllen) { - // Host and guest cmsg data structures aren't compatible. 
- // Copy them over now - void *CurrentGuestPtr = msg->msg_control; - for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(&HostHeader); - cmsg != nullptr; - cmsg = CMSG_NXTHDR(&HostHeader, cmsg)) { - cmsghdr32 *CurrentGuest = reinterpret_cast(CurrentGuestPtr); - - // Copy over the header first - // cmsg_len needs to be adjusted by the size of the header between host and guest - // Host is 16 bytes, guest is 12 bytes - CurrentGuest->cmsg_level = cmsg->cmsg_level; - CurrentGuest->cmsg_type = cmsg->cmsg_type; - - // Now copy over the data - if (cmsg->cmsg_len) { - size_t SizeIncrease = (CMSG_LEN(0) - sizeof(cmsghdr32)); - CurrentGuest->cmsg_len = cmsg->cmsg_len - SizeIncrease; - - // Controllen size also changes - msg->msg_controllen -= SizeIncrease; - - memcpy(CurrentGuest->cmsg_data, CMSG_DATA(cmsg), cmsg->cmsg_len - sizeof(struct cmsghdr)); - CurrentGuestPtr = reinterpret_cast(reinterpret_cast(CurrentGuestPtr) + CurrentGuest->cmsg_len); - CurrentGuestPtr = reinterpret_cast((reinterpret_cast(CurrentGuestPtr) + 3) & ~3ULL); + uint64_t Result = ::sendmsg(sockfd, &HostHeader, flags); + SYSCALL_ERRNO(); +} - } - } - } - } - SYSCALL_ERRNO(); +static uint64_t RecvMsg(int sockfd, struct msghdr32* msg, int flags) { + struct msghdr HostHeader {}; + fextl::vector Host_iovec(msg->msg_iovlen); + for (size_t i = 0; i < msg->msg_iovlen; ++i) { + Host_iovec[i] = msg->msg_iov[i]; } - void ConvertHeaderToHost(fextl::vector &iovec, struct msghdr *Host, const struct msghdr32 *Guest) { - size_t CurrentIOVecSize = iovec.size(); - iovec.resize(CurrentIOVecSize + Guest->msg_iovlen); - for (size_t i = 0; i < Guest->msg_iovlen; ++i) { - iovec[CurrentIOVecSize + i] = Guest->msg_iov[i]; - } + HostHeader.msg_name = msg->msg_name; + HostHeader.msg_namelen = msg->msg_namelen; - Host->msg_name = Guest->msg_name; - Host->msg_namelen = Guest->msg_namelen; + HostHeader.msg_iov = Host_iovec.data(); + HostHeader.msg_iovlen = msg->msg_iovlen; - Host->msg_iov = &iovec[CurrentIOVecSize]; - Host->msg_iovlen = 
Guest->msg_iovlen; + HostHeader.msg_control = alloca(msg->msg_controllen * 2); + HostHeader.msg_controllen = msg->msg_controllen * 2; - // XXX: This could result in a stack overflow - Host->msg_control = alloca(Guest->msg_controllen*2); - Host->msg_controllen = Guest->msg_controllen*2; + HostHeader.msg_flags = msg->msg_flags; - Host->msg_flags = Guest->msg_flags; - } - - void ConvertHeaderToGuest(struct msghdr32 *Guest, struct msghdr *Host) { - for (size_t i = 0; i < Guest->msg_iovlen; ++i) { - Guest->msg_iov[i] = Host->msg_iov[i]; + uint64_t Result = ::recvmsg(sockfd, &HostHeader, flags); + if (Result != -1) { + for (size_t i = 0; i < msg->msg_iovlen; ++i) { + msg->msg_iov[i] = Host_iovec[i]; } - Guest->msg_namelen = Host->msg_namelen; - Guest->msg_controllen = Host->msg_controllen; - Guest->msg_flags = Host->msg_flags; - - if (Host->msg_controllen) { + msg->msg_namelen = HostHeader.msg_namelen; + msg->msg_controllen = HostHeader.msg_controllen; + msg->msg_flags = HostHeader.msg_flags; + if (HostHeader.msg_controllen) { // Host and guest cmsg data structures aren't compatible. 
// Copy them over now - void *CurrentGuestPtr = Guest->msg_control; - for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(Host); - cmsg != nullptr; - cmsg = CMSG_NXTHDR(Host, cmsg)) { - cmsghdr32 *CurrentGuest = reinterpret_cast(CurrentGuestPtr); + void* CurrentGuestPtr = msg->msg_control; + for (struct cmsghdr* cmsg = CMSG_FIRSTHDR(&HostHeader); cmsg != nullptr; cmsg = CMSG_NXTHDR(&HostHeader, cmsg)) { + cmsghdr32* CurrentGuest = reinterpret_cast(CurrentGuestPtr); // Copy over the header first // cmsg_len needs to be adjusted by the size of the header between host and guest @@ -301,7 +234,7 @@ namespace FEX::HLE::x32 { CurrentGuest->cmsg_len = cmsg->cmsg_len - SizeIncrease; // Controllen size also changes - Guest->msg_controllen -= SizeIncrease; + msg->msg_controllen -= SizeIncrease; memcpy(CurrentGuest->cmsg_data, CMSG_DATA(cmsg), cmsg->cmsg_len - sizeof(struct cmsghdr)); CurrentGuestPtr = reinterpret_cast(reinterpret_cast(CurrentGuestPtr) + CurrentGuest->cmsg_len); @@ -310,558 +243,547 @@ namespace FEX::HLE::x32 { } } } + SYSCALL_ERRNO(); +} + +void ConvertHeaderToHost(fextl::vector& iovec, struct msghdr* Host, const struct msghdr32* Guest) { + size_t CurrentIOVecSize = iovec.size(); + iovec.resize(CurrentIOVecSize + Guest->msg_iovlen); + for (size_t i = 0; i < Guest->msg_iovlen; ++i) { + iovec[CurrentIOVecSize + i] = Guest->msg_iov[i]; + } + + Host->msg_name = Guest->msg_name; + Host->msg_namelen = Guest->msg_namelen; + + Host->msg_iov = &iovec[CurrentIOVecSize]; + Host->msg_iovlen = Guest->msg_iovlen; + + // XXX: This could result in a stack overflow + Host->msg_control = alloca(Guest->msg_controllen * 2); + Host->msg_controllen = Guest->msg_controllen * 2; + + Host->msg_flags = Guest->msg_flags; +} + +void ConvertHeaderToGuest(struct msghdr32* Guest, struct msghdr* Host) { + for (size_t i = 0; i < Guest->msg_iovlen; ++i) { + Guest->msg_iov[i] = Host->msg_iov[i]; + } - static uint64_t RecvMMsg(int sockfd, compat_ptr msgvec, uint32_t vlen, int flags, struct timespec 
*timeout_ts) { - fextl::vector Host_iovec; - fextl::vector HostMHeader(vlen); - for (size_t i = 0; i < vlen; ++i) { - ConvertHeaderToHost(Host_iovec, &HostMHeader[i].msg_hdr, &msgvec[i].msg_hdr); - HostMHeader[i].msg_len = msgvec[i].msg_len; - } - uint64_t Result = ::recvmmsg(sockfd, HostMHeader.data(), vlen, flags, timeout_ts); - if (Result != -1) { - for (size_t i = 0; i < Result; ++i) { - ConvertHeaderToGuest(&msgvec[i].msg_hdr, &HostMHeader[i].msg_hdr); - msgvec[i].msg_len = HostMHeader[i].msg_len; + Guest->msg_namelen = Host->msg_namelen; + Guest->msg_controllen = Host->msg_controllen; + Guest->msg_flags = Host->msg_flags; + + if (Host->msg_controllen) { + // Host and guest cmsg data structures aren't compatible. + // Copy them over now + void* CurrentGuestPtr = Guest->msg_control; + for (struct cmsghdr* cmsg = CMSG_FIRSTHDR(Host); cmsg != nullptr; cmsg = CMSG_NXTHDR(Host, cmsg)) { + cmsghdr32* CurrentGuest = reinterpret_cast(CurrentGuestPtr); + + // Copy over the header first + // cmsg_len needs to be adjusted by the size of the header between host and guest + // Host is 16 bytes, guest is 12 bytes + CurrentGuest->cmsg_level = cmsg->cmsg_level; + CurrentGuest->cmsg_type = cmsg->cmsg_type; + + // Now copy over the data + if (cmsg->cmsg_len) { + size_t SizeIncrease = (CMSG_LEN(0) - sizeof(cmsghdr32)); + CurrentGuest->cmsg_len = cmsg->cmsg_len - SizeIncrease; + + // Controllen size also changes + Guest->msg_controllen -= SizeIncrease; + + memcpy(CurrentGuest->cmsg_data, CMSG_DATA(cmsg), cmsg->cmsg_len - sizeof(struct cmsghdr)); + CurrentGuestPtr = reinterpret_cast(reinterpret_cast(CurrentGuestPtr) + CurrentGuest->cmsg_len); + CurrentGuestPtr = reinterpret_cast((reinterpret_cast(CurrentGuestPtr) + 3) & ~3ULL); } } - SYSCALL_ERRNO(); } +} + +static uint64_t RecvMMsg(int sockfd, compat_ptr msgvec, uint32_t vlen, int flags, struct timespec* timeout_ts) { + fextl::vector Host_iovec; + fextl::vector HostMHeader(vlen); + for (size_t i = 0; i < vlen; ++i) { + 
ConvertHeaderToHost(Host_iovec, &HostMHeader[i].msg_hdr, &msgvec[i].msg_hdr); + HostMHeader[i].msg_len = msgvec[i].msg_len; + } + uint64_t Result = ::recvmmsg(sockfd, HostMHeader.data(), vlen, flags, timeout_ts); + if (Result != -1) { + for (size_t i = 0; i < Result; ++i) { + ConvertHeaderToGuest(&msgvec[i].msg_hdr, &HostMHeader[i].msg_hdr); + msgvec[i].msg_len = HostMHeader[i].msg_len; + } + } + SYSCALL_ERRNO(); +} - static uint64_t SendMMsg(int sockfd, compat_ptr msgvec, uint32_t vlen, int flags) { - fextl::vector Host_iovec; - fextl::vector HostMmsg(vlen); +static uint64_t SendMMsg(int sockfd, compat_ptr msgvec, uint32_t vlen, int flags) { + fextl::vector Host_iovec; + fextl::vector HostMmsg(vlen); - // Walk the iovec and convert them - // Calculate controllen at the same time - size_t Controllen_size{}; - for (size_t i = 0; i < vlen; ++i) { - msghdr32 &guest = msgvec[i].msg_hdr; + // Walk the iovec and convert them + // Calculate controllen at the same time + size_t Controllen_size {}; + for (size_t i = 0; i < vlen; ++i) { + msghdr32& guest = msgvec[i].msg_hdr; - Controllen_size += guest.msg_controllen * 2; - for (size_t j = 0; j < guest.msg_iovlen; ++j) { - iovec guest_iov = guest.msg_iov[j]; - Host_iovec.emplace_back(guest_iov); - } + Controllen_size += guest.msg_controllen * 2; + for (size_t j = 0; j < guest.msg_iovlen; ++j) { + iovec guest_iov = guest.msg_iov[j]; + Host_iovec.emplace_back(guest_iov); } + } - fextl::vector Controllen(Controllen_size); + fextl::vector Controllen(Controllen_size); - size_t current_iov{}; - size_t current_controllen_offset{}; - for (size_t i = 0; i < vlen; ++i) { - msghdr32 &guest = msgvec[i].msg_hdr; - struct msghdr &msg = HostMmsg[i].msg_hdr; - msg.msg_name = guest.msg_name; - msg.msg_namelen = guest.msg_namelen; + size_t current_iov {}; + size_t current_controllen_offset {}; + for (size_t i = 0; i < vlen; ++i) { + msghdr32& guest = msgvec[i].msg_hdr; + struct msghdr& msg = HostMmsg[i].msg_hdr; + msg.msg_name = 
guest.msg_name; + msg.msg_namelen = guest.msg_namelen; - msg.msg_iov = &Host_iovec.at(current_iov); - msg.msg_iovlen = guest.msg_iovlen; - current_iov += msg.msg_iovlen; + msg.msg_iov = &Host_iovec.at(current_iov); + msg.msg_iovlen = guest.msg_iovlen; + current_iov += msg.msg_iovlen; - if (guest.msg_controllen) { - msg.msg_control = &Controllen.at(current_controllen_offset); - current_controllen_offset += guest.msg_controllen * 2; - } - msg.msg_controllen = guest.msg_controllen; + if (guest.msg_controllen) { + msg.msg_control = &Controllen.at(current_controllen_offset); + current_controllen_offset += guest.msg_controllen * 2; + } + msg.msg_controllen = guest.msg_controllen; - msg.msg_flags = guest.msg_flags; + msg.msg_flags = guest.msg_flags; - if (msg.msg_controllen) { - void *CurrentGuestPtr = guest.msg_control; - struct cmsghdr *CurrentHost = reinterpret_cast(msg.msg_control); + if (msg.msg_controllen) { + void* CurrentGuestPtr = guest.msg_control; + struct cmsghdr* CurrentHost = reinterpret_cast(msg.msg_control); - for (cmsghdr32 *msghdr_guest = reinterpret_cast(CurrentGuestPtr); - CurrentGuestPtr != 0; - msghdr_guest = reinterpret_cast(CurrentGuestPtr)) { + for (cmsghdr32* msghdr_guest = reinterpret_cast(CurrentGuestPtr); CurrentGuestPtr != 0; + msghdr_guest = reinterpret_cast(CurrentGuestPtr)) { - CurrentHost->cmsg_level = msghdr_guest->cmsg_level; - CurrentHost->cmsg_type = msghdr_guest->cmsg_type; + CurrentHost->cmsg_level = msghdr_guest->cmsg_level; + CurrentHost->cmsg_type = msghdr_guest->cmsg_type; - if (msghdr_guest->cmsg_len) { - size_t SizeIncrease = (CMSG_LEN(0) - sizeof(cmsghdr32)); - CurrentHost->cmsg_len = msghdr_guest->cmsg_len + SizeIncrease; - msg.msg_controllen += SizeIncrease; - memcpy(CMSG_DATA(CurrentHost), msghdr_guest->cmsg_data, msghdr_guest->cmsg_len - sizeof(cmsghdr32)); - } + if (msghdr_guest->cmsg_len) { + size_t SizeIncrease = (CMSG_LEN(0) - sizeof(cmsghdr32)); + CurrentHost->cmsg_len = msghdr_guest->cmsg_len + SizeIncrease; + 
msg.msg_controllen += SizeIncrease; + memcpy(CMSG_DATA(CurrentHost), msghdr_guest->cmsg_data, msghdr_guest->cmsg_len - sizeof(cmsghdr32)); + } - // Go to next host - CurrentHost = CMSG_NXTHDR(&msg, CurrentHost); + // Go to next host + CurrentHost = CMSG_NXTHDR(&msg, CurrentHost); - // Go to next msg - if (msghdr_guest->cmsg_len < sizeof(cmsghdr32)) { + // Go to next msg + if (msghdr_guest->cmsg_len < sizeof(cmsghdr32)) { + CurrentGuestPtr = nullptr; + } else { + CurrentGuestPtr = reinterpret_cast(reinterpret_cast(CurrentGuestPtr) + msghdr_guest->cmsg_len); + CurrentGuestPtr = reinterpret_cast((reinterpret_cast(CurrentGuestPtr) + 3) & ~3ULL); + if (CurrentGuestPtr >= reinterpret_cast(reinterpret_cast(static_cast(guest.msg_control)) + guest.msg_controllen)) { CurrentGuestPtr = nullptr; } - else { - CurrentGuestPtr = reinterpret_cast(reinterpret_cast(CurrentGuestPtr) + msghdr_guest->cmsg_len); - CurrentGuestPtr = reinterpret_cast((reinterpret_cast(CurrentGuestPtr) + 3) & ~3ULL); - if (CurrentGuestPtr >= reinterpret_cast(reinterpret_cast(static_cast(guest.msg_control)) + guest.msg_controllen)) { - CurrentGuestPtr = nullptr; - } - } } } - - HostMmsg[i].msg_len = msgvec[i].msg_len; } - uint64_t Result = ::sendmmsg(sockfd, HostMmsg.data(), vlen, flags); - - if (Result != -1) { - // Update guest msglen - for (size_t i = 0; i < Result; ++i) { - msgvec[i].msg_len = HostMmsg[i].msg_len; - } - } - SYSCALL_ERRNO(); + HostMmsg[i].msg_len = msgvec[i].msg_len; } - static uint64_t SetSockOpt(int sockfd, int level, int optname, compat_ptr optval, int optlen) { - uint64_t Result{}; - - if (level == SOL_SOCKET) { - switch (optname) { - case SO_ATTACH_FILTER: - case SO_ATTACH_REUSEPORT_CBPF: { - struct sock_fprog32 { - uint16_t len; - uint32_t filter; - }; - struct sock_fprog64 { - uint16_t len; - uint64_t filter; - }; - - if (optlen != sizeof(sock_fprog32)) { - return -EINVAL; - } + uint64_t Result = ::sendmmsg(sockfd, HostMmsg.data(), vlen, flags); - sock_fprog32 *prog = 
reinterpret_cast(optval.Ptr); - sock_fprog64 prog64{}; - prog64.len = prog->len; - prog64.filter = prog->filter; - - Result = ::syscall(SYSCALL_DEF(setsockopt), - sockfd, - level, - optname, - &prog64, - sizeof(sock_fprog64) - ); - break; - } - case SO_RCVTIMEO_OLD: { - // _OLD uses old_timeval32. Needs to be converted - struct timeval tv64 = *reinterpret_cast(optval.Ptr); - Result = ::syscall(SYSCALL_DEF(setsockopt), - sockfd, - level, - SO_RCVTIMEO_NEW, - &tv64, - sizeof(tv64) - ); - break; - } - case SO_SNDTIMEO_OLD: { - // _OLD uses old_timeval32. Needs to be converted - struct timeval tv64 = *reinterpret_cast(optval.Ptr); - Result = ::syscall(SYSCALL_DEF(setsockopt), - sockfd, - level, - SO_SNDTIMEO_NEW, - &tv64, - sizeof(tv64) - ); - break; - } - // Each optname as a reminder which setting has been manually checked - case SO_DEBUG: - case SO_REUSEADDR: - case SO_TYPE: - case SO_ERROR: - case SO_DONTROUTE: - case SO_BROADCAST: - case SO_SNDBUF: - case SO_RCVBUF: - case SO_SNDBUFFORCE: - case SO_RCVBUFFORCE: - case SO_KEEPALIVE: - case SO_OOBINLINE: - case SO_NO_CHECK: - case SO_PRIORITY: - case SO_LINGER: - case SO_BSDCOMPAT: - case SO_REUSEPORT: - /** - * @name These end up differing between {x86,arm} and {powerpc, alpha, sparc, mips, parisc} - * @{ */ - case SO_PASSCRED: - case SO_PEERCRED: - case SO_RCVLOWAT: - case SO_SNDLOWAT: - /** @} */ - case SO_SECURITY_AUTHENTICATION: - case SO_SECURITY_ENCRYPTION_TRANSPORT: - case SO_SECURITY_ENCRYPTION_NETWORK: - case SO_DETACH_FILTER: - case SO_PEERNAME: - case SO_TIMESTAMP_OLD: // Returns int32_t boolean - case SO_ACCEPTCONN: - case SO_PEERSEC: - // Gap 32, 33 - case SO_PASSSEC: - case SO_TIMESTAMPNS_OLD: // Returns int32_t boolean - case SO_MARK: - case SO_TIMESTAMPING_OLD: // Returns so_timestamping - case SO_PROTOCOL: - case SO_DOMAIN: - case SO_RXQ_OVFL: - case SO_WIFI_STATUS: - case SO_PEEK_OFF: - case SO_NOFCS: - case SO_LOCK_FILTER: - case SO_SELECT_ERR_QUEUE: - case SO_BUSY_POLL: - case 
SO_MAX_PACING_RATE: - case SO_BPF_EXTENSIONS: - case SO_INCOMING_CPU: - case SO_ATTACH_BPF: - case SO_ATTACH_REUSEPORT_EBPF: - case SO_CNX_ADVICE: - // Gap 54 (SCM_TIMESTAMPING_OPT_STATS) - case SO_MEMINFO: - case SO_INCOMING_NAPI_ID: - case SO_COOKIE: // Cookie always returns 64-bit even on 32-bit - // Gap 58 (SCM_TIMESTAMPING_PKTINFO) - case SO_PEERGROUPS: - case SO_ZEROCOPY: - case SO_TXTIME: - case SO_BINDTOIFINDEX: - case SO_TIMESTAMP_NEW: - case SO_TIMESTAMPNS_NEW: - case SO_TIMESTAMPING_NEW: - case SO_RCVTIMEO_NEW: - case SO_SNDTIMEO_NEW: - case SO_DETACH_REUSEPORT_BPF: - case SO_PREFER_BUSY_POLL: - case SO_BUSY_POLL_BUDGET: - case SO_NETNS_COOKIE: // Cookie always returns 64-bit even on 32-bit - case SO_BUF_LOCK: - case SO_RESERVE_MEM: - case SO_TXREHASH: - case SO_RCVMARK: - case SO_PASSPIDFD: - case SO_PEERPIDFD: - default: - Result = ::syscall(SYSCALL_DEF(setsockopt), - sockfd, - level, - optname, - reinterpret_cast(optval.Ptr), - optlen - ); - break; - } + if (Result != -1) { + // Update guest msglen + for (size_t i = 0; i < Result; ++i) { + msgvec[i].msg_len = HostMmsg[i].msg_len; } - else { - Result = ::syscall(SYSCALL_DEF(setsockopt), - sockfd, - level, - optname, - reinterpret_cast(optval.Ptr), - optlen - ); - } - - SYSCALL_ERRNO(); } + SYSCALL_ERRNO(); +} - static uint64_t GetSockOpt(int sockfd, int level, int optname, compat_ptr optval, compat_ptr optlen) { - uint64_t Result{}; - if (level == SOL_SOCKET) { - switch (optname) { - case SO_RCVTIMEO_OLD: { - // _OLD uses old_timeval32. Needs to be converted - struct timeval tv64{}; - Result = ::syscall(SYSCALL_DEF(getsockopt), - sockfd, - level, - SO_RCVTIMEO_NEW, - &tv64, - sizeof(tv64) - ); - *reinterpret_cast(optval.Ptr) = tv64; - break; - } - case SO_SNDTIMEO_OLD: { - // _OLD uses old_timeval32. 
Needs to be converted - struct timeval tv64{}; - Result = ::syscall(SYSCALL_DEF(getsockopt), - sockfd, - level, - SO_SNDTIMEO_NEW, - &tv64, - sizeof(tv64) - ); - *reinterpret_cast(optval.Ptr) = tv64; - break; - } - // Each optname as a reminder which setting has been manually checked - case SO_DEBUG: - case SO_REUSEADDR: - case SO_TYPE: - case SO_ERROR: - case SO_DONTROUTE: - case SO_BROADCAST: - case SO_SNDBUF: - case SO_RCVBUF: - case SO_SNDBUFFORCE: - case SO_RCVBUFFORCE: - case SO_KEEPALIVE: - case SO_OOBINLINE: - case SO_NO_CHECK: - case SO_PRIORITY: - case SO_LINGER: - case SO_BSDCOMPAT: - case SO_REUSEPORT: - /** - * @name These end up differing between {x86,arm} and {powerpc, alpha, sparc, mips, parisc} - * @{ */ - case SO_PASSCRED: - case SO_PEERCRED: - case SO_RCVLOWAT: - case SO_SNDLOWAT: - /** @} */ - case SO_SECURITY_AUTHENTICATION: - case SO_SECURITY_ENCRYPTION_TRANSPORT: - case SO_SECURITY_ENCRYPTION_NETWORK: - case SO_ATTACH_FILTER: // Renamed to SO_GET_FILTER on get. Same between 32-bit and 64-bit - case SO_DETACH_FILTER: - case SO_PEERNAME: - case SO_TIMESTAMP_OLD: // Returns int32_t boolean - case SO_ACCEPTCONN: - case SO_PEERSEC: - // Gap 32, 33 - case SO_PASSSEC: - case SO_TIMESTAMPNS_OLD: // Returns int32_t boolean - case SO_MARK: - case SO_TIMESTAMPING_OLD: // Returns so_timestamping - case SO_PROTOCOL: - case SO_DOMAIN: - case SO_RXQ_OVFL: - case SO_WIFI_STATUS: - case SO_PEEK_OFF: - case SO_NOFCS: - case SO_LOCK_FILTER: - case SO_SELECT_ERR_QUEUE: - case SO_BUSY_POLL: - case SO_MAX_PACING_RATE: - case SO_BPF_EXTENSIONS: - case SO_INCOMING_CPU: - case SO_ATTACH_BPF: - case SO_ATTACH_REUSEPORT_CBPF: // Doesn't do anything in get - case SO_ATTACH_REUSEPORT_EBPF: - case SO_CNX_ADVICE: - // Gap 54 (SCM_TIMESTAMPING_OPT_STATS) - case SO_MEMINFO: - case SO_INCOMING_NAPI_ID: - case SO_COOKIE: // Cookie always returns 64-bit even on 32-bit - // Gap 58 (SCM_TIMESTAMPING_PKTINFO) - case SO_PEERGROUPS: - case SO_ZEROCOPY: - case SO_TXTIME: - case 
SO_BINDTOIFINDEX: - case SO_TIMESTAMP_NEW: - case SO_TIMESTAMPNS_NEW: - case SO_TIMESTAMPING_NEW: - case SO_RCVTIMEO_NEW: - case SO_SNDTIMEO_NEW: - case SO_DETACH_REUSEPORT_BPF: - case SO_PREFER_BUSY_POLL: - case SO_BUSY_POLL_BUDGET: - case SO_NETNS_COOKIE: // Cookie always returns 64-bit even on 32-bit - case SO_BUF_LOCK: - case SO_RESERVE_MEM: - default: - Result = ::syscall(SYSCALL_DEF(getsockopt), sockfd, level, optname, optval, optlen); - break; +static uint64_t SetSockOpt(int sockfd, int level, int optname, compat_ptr optval, int optlen) { + uint64_t Result {}; + + if (level == SOL_SOCKET) { + switch (optname) { + case SO_ATTACH_FILTER: + case SO_ATTACH_REUSEPORT_CBPF: { + struct sock_fprog32 { + uint16_t len; + uint32_t filter; + }; + struct sock_fprog64 { + uint16_t len; + uint64_t filter; + }; + + if (optlen != sizeof(sock_fprog32)) { + return -EINVAL; } + + sock_fprog32* prog = reinterpret_cast(optval.Ptr); + sock_fprog64 prog64 {}; + prog64.len = prog->len; + prog64.filter = prog->filter; + + Result = ::syscall(SYSCALL_DEF(setsockopt), sockfd, level, optname, &prog64, sizeof(sock_fprog64)); + break; } - else { - Result = ::syscall(SYSCALL_DEF(getsockopt), sockfd, level, optname, optval, optlen); + case SO_RCVTIMEO_OLD: { + // _OLD uses old_timeval32. Needs to be converted + struct timeval tv64 = *reinterpret_cast(optval.Ptr); + Result = ::syscall(SYSCALL_DEF(setsockopt), sockfd, level, SO_RCVTIMEO_NEW, &tv64, sizeof(tv64)); + break; } - SYSCALL_ERRNO(); + case SO_SNDTIMEO_OLD: { + // _OLD uses old_timeval32. 
Needs to be converted + struct timeval tv64 = *reinterpret_cast(optval.Ptr); + Result = ::syscall(SYSCALL_DEF(setsockopt), sockfd, level, SO_SNDTIMEO_NEW, &tv64, sizeof(tv64)); + break; + } + // Each optname as a reminder which setting has been manually checked + case SO_DEBUG: + case SO_REUSEADDR: + case SO_TYPE: + case SO_ERROR: + case SO_DONTROUTE: + case SO_BROADCAST: + case SO_SNDBUF: + case SO_RCVBUF: + case SO_SNDBUFFORCE: + case SO_RCVBUFFORCE: + case SO_KEEPALIVE: + case SO_OOBINLINE: + case SO_NO_CHECK: + case SO_PRIORITY: + case SO_LINGER: + case SO_BSDCOMPAT: + case SO_REUSEPORT: + /** + * @name These end up differing between {x86,arm} and {powerpc, alpha, sparc, mips, parisc} + * @{ */ + case SO_PASSCRED: + case SO_PEERCRED: + case SO_RCVLOWAT: + case SO_SNDLOWAT: + /** @} */ + case SO_SECURITY_AUTHENTICATION: + case SO_SECURITY_ENCRYPTION_TRANSPORT: + case SO_SECURITY_ENCRYPTION_NETWORK: + case SO_DETACH_FILTER: + case SO_PEERNAME: + case SO_TIMESTAMP_OLD: // Returns int32_t boolean + case SO_ACCEPTCONN: + case SO_PEERSEC: + // Gap 32, 33 + case SO_PASSSEC: + case SO_TIMESTAMPNS_OLD: // Returns int32_t boolean + case SO_MARK: + case SO_TIMESTAMPING_OLD: // Returns so_timestamping + case SO_PROTOCOL: + case SO_DOMAIN: + case SO_RXQ_OVFL: + case SO_WIFI_STATUS: + case SO_PEEK_OFF: + case SO_NOFCS: + case SO_LOCK_FILTER: + case SO_SELECT_ERR_QUEUE: + case SO_BUSY_POLL: + case SO_MAX_PACING_RATE: + case SO_BPF_EXTENSIONS: + case SO_INCOMING_CPU: + case SO_ATTACH_BPF: + case SO_ATTACH_REUSEPORT_EBPF: + case SO_CNX_ADVICE: + // Gap 54 (SCM_TIMESTAMPING_OPT_STATS) + case SO_MEMINFO: + case SO_INCOMING_NAPI_ID: + case SO_COOKIE: // Cookie always returns 64-bit even on 32-bit + // Gap 58 (SCM_TIMESTAMPING_PKTINFO) + case SO_PEERGROUPS: + case SO_ZEROCOPY: + case SO_TXTIME: + case SO_BINDTOIFINDEX: + case SO_TIMESTAMP_NEW: + case SO_TIMESTAMPNS_NEW: + case SO_TIMESTAMPING_NEW: + case SO_RCVTIMEO_NEW: + case SO_SNDTIMEO_NEW: + case SO_DETACH_REUSEPORT_BPF: + 
case SO_PREFER_BUSY_POLL: + case SO_BUSY_POLL_BUDGET: + case SO_NETNS_COOKIE: // Cookie always returns 64-bit even on 32-bit + case SO_BUF_LOCK: + case SO_RESERVE_MEM: + case SO_TXREHASH: + case SO_RCVMARK: + case SO_PASSPIDFD: + case SO_PEERPIDFD: + default: Result = ::syscall(SYSCALL_DEF(setsockopt), sockfd, level, optname, reinterpret_cast(optval.Ptr), optlen); break; + } + } else { + Result = ::syscall(SYSCALL_DEF(setsockopt), sockfd, level, optname, reinterpret_cast(optval.Ptr), optlen); } - void RegisterSocket(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X32(socketcall, [](FEXCore::Core::CpuStateFrame *Frame, uint32_t call, uint32_t *Arguments) -> uint64_t { - uint64_t Result{}; - - switch (call) { - case OP_SOCKET: { - Result = ::socket(Arguments[0], Arguments[1], Arguments[2]); - break; - } - case OP_BIND: { - Result = ::bind(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2]); - break; - } - case OP_CONNECT: { - Result = ::connect(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2]); - break; - } - case OP_LISTEN: { - Result = ::listen(Arguments[0], Arguments[1]); - break; - } - case OP_ACCEPT: { - Result = ::accept(Arguments[0], reinterpret_cast(Arguments[1]), reinterpret_cast(Arguments[2])); - break; - } - case OP_GETSOCKNAME: { - Result = ::getsockname(Arguments[0], reinterpret_cast(Arguments[1]), reinterpret_cast(Arguments[2])); - break; - } - case OP_GETPEERNAME: { - Result = ::getpeername(Arguments[0], reinterpret_cast(Arguments[1]), reinterpret_cast(Arguments[2])); - break; - } - case OP_SOCKETPAIR: { - Result = ::socketpair(Arguments[0], Arguments[1], Arguments[2], reinterpret_cast(Arguments[3])); - break; - } - case OP_SEND: { - Result = ::send(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2], Arguments[3]); - break; - } - case OP_RECV: { - Result = ::recv(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2], Arguments[3]); - break; - } - case OP_SENDTO: { - Result = ::sendto( - Arguments[0], - 
reinterpret_cast(Arguments[1]), - Arguments[2], - Arguments[3], - reinterpret_cast(Arguments[4]), reinterpret_cast(Arguments[5]) - ); - break; - } - case OP_RECVFROM: { - Result = ::recvfrom( - Arguments[0], - reinterpret_cast(Arguments[1]), - Arguments[2], - Arguments[3], - reinterpret_cast(Arguments[4]), reinterpret_cast(Arguments[5]) - ); - break; - } - case OP_SHUTDOWN: { - Result = ::shutdown(Arguments[0], Arguments[1]); - break; - } - case OP_SETSOCKOPT: { - return SetSockOpt( - Arguments[0], - Arguments[1], - Arguments[2], - Arguments[3], - reinterpret_cast(Arguments[4]) - ); - break; - } - case OP_GETSOCKOPT: { - return GetSockOpt( - Arguments[0], - Arguments[1], - Arguments[2], - reinterpret_cast(Arguments[3]), - reinterpret_cast(Arguments[4]) - ); - break; - } - case OP_SENDMSG: { - return SendMsg(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2]); - break; - } - case OP_RECVMSG: { - return RecvMsg(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2]); - break; - } - case OP_ACCEPT4: { - return ::accept4(Arguments[0], reinterpret_cast(Arguments[1]), reinterpret_cast(Arguments[2]), Arguments[3]); - break; - } - case OP_RECVMMSG: { - timespec32 *timeout_ts = reinterpret_cast(Arguments[4]); - struct timespec tp64{}; - struct timespec *timed_ptr{}; - if (timeout_ts) { - tp64 = *timeout_ts; - timed_ptr = &tp64; - } - - uint64_t Result = RecvMMsg(Arguments[0], Arguments[1], Arguments[2], Arguments[3], timed_ptr); - - if (timeout_ts) { - *timeout_ts = tp64; - } - - return Result; - break; - } - case OP_SENDMMSG: { - return SendMMsg(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2], Arguments[3]); - break; - } - default: - LOGMAN_MSG_A_FMT("Unsupported socketcall op: {}", call); - break; - } - SYSCALL_ERRNO(); - }); + SYSCALL_ERRNO(); +} - REGISTER_SYSCALL_IMPL_X32(sendmsg, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, const struct msghdr32 *msg, int flags) -> uint64_t { - return SendMsg(sockfd, msg, flags); - }); +static uint64_t 
GetSockOpt(int sockfd, int level, int optname, compat_ptr optval, compat_ptr optlen) { + uint64_t Result {}; + if (level == SOL_SOCKET) { + switch (optname) { + case SO_RCVTIMEO_OLD: { + // _OLD uses old_timeval32. Needs to be converted + struct timeval tv64 {}; + Result = ::syscall(SYSCALL_DEF(getsockopt), sockfd, level, SO_RCVTIMEO_NEW, &tv64, sizeof(tv64)); + *reinterpret_cast(optval.Ptr) = tv64; + break; + } + case SO_SNDTIMEO_OLD: { + // _OLD uses old_timeval32. Needs to be converted + struct timeval tv64 {}; + Result = ::syscall(SYSCALL_DEF(getsockopt), sockfd, level, SO_SNDTIMEO_NEW, &tv64, sizeof(tv64)); + *reinterpret_cast(optval.Ptr) = tv64; + break; + } + // Each optname as a reminder which setting has been manually checked + case SO_DEBUG: + case SO_REUSEADDR: + case SO_TYPE: + case SO_ERROR: + case SO_DONTROUTE: + case SO_BROADCAST: + case SO_SNDBUF: + case SO_RCVBUF: + case SO_SNDBUFFORCE: + case SO_RCVBUFFORCE: + case SO_KEEPALIVE: + case SO_OOBINLINE: + case SO_NO_CHECK: + case SO_PRIORITY: + case SO_LINGER: + case SO_BSDCOMPAT: + case SO_REUSEPORT: + /** + * @name These end up differing between {x86,arm} and {powerpc, alpha, sparc, mips, parisc} + * @{ */ + case SO_PASSCRED: + case SO_PEERCRED: + case SO_RCVLOWAT: + case SO_SNDLOWAT: + /** @} */ + case SO_SECURITY_AUTHENTICATION: + case SO_SECURITY_ENCRYPTION_TRANSPORT: + case SO_SECURITY_ENCRYPTION_NETWORK: + case SO_ATTACH_FILTER: // Renamed to SO_GET_FILTER on get. 
Same between 32-bit and 64-bit + case SO_DETACH_FILTER: + case SO_PEERNAME: + case SO_TIMESTAMP_OLD: // Returns int32_t boolean + case SO_ACCEPTCONN: + case SO_PEERSEC: + // Gap 32, 33 + case SO_PASSSEC: + case SO_TIMESTAMPNS_OLD: // Returns int32_t boolean + case SO_MARK: + case SO_TIMESTAMPING_OLD: // Returns so_timestamping + case SO_PROTOCOL: + case SO_DOMAIN: + case SO_RXQ_OVFL: + case SO_WIFI_STATUS: + case SO_PEEK_OFF: + case SO_NOFCS: + case SO_LOCK_FILTER: + case SO_SELECT_ERR_QUEUE: + case SO_BUSY_POLL: + case SO_MAX_PACING_RATE: + case SO_BPF_EXTENSIONS: + case SO_INCOMING_CPU: + case SO_ATTACH_BPF: + case SO_ATTACH_REUSEPORT_CBPF: // Doesn't do anything in get + case SO_ATTACH_REUSEPORT_EBPF: + case SO_CNX_ADVICE: + // Gap 54 (SCM_TIMESTAMPING_OPT_STATS) + case SO_MEMINFO: + case SO_INCOMING_NAPI_ID: + case SO_COOKIE: // Cookie always returns 64-bit even on 32-bit + // Gap 58 (SCM_TIMESTAMPING_PKTINFO) + case SO_PEERGROUPS: + case SO_ZEROCOPY: + case SO_TXTIME: + case SO_BINDTOIFINDEX: + case SO_TIMESTAMP_NEW: + case SO_TIMESTAMPNS_NEW: + case SO_TIMESTAMPING_NEW: + case SO_RCVTIMEO_NEW: + case SO_SNDTIMEO_NEW: + case SO_DETACH_REUSEPORT_BPF: + case SO_PREFER_BUSY_POLL: + case SO_BUSY_POLL_BUDGET: + case SO_NETNS_COOKIE: // Cookie always returns 64-bit even on 32-bit + case SO_BUF_LOCK: + case SO_RESERVE_MEM: + default: Result = ::syscall(SYSCALL_DEF(getsockopt), sockfd, level, optname, optval, optlen); break; + } + } else { + Result = ::syscall(SYSCALL_DEF(getsockopt), sockfd, level, optname, optval, optlen); + } + SYSCALL_ERRNO(); +} - REGISTER_SYSCALL_IMPL_X32(sendmmsg, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, compat_ptr msgvec, uint32_t vlen, int flags) -> uint64_t { - return SendMMsg(sockfd, msgvec, vlen, flags); - }); +void RegisterSocket(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X32(socketcall, [](FEXCore::Core::CpuStateFrame* Frame, uint32_t call, uint32_t* Arguments) -> uint64_t { + uint64_t Result {}; - 
REGISTER_SYSCALL_IMPL_X32(recvmmsg, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, compat_ptr msgvec, uint32_t vlen, int flags, timespec32 *timeout_ts) -> uint64_t { - struct timespec tp64{}; - struct timespec *timed_ptr{}; + switch (call) { + case OP_SOCKET: { + Result = ::socket(Arguments[0], Arguments[1], Arguments[2]); + break; + } + case OP_BIND: { + Result = ::bind(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2]); + break; + } + case OP_CONNECT: { + Result = ::connect(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2]); + break; + } + case OP_LISTEN: { + Result = ::listen(Arguments[0], Arguments[1]); + break; + } + case OP_ACCEPT: { + Result = ::accept(Arguments[0], reinterpret_cast(Arguments[1]), reinterpret_cast(Arguments[2])); + break; + } + case OP_GETSOCKNAME: { + Result = ::getsockname(Arguments[0], reinterpret_cast(Arguments[1]), reinterpret_cast(Arguments[2])); + break; + } + case OP_GETPEERNAME: { + Result = ::getpeername(Arguments[0], reinterpret_cast(Arguments[1]), reinterpret_cast(Arguments[2])); + break; + } + case OP_SOCKETPAIR: { + Result = ::socketpair(Arguments[0], Arguments[1], Arguments[2], reinterpret_cast(Arguments[3])); + break; + } + case OP_SEND: { + Result = ::send(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2], Arguments[3]); + break; + } + case OP_RECV: { + Result = ::recv(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2], Arguments[3]); + break; + } + case OP_SENDTO: { + Result = ::sendto(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2], Arguments[3], + reinterpret_cast(Arguments[4]), reinterpret_cast(Arguments[5])); + break; + } + case OP_RECVFROM: { + Result = ::recvfrom(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2], Arguments[3], + reinterpret_cast(Arguments[4]), reinterpret_cast(Arguments[5])); + break; + } + case OP_SHUTDOWN: { + Result = ::shutdown(Arguments[0], Arguments[1]); + break; + } + case OP_SETSOCKOPT: { + return SetSockOpt(Arguments[0], 
Arguments[1], Arguments[2], Arguments[3], reinterpret_cast(Arguments[4])); + break; + } + case OP_GETSOCKOPT: { + return GetSockOpt(Arguments[0], Arguments[1], Arguments[2], reinterpret_cast(Arguments[3]), + reinterpret_cast(Arguments[4])); + break; + } + case OP_SENDMSG: { + return SendMsg(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2]); + break; + } + case OP_RECVMSG: { + return RecvMsg(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2]); + break; + } + case OP_ACCEPT4: { + return ::accept4(Arguments[0], reinterpret_cast(Arguments[1]), reinterpret_cast(Arguments[2]), Arguments[3]); + break; + } + case OP_RECVMMSG: { + timespec32* timeout_ts = reinterpret_cast(Arguments[4]); + struct timespec tp64 {}; + struct timespec* timed_ptr {}; if (timeout_ts) { tp64 = *timeout_ts; timed_ptr = &tp64; } - uint64_t Result = RecvMMsg(sockfd, msgvec, vlen, flags, timed_ptr); + uint64_t Result = RecvMMsg(Arguments[0], Arguments[1], Arguments[2], Arguments[3], timed_ptr); if (timeout_ts) { *timeout_ts = tp64; } return Result; - }); + break; + } + case OP_SENDMMSG: { + return SendMMsg(Arguments[0], reinterpret_cast(Arguments[1]), Arguments[2], Arguments[3]); + break; + } + default: LOGMAN_MSG_A_FMT("Unsupported socketcall op: {}", call); break; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(sendmsg, [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, const struct msghdr32* msg, int flags) -> uint64_t { + return SendMsg(sockfd, msg, flags); + }); + + REGISTER_SYSCALL_IMPL_X32(sendmmsg, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, compat_ptr msgvec, uint32_t vlen, + int flags) -> uint64_t { return SendMMsg(sockfd, msgvec, vlen, flags); }); + + REGISTER_SYSCALL_IMPL_X32(recvmmsg, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, compat_ptr msgvec, uint32_t vlen, int flags, + timespec32* timeout_ts) -> uint64_t { + struct timespec tp64 {}; + struct timespec* timed_ptr {}; + if (timeout_ts) { + tp64 = *timeout_ts; + timed_ptr = &tp64; + } - 
REGISTER_SYSCALL_IMPL_X32(recvmmsg_time64, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, compat_ptr msgvec, uint32_t vlen, int flags, struct timespec *timeout_ts) -> uint64_t { - return RecvMMsg(sockfd, msgvec, vlen, flags, timeout_ts); - }); + uint64_t Result = RecvMMsg(sockfd, msgvec, vlen, flags, timed_ptr); - REGISTER_SYSCALL_IMPL_X32(recvmsg, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, struct msghdr32 *msg, int flags) -> uint64_t { - return RecvMsg(sockfd, msg, flags); - }); + if (timeout_ts) { + *timeout_ts = tp64; + } - REGISTER_SYSCALL_IMPL_X32(setsockopt, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, int level, int optname, compat_ptr optval, socklen_t optlen) -> uint64_t { - return SetSockOpt(sockfd, level, optname, optval, optlen); - }); + return Result; + }); - REGISTER_SYSCALL_IMPL_X32(getsockopt, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, int level, int optname, compat_ptr optval, compat_ptr optlen) -> uint64_t { - return GetSockOpt(sockfd, level, optname, optval, optlen); - }); - } + REGISTER_SYSCALL_IMPL_X32(recvmmsg_time64, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, compat_ptr msgvec, uint32_t vlen, int flags, + struct timespec* timeout_ts) -> uint64_t { return RecvMMsg(sockfd, msgvec, vlen, flags, timeout_ts); }); + + REGISTER_SYSCALL_IMPL_X32(recvmsg, [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, struct msghdr32* msg, int flags) -> uint64_t { + return RecvMsg(sockfd, msg, flags); + }); + + REGISTER_SYSCALL_IMPL_X32(setsockopt, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, int level, int optname, compat_ptr optval, + socklen_t optlen) -> uint64_t { return SetSockOpt(sockfd, level, optname, optval, optlen); }); + + REGISTER_SYSCALL_IMPL_X32(getsockopt, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, int level, int optname, compat_ptr optval, + compat_ptr optlen) -> uint64_t { return GetSockOpt(sockfd, level, optname, optval, optlen); }); } +} // namespace FEX::HLE::x32 diff --git 
a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Stubs.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Stubs.cpp index f276211db1..a07274b218 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Stubs.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Stubs.cpp @@ -14,24 +14,22 @@ tags: LinuxSyscalls|syscalls-x86-32 #include #include -#define SYSCALL_STUB(name) do { ERROR_AND_DIE_FMT("Syscall: " #name " stub!"); return -ENOSYS; } while(0) +#define SYSCALL_STUB(name) \ + do { \ + ERROR_AND_DIE_FMT("Syscall: " #name " stub!"); \ + return -ENOSYS; \ + } while (0) namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x32 { - void RegisterStubs(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X32(readdir, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - SYSCALL_STUB(readdir); - }); - - REGISTER_SYSCALL_IMPL_X32(vm86old, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - return -ENOSYS; - }); - - REGISTER_SYSCALL_IMPL_X32(vm86, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - return -ENOSYS; - }); - } +void RegisterStubs(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X32(readdir, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { SYSCALL_STUB(readdir); }); + + REGISTER_SYSCALL_IMPL_X32(vm86old, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { return -ENOSYS; }); + + REGISTER_SYSCALL_IMPL_X32(vm86, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { return -ENOSYS; }); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Syscalls.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Syscalls.cpp index 73aa821403..91f4177990 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Syscalls.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Syscalls.cpp @@ -24,96 +24,99 @@ tags: LinuxSyscalls|syscalls-x86-32 #include namespace FEX::HLE::x32 { - void RegisterEpoll(FEX::HLE::SyscallHandler *Handler); - void 
RegisterFD(FEX::HLE::SyscallHandler *Handler); - void RegisterFS(FEX::HLE::SyscallHandler *Handler); - void RegisterInfo(FEX::HLE::SyscallHandler *Handler); - void RegisterIO(FEX::HLE::SyscallHandler *Handler); - void RegisterMemory(FEX::HLE::SyscallHandler *Handler); - void RegisterMsg(FEX::HLE::SyscallHandler *Handler); - void RegisterNotImplemented(FEX::HLE::SyscallHandler *Handler); - void RegisterSched(FEX::HLE::SyscallHandler *Handler); - void RegisterSemaphore(FEX::HLE::SyscallHandler *Handler); - void RegisterSignals(FEX::HLE::SyscallHandler *Handler); - void RegisterSocket(FEX::HLE::SyscallHandler *Handler); - void RegisterStubs(FEX::HLE::SyscallHandler *Handler); - void RegisterThread(FEX::HLE::SyscallHandler *Handler); - void RegisterTime(FEX::HLE::SyscallHandler *Handler); - void RegisterTimer(FEX::HLE::SyscallHandler *Handler); +void RegisterEpoll(FEX::HLE::SyscallHandler* Handler); +void RegisterFD(FEX::HLE::SyscallHandler* Handler); +void RegisterFS(FEX::HLE::SyscallHandler* Handler); +void RegisterInfo(FEX::HLE::SyscallHandler* Handler); +void RegisterIO(FEX::HLE::SyscallHandler* Handler); +void RegisterMemory(FEX::HLE::SyscallHandler* Handler); +void RegisterMsg(FEX::HLE::SyscallHandler* Handler); +void RegisterNotImplemented(FEX::HLE::SyscallHandler* Handler); +void RegisterSched(FEX::HLE::SyscallHandler* Handler); +void RegisterSemaphore(FEX::HLE::SyscallHandler* Handler); +void RegisterSignals(FEX::HLE::SyscallHandler* Handler); +void RegisterSocket(FEX::HLE::SyscallHandler* Handler); +void RegisterStubs(FEX::HLE::SyscallHandler* Handler); +void RegisterThread(FEX::HLE::SyscallHandler* Handler); +void RegisterTime(FEX::HLE::SyscallHandler* Handler); +void RegisterTimer(FEX::HLE::SyscallHandler* Handler); - x32SyscallHandler::x32SyscallHandler(FEXCore::Context::Context *ctx, FEX::HLE::SignalDelegator *_SignalDelegation, fextl::unique_ptr Allocator) - : SyscallHandler{ctx, _SignalDelegation}, AllocHandler{std::move(Allocator)} { - OSABI = 
FEXCore::HLE::SyscallOSABI::OS_LINUX32; - RegisterSyscallHandlers(); - } +x32SyscallHandler::x32SyscallHandler(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* _SignalDelegation, + fextl::unique_ptr Allocator) + : SyscallHandler {ctx, _SignalDelegation} + , AllocHandler {std::move(Allocator)} { + OSABI = FEXCore::HLE::SyscallOSABI::OS_LINUX32; + RegisterSyscallHandlers(); +} - void x32SyscallHandler::RegisterSyscallHandlers() { - auto cvt = [](auto in) { - union { - decltype(in) val; - void *raw; - } raw; - raw.val = in; - return raw.raw; - }; +void x32SyscallHandler::RegisterSyscallHandlers() { + auto cvt = [](auto in) { + union { + decltype(in) val; + void* raw; + } raw; + raw.val = in; + return raw.raw; + }; - Definitions.resize(FEX::HLE::x32::SYSCALL_x86_MAX, SyscallFunctionDefinition { - .NumArgs = 255, - .Ptr = cvt(&UnimplementedSyscall), - }); + Definitions.resize(FEX::HLE::x32::SYSCALL_x86_MAX, SyscallFunctionDefinition { + .NumArgs = 255, + .Ptr = cvt(&UnimplementedSyscall), + }); - FEX::HLE::RegisterEpoll(this); - FEX::HLE::RegisterFD(this); - FEX::HLE::RegisterFS(this); - FEX::HLE::RegisterInfo(this); - FEX::HLE::RegisterIO(this); - FEX::HLE::RegisterIOUring(this); - FEX::HLE::RegisterKey(this); - FEX::HLE::RegisterMemory(this); - FEX::HLE::RegisterMsg(this); - FEX::HLE::RegisterNamespace(this); - FEX::HLE::RegisterSched(this); - FEX::HLE::RegisterSemaphore(this); - FEX::HLE::RegisterSHM(this); - FEX::HLE::RegisterSignals(this); - FEX::HLE::RegisterSocket(this); - FEX::HLE::RegisterThread(this); - FEX::HLE::RegisterTime(this); - FEX::HLE::RegisterTimer(this); - FEX::HLE::RegisterNotImplemented(this); - FEX::HLE::RegisterStubs(this); + FEX::HLE::RegisterEpoll(this); + FEX::HLE::RegisterFD(this); + FEX::HLE::RegisterFS(this); + FEX::HLE::RegisterInfo(this); + FEX::HLE::RegisterIO(this); + FEX::HLE::RegisterIOUring(this); + FEX::HLE::RegisterKey(this); + FEX::HLE::RegisterMemory(this); + FEX::HLE::RegisterMsg(this); + 
FEX::HLE::RegisterNamespace(this); + FEX::HLE::RegisterSched(this); + FEX::HLE::RegisterSemaphore(this); + FEX::HLE::RegisterSHM(this); + FEX::HLE::RegisterSignals(this); + FEX::HLE::RegisterSocket(this); + FEX::HLE::RegisterThread(this); + FEX::HLE::RegisterTime(this); + FEX::HLE::RegisterTimer(this); + FEX::HLE::RegisterNotImplemented(this); + FEX::HLE::RegisterStubs(this); - // 32bit specific - FEX::HLE::x32::RegisterEpoll(this); - FEX::HLE::x32::RegisterFD(this); - FEX::HLE::x32::RegisterFS(this); - FEX::HLE::x32::RegisterInfo(this); - FEX::HLE::x32::RegisterIO(this); - FEX::HLE::x32::RegisterMemory(this); - FEX::HLE::x32::RegisterMsg(this); - FEX::HLE::x32::RegisterNotImplemented(this); - FEX::HLE::x32::RegisterSched(this); - FEX::HLE::x32::RegisterSemaphore(this); - FEX::HLE::x32::RegisterSignals(this); - FEX::HLE::x32::RegisterSocket(this); - FEX::HLE::x32::RegisterStubs(this); - FEX::HLE::x32::RegisterThread(this); - FEX::HLE::x32::RegisterTime(this); - FEX::HLE::x32::RegisterTimer(this); + // 32bit specific + FEX::HLE::x32::RegisterEpoll(this); + FEX::HLE::x32::RegisterFD(this); + FEX::HLE::x32::RegisterFS(this); + FEX::HLE::x32::RegisterInfo(this); + FEX::HLE::x32::RegisterIO(this); + FEX::HLE::x32::RegisterMemory(this); + FEX::HLE::x32::RegisterMsg(this); + FEX::HLE::x32::RegisterNotImplemented(this); + FEX::HLE::x32::RegisterSched(this); + FEX::HLE::x32::RegisterSemaphore(this); + FEX::HLE::x32::RegisterSignals(this); + FEX::HLE::x32::RegisterSocket(this); + FEX::HLE::x32::RegisterStubs(this); + FEX::HLE::x32::RegisterThread(this); + FEX::HLE::x32::RegisterTime(this); + FEX::HLE::x32::RegisterTimer(this); - FEX::HLE::x32::InitializeStaticIoctlHandlers(); + FEX::HLE::x32::InitializeStaticIoctlHandlers(); #if PRINT_MISSING_SYSCALLS - for (auto &Syscall: SyscallNames) { - if (Definitions[Syscall.first].Ptr == cvt(&UnimplementedSyscall)) { - LogMan::Msg::DFmt("Unimplemented syscall: {}: {}", Syscall.first, Syscall.second); - } + for (auto& Syscall : 
SyscallNames) { + if (Definitions[Syscall.first].Ptr == cvt(&UnimplementedSyscall)) { + LogMan::Msg::DFmt("Unimplemented syscall: {}: {}", Syscall.first, Syscall.second); } -#endif - } - - fextl::unique_ptr CreateHandler(FEXCore::Context::Context *ctx, FEX::HLE::SignalDelegator *_SignalDelegation, fextl::unique_ptr Allocator) { - return fextl::make_unique(ctx, _SignalDelegation, std::move(Allocator)); } +#endif +} +fextl::unique_ptr +CreateHandler(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* _SignalDelegation, fextl::unique_ptr Allocator) { + return fextl::make_unique(ctx, _SignalDelegation, std::move(Allocator)); } + +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Thread.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Thread.cpp index 9bbfbdfefa..5e63322c28 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Thread.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Thread.cpp @@ -35,402 +35,393 @@ ARG_TO_STR(FEX::HLE::x32::compat_ptr, "%x") ARG_TO_STR(FEX::HLE::x32::compat_ptr, "%x") namespace FEX::HLE::x32 { - // The kernel only gives 32-bit userspace 3 TLS segments - // Depending on if the host kernel is 32-bit or 64-bit then the TLS index assigned is different - // - // Host kernel x86_64, valid TLS enries: 12,13,14 - // Host kernel x86, valid TLS enries: 6,7,8 - // Since we are claiming to be a 64-bit kernel, use the 64-bit range - // - // 6/12 = glibc - // 7/13 = wine fs - // 8/14 = etc - constexpr uint32_t TLS_NextEntry = 12; - constexpr uint32_t TLS_MaxEntry = TLS_NextEntry+3; - - uint64_t SetThreadArea(FEXCore::Core::CpuStateFrame *Frame, void *tls) { - struct x32::user_desc* u_info = reinterpret_cast(tls); - if (u_info->entry_number == -1) { - for (uint32_t i = TLS_NextEntry; i < TLS_MaxEntry; ++i) { - auto GDT = &Frame->State.gdt[i]; - if (GDT->base == 0) { - // If the base is zero then it isn't present with our setup - u_info->entry_number = i; - break; - } - } - - if 
(u_info->entry_number == -1) { - // Couldn't find a slot. Return empty handed - return -ESRCH; +// The kernel only gives 32-bit userspace 3 TLS segments +// Depending on if the host kernel is 32-bit or 64-bit then the TLS index assigned is different +// +// Host kernel x86_64, valid TLS enries: 12,13,14 +// Host kernel x86, valid TLS enries: 6,7,8 +// Since we are claiming to be a 64-bit kernel, use the 64-bit range +// +// 6/12 = glibc +// 7/13 = wine fs +// 8/14 = etc +constexpr uint32_t TLS_NextEntry = 12; +constexpr uint32_t TLS_MaxEntry = TLS_NextEntry + 3; + +uint64_t SetThreadArea(FEXCore::Core::CpuStateFrame* Frame, void* tls) { + struct x32::user_desc* u_info = reinterpret_cast(tls); + if (u_info->entry_number == -1) { + for (uint32_t i = TLS_NextEntry; i < TLS_MaxEntry; ++i) { + auto GDT = &Frame->State.gdt[i]; + if (GDT->base == 0) { + // If the base is zero then it isn't present with our setup + u_info->entry_number = i; + break; } } - // Now we need to update the thread's GDT to handle this change - auto GDT = &Frame->State.gdt[u_info->entry_number]; - GDT->base = u_info->base_addr; - - // With the segment register optimization we need to check all of the segment registers and update. - const auto GetEntry = [](auto value) { - return value >> 3; - }; - if (GetEntry(Frame->State.cs_idx) == u_info->entry_number) { - Frame->State.cs_cached = GDT->base; - } - if (GetEntry(Frame->State.ds_idx) == u_info->entry_number) { - Frame->State.ds_cached = GDT->base; - } - if (GetEntry(Frame->State.es_idx) == u_info->entry_number) { - Frame->State.es_cached = GDT->base; - } - if (GetEntry(Frame->State.fs_idx) == u_info->entry_number) { - Frame->State.fs_cached = GDT->base; - } - if (GetEntry(Frame->State.gs_idx) == u_info->entry_number) { - Frame->State.gs_cached = GDT->base; - } - if (GetEntry(Frame->State.ss_idx) == u_info->entry_number) { - Frame->State.ss_cached = GDT->base; + if (u_info->entry_number == -1) { + // Couldn't find a slot. 
Return empty handed + return -ESRCH; } - return 0; } - void AdjustRipForNewThread(FEXCore::Core::CpuStateFrame *Frame) { - Frame->State.rip += 2; + // Now we need to update the thread's GDT to handle this change + auto GDT = &Frame->State.gdt[u_info->entry_number]; + GDT->base = u_info->base_addr; + + // With the segment register optimization we need to check all of the segment registers and update. + const auto GetEntry = [](auto value) { + return value >> 3; + }; + if (GetEntry(Frame->State.cs_idx) == u_info->entry_number) { + Frame->State.cs_cached = GDT->base; + } + if (GetEntry(Frame->State.ds_idx) == u_info->entry_number) { + Frame->State.ds_cached = GDT->base; + } + if (GetEntry(Frame->State.es_idx) == u_info->entry_number) { + Frame->State.es_cached = GDT->base; + } + if (GetEntry(Frame->State.fs_idx) == u_info->entry_number) { + Frame->State.fs_cached = GDT->base; } + if (GetEntry(Frame->State.gs_idx) == u_info->entry_number) { + Frame->State.gs_cached = GDT->base; + } + if (GetEntry(Frame->State.ss_idx) == u_info->entry_number) { + Frame->State.ss_cached = GDT->base; + } + return 0; +} - void RegisterThread(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X32(sigreturn, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - FEX::HLE::_SyscallHandler->GetSignalDelegator()->HandleSignalHandlerReturn(false); - FEX_UNREACHABLE; - }); - - REGISTER_SYSCALL_IMPL_X32(clone, ([](FEXCore::Core::CpuStateFrame *Frame, uint32_t flags, void *stack, pid_t *parent_tid, void *tls, pid_t *child_tid) -> uint64_t { - FEX::HLE::clone3_args args { - .Type = TypeOfClone::TYPE_CLONE2, - .args = { - .flags = flags & ~CSIGNAL, // This no longer contains CSIGNAL - .pidfd = reinterpret_cast(parent_tid), // For clone, pidfd is duplicated here - .child_tid = reinterpret_cast(child_tid), - .parent_tid = reinterpret_cast(parent_tid), - .exit_signal = flags & CSIGNAL, - .stack = reinterpret_cast(stack), - .stack_size = 0, // This syscall isn't able to see the stack size - 
.tls = reinterpret_cast(tls), - .set_tid = 0, // This syscall isn't able to select TIDs - .set_tid_size = 0, - .cgroup = 0, // This syscall can't select cgroups - } - }; - return CloneHandler(Frame, &args); - })); - - REGISTER_SYSCALL_IMPL_X32(waitpid, [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, int32_t *status, int32_t options) -> uint64_t { - uint64_t Result = ::waitpid(pid, status, options); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(nice, [](FEXCore::Core::CpuStateFrame *Frame, int inc) -> uint64_t { - uint64_t Result = ::nice(inc); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(set_thread_area, [](FEXCore::Core::CpuStateFrame *Frame, struct user_desc *u_info) -> uint64_t { - return SetThreadArea(Frame, u_info); - }); - - REGISTER_SYSCALL_IMPL_X32(get_thread_area, [](FEXCore::Core::CpuStateFrame *Frame, struct user_desc *u_info) -> uint64_t { - // Index to fetch comes from the user_desc - uint32_t Entry = u_info->entry_number; - if (Entry < TLS_NextEntry || Entry > TLS_MaxEntry) { - return -EINVAL; - } +void AdjustRipForNewThread(FEXCore::Core::CpuStateFrame* Frame) { + Frame->State.rip += 2; +} - const auto &GDT = &Frame->State.gdt[Entry]; +void RegisterThread(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X32(sigreturn, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + FEX::HLE::_SyscallHandler->GetSignalDelegator()->HandleSignalHandlerReturn(false); + FEX_UNREACHABLE; + }); + + REGISTER_SYSCALL_IMPL_X32( + clone, ([](FEXCore::Core::CpuStateFrame* Frame, uint32_t flags, void* stack, pid_t* parent_tid, void* tls, pid_t* child_tid) -> uint64_t { + FEX::HLE::clone3_args args {.Type = TypeOfClone::TYPE_CLONE2, + .args = { + .flags = flags & ~CSIGNAL, // This no longer contains CSIGNAL + .pidfd = reinterpret_cast(parent_tid), // For clone, pidfd is duplicated here + .child_tid = reinterpret_cast(child_tid), + .parent_tid = reinterpret_cast(parent_tid), + .exit_signal = flags & CSIGNAL, + .stack = 
reinterpret_cast(stack), + .stack_size = 0, // This syscall isn't able to see the stack size + .tls = reinterpret_cast(tls), + .set_tid = 0, // This syscall isn't able to select TIDs + .set_tid_size = 0, + .cgroup = 0, // This syscall can't select cgroups + }}; + return CloneHandler(Frame, &args); + })); + + REGISTER_SYSCALL_IMPL_X32(waitpid, [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, int32_t* status, int32_t options) -> uint64_t { + uint64_t Result = ::waitpid(pid, status, options); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(nice, [](FEXCore::Core::CpuStateFrame* Frame, int inc) -> uint64_t { + uint64_t Result = ::nice(inc); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + set_thread_area, [](FEXCore::Core::CpuStateFrame* Frame, struct user_desc* u_info) -> uint64_t { return SetThreadArea(Frame, u_info); }); + + REGISTER_SYSCALL_IMPL_X32(get_thread_area, [](FEXCore::Core::CpuStateFrame* Frame, struct user_desc* u_info) -> uint64_t { + // Index to fetch comes from the user_desc + uint32_t Entry = u_info->entry_number; + if (Entry < TLS_NextEntry || Entry > TLS_MaxEntry) { + return -EINVAL; + } - memset(u_info, 0, sizeof(*u_info)); + const auto& GDT = &Frame->State.gdt[Entry]; - // FEX only stores base instead of the full GDT - u_info->base_addr = GDT->base; + memset(u_info, 0, sizeof(*u_info)); - // Fill the rest of the structure with expected data (even if wrong at the moment) - if (u_info->base_addr) { - u_info->limit = 0xF'FFFF; - u_info->seg_32bit = 1; - u_info->limit_in_pages = 1; - u_info->useable = 1; - } - else { - u_info->read_exec_only = 1; - u_info->seg_not_present = 1; - } - return 0; - }); + // FEX only stores base instead of the full GDT + u_info->base_addr = GDT->base; - REGISTER_SYSCALL_IMPL_X32(set_robust_list, [](FEXCore::Core::CpuStateFrame *Frame, struct robust_list_head *head, size_t len) -> uint64_t { - if (len != 12) { - // Return invalid if the passed in length doesn't match what's expected. 
- return -EINVAL; - } + // Fill the rest of the structure with expected data (even if wrong at the moment) + if (u_info->base_addr) { + u_info->limit = 0xF'FFFF; + u_info->seg_32bit = 1; + u_info->limit_in_pages = 1; + u_info->useable = 1; + } else { + u_info->read_exec_only = 1; + u_info->seg_not_present = 1; + } + return 0; + }); - auto Thread = Frame->Thread; - // Retain the robust list head but don't give it to the kernel - // The kernel would break if it tried parsing a 32bit robust list from a 64bit process - Thread->ThreadManager.robust_list_head = reinterpret_cast(head); - return 0; - }); - - REGISTER_SYSCALL_IMPL_X32(get_robust_list, [](FEXCore::Core::CpuStateFrame *Frame, int pid, struct robust_list_head **head, uint32_t *len_ptr) -> uint64_t { - auto Thread = Frame->Thread; - // Give the robust list back to the application - // Steam specifically checks to make sure the robust list is set - *(uint32_t*)head = (uint32_t)Thread->ThreadManager.robust_list_head; - *len_ptr = 12; - return 0; - }); - - REGISTER_SYSCALL_IMPL_X32(futex, [](FEXCore::Core::CpuStateFrame *Frame, int *uaddr, int futex_op, int val, const timespec32 *timeout, int *uaddr2, uint32_t val3) -> uint64_t { - void* timeout_ptr = (void*)timeout; - struct timespec tp64{}; - int cmd = futex_op & FUTEX_CMD_MASK; - if (timeout && - (cmd == FUTEX_WAIT || - cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || - cmd == FUTEX_WAIT_REQUEUE_PI)) { - // timeout argument is only handled as timespec in these cases - // Otherwise just an integer - tp64 = *timeout; - timeout_ptr = &tp64; - } + REGISTER_SYSCALL_IMPL_X32(set_robust_list, [](FEXCore::Core::CpuStateFrame* Frame, struct robust_list_head* head, size_t len) -> uint64_t { + if (len != 12) { + // Return invalid if the passed in length doesn't match what's expected. 
+ return -EINVAL; + } - uint64_t Result = syscall(SYSCALL_DEF(futex), - uaddr, - futex_op, - val, - timeout_ptr, - uaddr2, - val3); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(getgroups32, getgroups, [](FEXCore::Core::CpuStateFrame *Frame, int size, gid_t list[]) -> uint64_t { - uint64_t Result = ::getgroups(size, list); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setgroups32, setgroups, [](FEXCore::Core::CpuStateFrame *Frame, size_t size, const gid_t *list) -> uint64_t { - uint64_t Result = ::setgroups(size, list); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(getuid32, getuid, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::getuid(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(getgid32, getgid, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::getgid(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setuid32, setuid, [](FEXCore::Core::CpuStateFrame *Frame, uid_t uid) -> uint64_t { - uint64_t Result = ::setuid(uid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setgid32, setgid, [](FEXCore::Core::CpuStateFrame *Frame, gid_t gid) -> uint64_t { - uint64_t Result = ::setgid(gid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(geteuid32, geteuid, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::geteuid(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(getegid32, getegid, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::getegid(); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setfsuid32, setfsuid, [](FEXCore::Core::CpuStateFrame *Frame, uid_t fsuid) -> uint64_t { - uint64_t Result = ::setfsuid(fsuid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setfsgid32, setfsgid, [](FEXCore::Core::CpuStateFrame *Frame, uid_t fsgid) -> uint64_t { - uint64_t Result = 
::setfsgid(fsgid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setreuid32, setreuid, [](FEXCore::Core::CpuStateFrame *Frame, uid_t ruid, uid_t euid) -> uint64_t { - uint64_t Result = ::setreuid(ruid, euid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setresuid32, setresuid, [](FEXCore::Core::CpuStateFrame *Frame, uid_t ruid, uid_t euid, uid_t suid) -> uint64_t { - uint64_t Result = ::setresuid(ruid, euid, suid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(getresuid32, getresuid, [](FEXCore::Core::CpuStateFrame *Frame, uid_t *ruid, uid_t *euid, uid_t *suid) -> uint64_t { - uint64_t Result = ::getresuid(ruid, euid, suid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setresgid32, setresgid, [](FEXCore::Core::CpuStateFrame *Frame, gid_t rgid, gid_t egid, gid_t sgid) -> uint64_t { - uint64_t Result = ::setresgid(rgid, egid, sgid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(getresgid32, getresgid, [](FEXCore::Core::CpuStateFrame *Frame, gid_t *rgid, gid_t *egid, gid_t *sgid) -> uint64_t { - uint64_t Result = ::getresgid(rgid, egid, sgid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setregid32, setregid, [](FEXCore::Core::CpuStateFrame *Frame, gid_t rgid, gid_t egid) -> uint64_t { - uint64_t Result = ::setregid(rgid, egid); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(sigaltstack, [](FEXCore::Core::CpuStateFrame *Frame, const compat_ptr ss, compat_ptr old_ss) -> uint64_t { - stack_t ss64{}; - stack_t old64{}; - - stack_t *ss64_ptr{}; - stack_t *old64_ptr{}; - - if (ss) { - ss64 = *ss; - ss64_ptr = &ss64; - } + auto Thread = Frame->Thread; + // Retain the robust list head but don't give it to the kernel + // The kernel would break if it tried parsing a 32bit robust list from a 64bit process + Thread->ThreadManager.robust_list_head = reinterpret_cast(head); + return 0; + }); + + REGISTER_SYSCALL_IMPL_X32( + get_robust_list, 
[](FEXCore::Core::CpuStateFrame* Frame, int pid, struct robust_list_head** head, uint32_t* len_ptr) -> uint64_t { + auto Thread = Frame->Thread; + // Give the robust list back to the application + // Steam specifically checks to make sure the robust list is set + *(uint32_t*)head = (uint32_t)Thread->ThreadManager.robust_list_head; + *len_ptr = 12; + return 0; + }); + + REGISTER_SYSCALL_IMPL_X32( + futex, [](FEXCore::Core::CpuStateFrame* Frame, int* uaddr, int futex_op, int val, const timespec32* timeout, int* uaddr2, uint32_t val3) -> uint64_t { + void* timeout_ptr = (void*)timeout; + struct timespec tp64 {}; + int cmd = futex_op & FUTEX_CMD_MASK; + if (timeout && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || cmd == FUTEX_WAIT_BITSET || cmd == FUTEX_WAIT_REQUEUE_PI)) { + // timeout argument is only handled as timespec in these cases + // Otherwise just an integer + tp64 = *timeout; + timeout_ptr = &tp64; + } - if (old_ss) { - old64 = *old_ss; - old64_ptr = &old64; - } - uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSigAltStack(ss64_ptr, old64_ptr); + uint64_t Result = syscall(SYSCALL_DEF(futex), uaddr, futex_op, val, timeout_ptr, uaddr2, val3); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(getgroups32, getgroups, [](FEXCore::Core::CpuStateFrame* Frame, int size, gid_t list[]) -> uint64_t { + uint64_t Result = ::getgroups(size, list); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setgroups32, setgroups, [](FEXCore::Core::CpuStateFrame* Frame, size_t size, const gid_t* list) -> uint64_t { + uint64_t Result = ::setgroups(size, list); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(getuid32, getuid, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::getuid(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(getgid32, getgid, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::getgid(); + SYSCALL_ERRNO(); + }); 
+ + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setuid32, setuid, [](FEXCore::Core::CpuStateFrame* Frame, uid_t uid) -> uint64_t { + uint64_t Result = ::setuid(uid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setgid32, setgid, [](FEXCore::Core::CpuStateFrame* Frame, gid_t gid) -> uint64_t { + uint64_t Result = ::setgid(gid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(geteuid32, geteuid, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::geteuid(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(getegid32, getegid, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::getegid(); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setfsuid32, setfsuid, [](FEXCore::Core::CpuStateFrame* Frame, uid_t fsuid) -> uint64_t { + uint64_t Result = ::setfsuid(fsuid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setfsgid32, setfsgid, [](FEXCore::Core::CpuStateFrame* Frame, uid_t fsgid) -> uint64_t { + uint64_t Result = ::setfsgid(fsgid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setreuid32, setreuid, [](FEXCore::Core::CpuStateFrame* Frame, uid_t ruid, uid_t euid) -> uint64_t { + uint64_t Result = ::setreuid(ruid, euid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setresuid32, setresuid, [](FEXCore::Core::CpuStateFrame* Frame, uid_t ruid, uid_t euid, uid_t suid) -> uint64_t { + uint64_t Result = ::setresuid(ruid, euid, suid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(getresuid32, getresuid, + [](FEXCore::Core::CpuStateFrame* Frame, uid_t* ruid, uid_t* euid, uid_t* suid) -> uint64_t { + uint64_t Result = ::getresuid(ruid, euid, suid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setresgid32, setresgid, [](FEXCore::Core::CpuStateFrame* Frame, gid_t rgid, gid_t egid, gid_t sgid) -> uint64_t { + uint64_t Result = ::setresgid(rgid, egid, sgid); + 
SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(getresgid32, getresgid, + [](FEXCore::Core::CpuStateFrame* Frame, gid_t* rgid, gid_t* egid, gid_t* sgid) -> uint64_t { + uint64_t Result = ::getresgid(rgid, egid, sgid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(setregid32, setregid, [](FEXCore::Core::CpuStateFrame* Frame, gid_t rgid, gid_t egid) -> uint64_t { + uint64_t Result = ::setregid(rgid, egid); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + sigaltstack, [](FEXCore::Core::CpuStateFrame* Frame, const compat_ptr ss, compat_ptr old_ss) -> uint64_t { + stack_t ss64 {}; + stack_t old64 {}; + + stack_t* ss64_ptr {}; + stack_t* old64_ptr {}; + + if (ss) { + ss64 = *ss; + ss64_ptr = &ss64; + } - if (Result == 0 && old_ss) { - *old_ss = old64; - } - return Result; - }); + if (old_ss) { + old64 = *old_ss; + old64_ptr = &old64; + } + uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSigAltStack(ss64_ptr, old64_ptr); - // launch a new process under fex - // currently does not propagate argv[0] correctly - REGISTER_SYSCALL_IMPL_X32(execve, [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, uint32_t *argv, uint32_t *envp) -> uint64_t { - fextl::vector Args; - fextl::vector Envp; + if (Result == 0 && old_ss) { + *old_ss = old64; + } + return Result; + }); + + // launch a new process under fex + // currently does not propagate argv[0] correctly + REGISTER_SYSCALL_IMPL_X32(execve, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, uint32_t* argv, uint32_t* envp) -> uint64_t { + fextl::vector Args; + fextl::vector Envp; + + if (argv) { + for (int i = 0; argv[i]; i++) { + Args.push_back(reinterpret_cast(static_cast(argv[i]))); + } - if (argv) { - for (int i = 0; argv[i]; i++) { - Args.push_back(reinterpret_cast(static_cast(argv[i]))); - } + Args.push_back(nullptr); + } - Args.push_back(nullptr); + if (envp) { + for (int i = 0; envp[i]; i++) { + 
Envp.push_back(reinterpret_cast(static_cast(envp[i]))); } + Envp.push_back(nullptr); + } - if (envp) { - for (int i = 0; envp[i]; i++) { - Envp.push_back(reinterpret_cast(static_cast(envp[i]))); - } - Envp.push_back(nullptr); - } + auto* const* ArgsPtr = argv ? const_cast(Args.data()) : nullptr; + auto* const* EnvpPtr = envp ? const_cast(Envp.data()) : nullptr; - auto* const* ArgsPtr = argv ? const_cast(Args.data()) : nullptr; - auto* const* EnvpPtr = envp ? const_cast(Envp.data()) : nullptr; + FEX::HLE::ExecveAtArgs AtArgs = FEX::HLE::ExecveAtArgs::Empty(); - FEX::HLE::ExecveAtArgs AtArgs = FEX::HLE::ExecveAtArgs::Empty(); + return FEX::HLE::ExecveHandler(pathname, ArgsPtr, EnvpPtr, AtArgs); + }); - return FEX::HLE::ExecveHandler(pathname, ArgsPtr, EnvpPtr, AtArgs); - }); + REGISTER_SYSCALL_IMPL_X32( + execveat, ([](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, uint32_t* argv, uint32_t* envp, int flags) -> uint64_t { + fextl::vector Args; + fextl::vector Envp; - REGISTER_SYSCALL_IMPL_X32(execveat, ([](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, uint32_t *argv, uint32_t *envp, int flags) -> uint64_t { - fextl::vector Args; - fextl::vector Envp; + if (argv) { + for (int i = 0; argv[i]; i++) { + Args.push_back(reinterpret_cast(static_cast(argv[i]))); + } - if (argv) { - for (int i = 0; argv[i]; i++) { - Args.push_back(reinterpret_cast(static_cast(argv[i]))); - } + Args.push_back(nullptr); + } - Args.push_back(nullptr); + if (envp) { + for (int i = 0; envp[i]; i++) { + Envp.push_back(reinterpret_cast(static_cast(envp[i]))); } + Envp.push_back(nullptr); + } - if (envp) { - for (int i = 0; envp[i]; i++) { - Envp.push_back(reinterpret_cast(static_cast(envp[i]))); - } - Envp.push_back(nullptr); - } + FEX::HLE::ExecveAtArgs AtArgs { + .dirfd = dirfd, + .flags = flags, + }; + + auto* const* ArgsPtr = argv ? const_cast(Args.data()) : nullptr; + auto* const* EnvpPtr = envp ? 
const_cast(Envp.data()) : nullptr; + return FEX::HLE::ExecveHandler(pathname, ArgsPtr, EnvpPtr, AtArgs); + })); - FEX::HLE::ExecveAtArgs AtArgs { - .dirfd = dirfd, - .flags = flags, - }; + REGISTER_SYSCALL_IMPL_X32(wait4, [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, int* wstatus, int options, struct rusage_32* rusage) -> uint64_t { + struct rusage usage64 {}; + struct rusage* usage64_p {}; - auto* const* ArgsPtr = argv ? const_cast(Args.data()) : nullptr; - auto* const* EnvpPtr = envp ? const_cast(Envp.data()) : nullptr; - return FEX::HLE::ExecveHandler(pathname, ArgsPtr, EnvpPtr, AtArgs); - })); + if (rusage) { + usage64 = *rusage; + usage64_p = &usage64; + } + uint64_t Result = ::wait4(pid, wstatus, options, usage64_p); + if (rusage) { + *rusage = usage64; + } + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X32(wait4, [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, int *wstatus, int options, struct rusage_32 *rusage) -> uint64_t { - struct rusage usage64{}; - struct rusage *usage64_p{}; + REGISTER_SYSCALL_IMPL_X32(waitid, + [](FEXCore::Core::CpuStateFrame* Frame, int which, pid_t upid, compat_ptr info, + int options, struct rusage_32* rusage) -> uint64_t { + struct rusage usage64 {}; + struct rusage* usage64_p {}; - if (rusage) { - usage64 = *rusage; - usage64_p = &usage64; - } - uint64_t Result = ::wait4(pid, wstatus, options, usage64_p); - if (rusage) { - *rusage = usage64; - } - SYSCALL_ERRNO(); - }); + siginfo_t info64 {}; + siginfo_t* info64_p {}; + + if (rusage) { + usage64 = *rusage; + usage64_p = &usage64; + } - REGISTER_SYSCALL_IMPL_X32(waitid, [](FEXCore::Core::CpuStateFrame *Frame, int which, pid_t upid, compat_ptr info, int options, struct rusage_32 *rusage) -> uint64_t { - struct rusage usage64{}; - struct rusage *usage64_p{}; + if (info) { + info64_p = &info64; + } - siginfo_t info64{}; - siginfo_t *info64_p{}; + uint64_t Result = ::syscall(SYSCALL_DEF(waitid), which, upid, info64_p, options, usage64_p); + if (Result != -1) { if (rusage) { 
- usage64 = *rusage; - usage64_p = &usage64; + *rusage = usage64; } if (info) { - info64_p = &info64; + *info = info64; } + } - uint64_t Result = ::syscall(SYSCALL_DEF(waitid), which, upid, info64_p, options, usage64_p); - - if (Result != -1) { - if (rusage) { - *rusage = usage64; - } - - if (info) { - *info = info64; - } - } + SYSCALL_ERRNO(); + }); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(futex_time64, futex, [](FEXCore::Core::CpuStateFrame *Frame, int *uaddr, int futex_op, int val, const struct timespec *timeout, int *uaddr2, uint32_t val3) -> uint64_t { - uint64_t Result = syscall(SYSCALL_DEF(futex), - uaddr, - futex_op, - val, - timeout, - uaddr2, - val3); - SYSCALL_ERRNO(); - }); - } + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(futex_time64, futex, + [](FEXCore::Core::CpuStateFrame* Frame, int* uaddr, int futex_op, int val, + const struct timespec* timeout, int* uaddr2, uint32_t val3) -> uint64_t { + uint64_t Result = syscall(SYSCALL_DEF(futex), uaddr, futex_op, val, timeout, uaddr2, val3); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Time.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Time.cpp index 1f6b4c2667..e4a1d08bf6 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Time.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Time.cpp @@ -25,239 +25,249 @@ ARG_TO_STR(FEX::HLE::x32::compat_ptr, "%lx") struct timespec; namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x32 { - void RegisterTime(FEX::HLE::SyscallHandler *Handler) { - - REGISTER_SYSCALL_IMPL_X32(time, [](FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::x32::old_time32_t *tloc) -> uint64_t { - time_t Host{}; - uint64_t Result = ::time(&Host); - - if (tloc) { - // On 32-bit this truncates - *tloc = (FEX::HLE::x32::old_time32_t)Host; - } - - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(times, [](FEXCore::Core::CpuStateFrame *Frame, struct 
FEX::HLE::x32::compat_tms *buf) -> uint64_t { - struct tms Host{}; - uint64_t Result = ::times(&Host); - if (buf) { - *buf = Host; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(utime, [](FEXCore::Core::CpuStateFrame *Frame, char* filename, const FEX::HLE::x32::old_utimbuf32* times) -> uint64_t { - struct utimbuf Host{}; - struct utimbuf *Host_p{}; - if (times) { - Host = *times; - Host_p = &Host; - } - uint64_t Result = ::utime(filename, Host_p); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(gettimeofday, [](FEXCore::Core::CpuStateFrame *Frame, timeval32 *tv, struct timezone *tz) -> uint64_t { - struct timeval tv64{}; - struct timeval *tv_ptr{}; - if (tv) { - tv_ptr = &tv64; - } - - uint64_t Result = ::gettimeofday(tv_ptr, tz); - - if (tv) { - *tv = tv64; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(settimeofday, [](FEXCore::Core::CpuStateFrame *Frame, const timeval32 *tv, const struct timezone *tz) -> uint64_t { - struct timeval tv64{}; - struct timeval *tv_ptr{}; - if (tv) { - tv64 = *tv; - tv_ptr = &tv64; - } - - const uint64_t Result = ::settimeofday(tv_ptr, tz); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(nanosleep, [](FEXCore::Core::CpuStateFrame *Frame, const timespec32 *req, timespec32 *rem) -> uint64_t { - struct timespec rem64{}; - struct timespec *rem64_ptr{}; - - if (rem) { - rem64 = *rem; - rem64_ptr = &rem64; - } - - uint64_t Result = 0; - if (req) { - const struct timespec req64 = *req; - Result = ::nanosleep(&req64, rem64_ptr); - } else { - Result = ::nanosleep(nullptr, rem64_ptr); - } - - if (rem) { - *rem = rem64; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(clock_gettime, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clk_id, timespec32 *tp) -> uint64_t { - struct timespec tp64{}; - uint64_t Result = ::clock_gettime(clk_id, &tp64); - if (tp) { - *tp = tp64; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(clock_getres, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t 
clk_id, timespec32 *tp) -> uint64_t { - struct timespec tp64{}; - uint64_t Result = ::clock_getres(clk_id, &tp64); - if (tp) { - *tp = tp64; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(clock_nanosleep, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clockid, int flags, const timespec32 *request, timespec32 *remain) -> uint64_t { - struct timespec req64{}; - struct timespec *req64_ptr{}; - - struct timespec rem64{}; - struct timespec *rem64_ptr{}; - - if (request) { - req64 = *request; - req64_ptr = &req64; - } - - if (remain) { - rem64 = *remain; - rem64_ptr = &rem64; - } - - // Can't use glibc helper here since it does additional validation and data munging that breaks games. - uint64_t Result = ::syscall(SYSCALL_DEF(clock_nanosleep), clockid, flags, req64_ptr, rem64_ptr); - - if (remain && - (flags & TIMER_ABSTIME) == 0) { - // Remain is completely ignored if TIMER_ABSTIME is set. - *remain = rem64; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(clock_settime, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clockid, const timespec32 *tp) -> uint64_t { - if (!tp) { - // clock_settime is required to pass a timespec. 
- return -EFAULT; - } - - uint64_t Result = 0; - const struct timespec tp64 = *tp; - Result = ::clock_settime(clockid, &tp64); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(futimesat, [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, const timeval32 times[2]) -> uint64_t { - uint64_t Result = 0; - if (times) { - struct timeval times64[2]{}; - times64[0] = times[0]; - times64[1] = times[1]; - Result = ::syscall(SYSCALL_DEF(futimesat), dirfd, pathname, times64); - } else { - Result = ::syscall(SYSCALL_DEF(futimesat), dirfd, pathname, nullptr); - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(utimensat, [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, const compat_ptr times, int flags) -> uint64_t { - uint64_t Result = 0; - if (times) { - timespec times64[2]{}; - times64[0] = times[0]; - times64[1] = times[1]; - Result = ::syscall(SYSCALL_DEF(utimensat), dirfd, pathname, times64, flags); - } else { - Result = ::syscall(SYSCALL_DEF(utimensat), dirfd, pathname, nullptr, flags); - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(clock_gettime64, clock_gettime, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clk_id, timespec *tp) -> uint64_t { - uint64_t Result = ::clock_gettime(clk_id, tp); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(clock_adjtime64, clock_adjtime, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clk_id, struct timex *buf) -> uint64_t { - uint64_t Result = ::clock_adjtime(clk_id, buf); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(clock_settime64, clock_settime, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clockid, const struct timespec *tp) -> uint64_t { - uint64_t Result = ::clock_settime(clockid, tp); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(clock_getres_time64, clock_getres, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clk_id, timespec *tp) -> uint64_t { - uint64_t Result = 
::clock_getres(clk_id, tp); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(clock_nanosleep_time64, clock_nanosleep, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clockid, int flags, const struct timespec *request, struct timespec *remain) -> uint64_t { - uint64_t Result = ::clock_nanosleep(clockid, flags, request, remain); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(utimensat_time64, utimensat, [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, const struct timespec times[2], int flags) -> uint64_t { - uint64_t Result = ::utimensat(dirfd, pathname, times, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(utimes, [](FEXCore::Core::CpuStateFrame *Frame, const char *filename, const timeval32 times[2]) -> uint64_t { - uint64_t Result = 0; - if (times) { - struct timeval times64[2]{}; - times64[0] = times[0]; - times64[1] = times[1]; - Result = ::utimes(filename, times64); - } else { - Result = ::utimes(filename, nullptr); - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(adjtimex, [](FEXCore::Core::CpuStateFrame *Frame, compat_ptr buf) -> uint64_t { - struct timex Host{}; - Host = *buf; - uint64_t Result = ::adjtimex(&Host); - if (Result != -1) { - *buf = Host; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(clock_adjtime, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clk_id, compat_ptr buf) -> uint64_t { - struct timex Host{}; - Host = *buf; - uint64_t Result = ::clock_adjtime(clk_id, &Host); - if (Result != -1) { - *buf = Host; - } - SYSCALL_ERRNO(); - }); - } +void RegisterTime(FEX::HLE::SyscallHandler* Handler) { + + REGISTER_SYSCALL_IMPL_X32(time, [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::x32::old_time32_t* tloc) -> uint64_t { + time_t Host {}; + uint64_t Result = ::time(&Host); + + if (tloc) { + // On 32-bit this truncates + *tloc = (FEX::HLE::x32::old_time32_t)Host; + } + + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(times, 
[](FEXCore::Core::CpuStateFrame* Frame, struct FEX::HLE::x32::compat_tms* buf) -> uint64_t { + struct tms Host {}; + uint64_t Result = ::times(&Host); + if (buf) { + *buf = Host; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(utime, [](FEXCore::Core::CpuStateFrame* Frame, char* filename, const FEX::HLE::x32::old_utimbuf32* times) -> uint64_t { + struct utimbuf Host {}; + struct utimbuf* Host_p {}; + if (times) { + Host = *times; + Host_p = &Host; + } + uint64_t Result = ::utime(filename, Host_p); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(gettimeofday, [](FEXCore::Core::CpuStateFrame* Frame, timeval32* tv, struct timezone* tz) -> uint64_t { + struct timeval tv64 {}; + struct timeval* tv_ptr {}; + if (tv) { + tv_ptr = &tv64; + } + + uint64_t Result = ::gettimeofday(tv_ptr, tz); + + if (tv) { + *tv = tv64; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(settimeofday, [](FEXCore::Core::CpuStateFrame* Frame, const timeval32* tv, const struct timezone* tz) -> uint64_t { + struct timeval tv64 {}; + struct timeval* tv_ptr {}; + if (tv) { + tv64 = *tv; + tv_ptr = &tv64; + } + + const uint64_t Result = ::settimeofday(tv_ptr, tz); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(nanosleep, [](FEXCore::Core::CpuStateFrame* Frame, const timespec32* req, timespec32* rem) -> uint64_t { + struct timespec rem64 {}; + struct timespec* rem64_ptr {}; + + if (rem) { + rem64 = *rem; + rem64_ptr = &rem64; + } + + uint64_t Result = 0; + if (req) { + const struct timespec req64 = *req; + Result = ::nanosleep(&req64, rem64_ptr); + } else { + Result = ::nanosleep(nullptr, rem64_ptr); + } + + if (rem) { + *rem = rem64; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(clock_gettime, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clk_id, timespec32* tp) -> uint64_t { + struct timespec tp64 {}; + uint64_t Result = ::clock_gettime(clk_id, &tp64); + if (tp) { + *tp = tp64; + } + SYSCALL_ERRNO(); + }); + + 
REGISTER_SYSCALL_IMPL_X32(clock_getres, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clk_id, timespec32* tp) -> uint64_t { + struct timespec tp64 {}; + uint64_t Result = ::clock_getres(clk_id, &tp64); + if (tp) { + *tp = tp64; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + clock_nanosleep, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clockid, int flags, const timespec32* request, timespec32* remain) -> uint64_t { + struct timespec req64 {}; + struct timespec* req64_ptr {}; + + struct timespec rem64 {}; + struct timespec* rem64_ptr {}; + + if (request) { + req64 = *request; + req64_ptr = &req64; + } + + if (remain) { + rem64 = *remain; + rem64_ptr = &rem64; + } + + // Can't use glibc helper here since it does additional validation and data munging that breaks games. + uint64_t Result = ::syscall(SYSCALL_DEF(clock_nanosleep), clockid, flags, req64_ptr, rem64_ptr); + + if (remain && (flags & TIMER_ABSTIME) == 0) { + // Remain is completely ignored if TIMER_ABSTIME is set. + *remain = rem64; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(clock_settime, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clockid, const timespec32* tp) -> uint64_t { + if (!tp) { + // clock_settime is required to pass a timespec. 
+ return -EFAULT; + } + + uint64_t Result = 0; + const struct timespec tp64 = *tp; + Result = ::clock_settime(clockid, &tp64); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(futimesat, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, const timeval32 times[2]) -> uint64_t { + uint64_t Result = 0; + if (times) { + struct timeval times64[2] {}; + times64[0] = times[0]; + times64[1] = times[1]; + Result = ::syscall(SYSCALL_DEF(futimesat), dirfd, pathname, times64); + } else { + Result = ::syscall(SYSCALL_DEF(futimesat), dirfd, pathname, nullptr); + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + utimensat, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, const compat_ptr times, int flags) -> uint64_t { + uint64_t Result = 0; + if (times) { + timespec times64[2] {}; + times64[0] = times[0]; + times64[1] = times[1]; + Result = ::syscall(SYSCALL_DEF(utimensat), dirfd, pathname, times64, flags); + } else { + Result = ::syscall(SYSCALL_DEF(utimensat), dirfd, pathname, nullptr, flags); + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(clock_gettime64, clock_gettime, + [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clk_id, timespec* tp) -> uint64_t { + uint64_t Result = ::clock_gettime(clk_id, tp); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(clock_adjtime64, clock_adjtime, + [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clk_id, struct timex* buf) -> uint64_t { + uint64_t Result = ::clock_adjtime(clk_id, buf); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(clock_settime64, clock_settime, + [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clockid, const struct timespec* tp) -> uint64_t { + uint64_t Result = ::clock_settime(clockid, tp); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(clock_getres_time64, clock_getres, + [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clk_id, timespec* tp) -> uint64_t { + uint64_t Result 
= ::clock_getres(clk_id, tp); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL( + clock_nanosleep_time64, clock_nanosleep, + [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clockid, int flags, const struct timespec* request, struct timespec* remain) -> uint64_t { + uint64_t Result = ::clock_nanosleep(clockid, flags, request, remain); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL( + utimensat_time64, utimensat, + [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, const struct timespec times[2], int flags) -> uint64_t { + uint64_t Result = ::utimensat(dirfd, pathname, times, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(utimes, [](FEXCore::Core::CpuStateFrame* Frame, const char* filename, const timeval32 times[2]) -> uint64_t { + uint64_t Result = 0; + if (times) { + struct timeval times64[2] {}; + times64[0] = times[0]; + times64[1] = times[1]; + Result = ::utimes(filename, times64); + } else { + Result = ::utimes(filename, nullptr); + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(adjtimex, [](FEXCore::Core::CpuStateFrame* Frame, compat_ptr buf) -> uint64_t { + struct timex Host {}; + Host = *buf; + uint64_t Result = ::adjtimex(&Host); + if (Result != -1) { + *buf = Host; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(clock_adjtime, + [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clk_id, compat_ptr buf) -> uint64_t { + struct timex Host {}; + Host = *buf; + uint64_t Result = ::clock_adjtime(clk_id, &Host); + if (Result != -1) { + *buf = Host; + } + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Timer.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Timer.cpp index 9acac543c6..a7c482d66a 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Timer.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Timer.cpp @@ -18,92 +18,96 @@ tags: LinuxSyscalls|syscalls-x86-32 #include namespace 
FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } ARG_TO_STR(FEX::HLE::x32::compat_ptr, "%lx") namespace FEX::HLE::x32 { - void RegisterTimer(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X32(timer_settime, [](FEXCore::Core::CpuStateFrame *Frame, - kernel_timer_t timerid, - int flags, - const FEX::HLE::x32::old_itimerspec32 *new_value, - FEX::HLE::x32::old_itimerspec32 *old_value) -> uint64_t { - itimerspec new_value_host{}; - itimerspec old_value_host{}; - itimerspec *old_value_host_p{}; - - new_value_host = *new_value; - if (old_value) { - old_value_host_p = &old_value_host; - } - uint64_t Result = ::syscall(SYSCALL_DEF(timer_settime), timerid, flags, &new_value_host, old_value_host_p); - if (Result != -1 && old_value) { - *old_value = old_value_host; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(timer_gettime, [](FEXCore::Core::CpuStateFrame *Frame, - kernel_timer_t timerid, - FEX::HLE::x32::old_itimerspec32 *curr_value) -> uint64_t { - itimerspec curr_value_host{}; - uint64_t Result = ::syscall(SYSCALL_DEF(timer_gettime), timerid, curr_value_host); - *curr_value = curr_value_host; - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(timer_settime64, timer_settime, [](FEXCore::Core::CpuStateFrame *Frame, kernel_timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(timer_settime), timerid, flags, new_value, old_value); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(timer_gettime64, timer_gettime, [](FEXCore::Core::CpuStateFrame *Frame, kernel_timer_t timerid, struct itimerspec *curr_value) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(timer_gettime), timerid, curr_value); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(getitimer, [](FEXCore::Core::CpuStateFrame *Frame, int which, FEX::HLE::x32::itimerval32 *curr_value) -> uint64_t { - itimerval val{}; - itimerval *val_p{}; - 
if (curr_value) { - val_p = &val; - } - uint64_t Result = ::getitimer(which, val_p); - if (curr_value) { - *curr_value = val; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(setitimer, [](FEXCore::Core::CpuStateFrame *Frame, int which, const FEX::HLE::x32::itimerval32 *new_value, FEX::HLE::x32::itimerval32 *old_value) -> uint64_t { - itimerval val{}; - itimerval old{}; - itimerval *val_p{}; - itimerval *old_p{}; - - if (new_value) { - val = *new_value; - val_p = &val; - } - - if (old_value) { - old_p = &old; - } - - uint64_t Result = ::setitimer(which, val_p, old_p); - - if (old_value) { - *old_value = old; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X32(timer_create, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clockid, compat_ptr sevp, kernel_timer_t *timerid) -> uint64_t { - sigevent Host = *sevp; - uint64_t Result = ::syscall(SYSCALL_DEF(timer_create), clockid, &Host, timerid); - SYSCALL_ERRNO(); - }); - } +void RegisterTimer(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X32(timer_settime, + [](FEXCore::Core::CpuStateFrame* Frame, kernel_timer_t timerid, int flags, + const FEX::HLE::x32::old_itimerspec32* new_value, FEX::HLE::x32::old_itimerspec32* old_value) -> uint64_t { + itimerspec new_value_host {}; + itimerspec old_value_host {}; + itimerspec* old_value_host_p {}; + + new_value_host = *new_value; + if (old_value) { + old_value_host_p = &old_value_host; + } + uint64_t Result = ::syscall(SYSCALL_DEF(timer_settime), timerid, flags, &new_value_host, old_value_host_p); + if (Result != -1 && old_value) { + *old_value = old_value_host; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + timer_gettime, [](FEXCore::Core::CpuStateFrame* Frame, kernel_timer_t timerid, FEX::HLE::x32::old_itimerspec32* curr_value) -> uint64_t { + itimerspec curr_value_host {}; + uint64_t Result = ::syscall(SYSCALL_DEF(timer_gettime), timerid, curr_value_host); + *curr_value = curr_value_host; + SYSCALL_ERRNO(); + }); + + 
REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL(timer_settime64, timer_settime, + [](FEXCore::Core::CpuStateFrame* Frame, kernel_timer_t timerid, int flags, + const struct itimerspec* new_value, struct itimerspec* old_value) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(timer_settime), timerid, flags, new_value, old_value); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32_PASS_MANUAL( + timer_gettime64, timer_gettime, [](FEXCore::Core::CpuStateFrame* Frame, kernel_timer_t timerid, struct itimerspec* curr_value) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(timer_gettime), timerid, curr_value); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(getitimer, [](FEXCore::Core::CpuStateFrame* Frame, int which, FEX::HLE::x32::itimerval32* curr_value) -> uint64_t { + itimerval val {}; + itimerval* val_p {}; + if (curr_value) { + val_p = &val; + } + uint64_t Result = ::getitimer(which, val_p); + if (curr_value) { + *curr_value = val; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32(setitimer, + [](FEXCore::Core::CpuStateFrame* Frame, int which, const FEX::HLE::x32::itimerval32* new_value, + FEX::HLE::x32::itimerval32* old_value) -> uint64_t { + itimerval val {}; + itimerval old {}; + itimerval* val_p {}; + itimerval* old_p {}; + + if (new_value) { + val = *new_value; + val_p = &val; + } + + if (old_value) { + old_p = &old; + } + + uint64_t Result = ::setitimer(which, val_p, old_p); + + if (old_value) { + *old_value = old; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X32( + timer_create, + [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clockid, compat_ptr sevp, kernel_timer_t* timerid) -> uint64_t { + sigevent Host = *sevp; + uint64_t Result = ::syscall(SYSCALL_DEF(timer_create), clockid, &Host, timerid); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x32 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/EPoll.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/EPoll.cpp index 86f3b4065b..08129bd0f7 100644 --- 
a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/EPoll.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/EPoll.cpp @@ -22,47 +22,61 @@ tags: LinuxSyscalls|syscalls-x86-64 struct timespec; namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x64 { - void RegisterEpoll(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X64(epoll_wait, [](FEXCore::Core::CpuStateFrame *Frame, int epfd, FEX::HLE::epoll_event_x86 *events, int maxevents, int timeout) -> uint64_t { - fextl::vector Events(std::max(0, maxevents)); - uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait), epfd, Events.data(), maxevents, timeout, nullptr, 8); +void RegisterEpoll(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X64( + epoll_wait, [](FEXCore::Core::CpuStateFrame* Frame, int epfd, FEX::HLE::epoll_event_x86* events, int maxevents, int timeout) -> uint64_t { + fextl::vector Events(std::max(0, maxevents)); + uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait), epfd, Events.data(), maxevents, timeout, nullptr, 8); - if (Result != -1) { - for (size_t i = 0; i < Result; ++i) { - events[i] = Events[i]; - } + if (Result != -1) { + for (size_t i = 0; i < Result; ++i) { + events[i] = Events[i]; } - SYSCALL_ERRNO(); - }); + } + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X64(epoll_ctl, [](FEXCore::Core::CpuStateFrame *Frame, int epfd, int op, int fd, FEX::HLE::epoll_event_x86 *event) -> uint64_t { - struct epoll_event Event; - struct epoll_event *EventPtr{}; - if (event) { - Event = *event; - EventPtr = &Event; - } - uint64_t Result = ::syscall(SYSCALL_DEF(epoll_ctl), epfd, op, fd, EventPtr); - if (Result != -1 && event) { - *event = Event; + REGISTER_SYSCALL_IMPL_X64(epoll_ctl, [](FEXCore::Core::CpuStateFrame* Frame, int epfd, int op, int fd, FEX::HLE::epoll_event_x86* event) -> uint64_t { + struct epoll_event Event; + struct epoll_event* EventPtr {}; + if (event) { + Event = *event; + EventPtr = &Event; + } + uint64_t Result = 
::syscall(SYSCALL_DEF(epoll_ctl), epfd, op, fd, EventPtr); + if (Result != -1 && event) { + *event = Event; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64(epoll_pwait, + [](FEXCore::Core::CpuStateFrame* Frame, int epfd, FEX::HLE::epoll_event_x86* events, int maxevent, int timeout, + const uint64_t* sigmask, size_t sigsetsize) -> uint64_t { + fextl::vector Events(std::max(0, maxevent)); + + uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait), epfd, Events.data(), maxevent, timeout, sigmask, sigsetsize); + + if (Result != -1) { + for (size_t i = 0; i < Result; ++i) { + events[i] = Events[i]; } - SYSCALL_ERRNO(); - }); + } + + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X64(epoll_pwait, [](FEXCore::Core::CpuStateFrame *Frame, int epfd, FEX::HLE::epoll_event_x86 *events, int maxevent, int timeout, const uint64_t* sigmask, size_t sigsetsize) -> uint64_t { + if (Handler->IsHostKernelVersionAtLeast(5, 11, 0)) { + REGISTER_SYSCALL_IMPL_X64(epoll_pwait2, + [](FEXCore::Core::CpuStateFrame* Frame, int epfd, FEX::HLE::epoll_event_x86* events, int maxevent, + timespec* timeout, const uint64_t* sigmask, size_t sigsetsize) -> uint64_t { fextl::vector Events(std::max(0, maxevent)); - uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait), - epfd, - Events.data(), - maxevent, - timeout, - sigmask, - sigsetsize); + uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait2), epfd, Events.data(), maxevent, timeout, sigmask, sigsetsize); if (Result != -1) { for (size_t i = 0; i < Result; ++i) { @@ -72,30 +86,8 @@ namespace FEX::HLE::x64 { SYSCALL_ERRNO(); }); - - if (Handler->IsHostKernelVersionAtLeast(5, 11, 0)) { - REGISTER_SYSCALL_IMPL_X64(epoll_pwait2, [](FEXCore::Core::CpuStateFrame *Frame, int epfd, FEX::HLE::epoll_event_x86 *events, int maxevent, timespec *timeout, const uint64_t* sigmask, size_t sigsetsize) -> uint64_t { - fextl::vector Events(std::max(0, maxevent)); - - uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait2), - epfd, - Events.data(), - maxevent, - 
timeout, - sigmask, - sigsetsize); - - if (Result != -1) { - for (size_t i = 0; i < Result; ++i) { - events[i] = Events[i]; - } - } - - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL_X64(epoll_pwait2, UnimplementedSyscallSafe); - } + } else { + REGISTER_SYSCALL_IMPL_X64(epoll_pwait2, UnimplementedSyscallSafe); } } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/FD.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/FD.cpp index 8c3d64af50..da63832134 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/FD.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/FD.cpp @@ -28,232 +28,225 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEX::HLE::x64 { - void RegisterFD(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X64_PASS(poll, [](FEXCore::Core::CpuStateFrame *Frame, struct pollfd *fds, nfds_t nfds, int timeout) -> uint64_t { - uint64_t Result = ::poll(fds, nfds, timeout); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(select, [](FEXCore::Core::CpuStateFrame *Frame, int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout) -> uint64_t { - uint64_t Result = ::select(nfds, readfds, writefds, exceptfds, timeout); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64(fcntl, [](FEXCore::Core::CpuStateFrame *Frame, int fd, int cmd, uint64_t arg) -> uint64_t { - uint64_t Result{}; - switch (cmd) { - case F_GETFL: - Result = ::fcntl(fd, cmd, arg); - if (Result != -1) { - Result = FEX::HLE::RemapToX86Flags(Result); - } - break; - case F_SETFL: - Result = ::fcntl(fd, cmd, FEX::HLE::RemapFromX86Flags(arg)); - break; - default: - Result = ::fcntl(fd, cmd, arg); - break; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(futimesat, [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, const struct timeval times[2]) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(futimesat), dirfd, pathname, times); - 
SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(utimensat, [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, const struct timespec times[2], int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(utimensat), dirfd, pathname, times, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(pselect6, [](FEXCore::Core::CpuStateFrame *Frame, int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timespec *timeout, const void *sigmaskpack) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(pselect6), nfds, readfds, writefds, exceptfds, timeout, sigmaskpack); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64(stat, [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, FEX::HLE::x64::guest_stat *buf) -> uint64_t { - struct stat host_stat; - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Stat(pathname, &host_stat); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64(fstat, [](FEXCore::Core::CpuStateFrame *Frame, int fd, FEX::HLE::x64::guest_stat *buf) -> uint64_t { - struct stat host_stat; - uint64_t Result = ::fstat(fd, &host_stat); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64(lstat, [](FEXCore::Core::CpuStateFrame *Frame, const char *path, FEX::HLE::x64::guest_stat *buf) -> uint64_t { - struct stat host_stat; - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Lstat(path, &host_stat); - if (Result != -1) { - *buf = host_stat; - } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(readv, [](FEXCore::Core::CpuStateFrame *Frame, int fd, const struct iovec *iov, int iovcnt) -> uint64_t { - uint64_t Result = ::readv(fd, iov, iovcnt); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(writev, [](FEXCore::Core::CpuStateFrame *Frame, int fd, const struct iovec *iov, int iovcnt) -> uint64_t { - uint64_t Result = ::writev(fd, iov, iovcnt); - SYSCALL_ERRNO(); - }); - - 
REGISTER_SYSCALL_IMPL_X64_PASS(readahead, [](FEXCore::Core::CpuStateFrame *Frame, int fd, off64_t offset, size_t count) -> uint64_t { - uint64_t Result = ::readahead(fd, offset, count); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64(newfstatat, [](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, FEX::HLE::x64::guest_stat *buf, int flag) -> uint64_t { - struct stat host_stat; - uint64_t Result = FEX::HLE::_SyscallHandler->FM.NewFSStatAt(dirfd, pathname, &host_stat, flag); +void RegisterFD(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X64_PASS(poll, [](FEXCore::Core::CpuStateFrame* Frame, struct pollfd* fds, nfds_t nfds, int timeout) -> uint64_t { + uint64_t Result = ::poll(fds, nfds, timeout); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + select, [](FEXCore::Core::CpuStateFrame* Frame, int nfds, fd_set* readfds, fd_set* writefds, fd_set* exceptfds, struct timeval* timeout) -> uint64_t { + uint64_t Result = ::select(nfds, readfds, writefds, exceptfds, timeout); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64(fcntl, [](FEXCore::Core::CpuStateFrame* Frame, int fd, int cmd, uint64_t arg) -> uint64_t { + uint64_t Result {}; + switch (cmd) { + case F_GETFL: + Result = ::fcntl(fd, cmd, arg); if (Result != -1) { - *buf = host_stat; + Result = FEX::HLE::RemapToX86Flags(Result); } - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(vmsplice, [](FEXCore::Core::CpuStateFrame *Frame, int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags) -> uint64_t { - uint64_t Result = ::vmsplice(fd, iov, nr_segs, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(preadv, [](FEXCore::Core::CpuStateFrame *Frame, - int fd, - const struct iovec *iov, - uint64_t vlen, - uint64_t pos_l, - uint64_t pos_h) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(preadv), fd, iov, vlen, pos_l, pos_h); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(pwritev, 
[](FEXCore::Core::CpuStateFrame *Frame, - int fd, - const struct iovec *iov, - uint64_t vlen, - uint64_t pos_l, - uint64_t pos_h) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(pwritev), fd, iov, vlen, pos_l, pos_h); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(preadv2, [](FEXCore::Core::CpuStateFrame *Frame, - int fd, - const struct iovec *iov, - uint64_t vlen, - uint64_t pos_l, - uint64_t pos_h, - int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(preadv2), fd, iov, vlen, pos_l, pos_h, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(pwritev2, [](FEXCore::Core::CpuStateFrame *Frame, - int fd, - const struct iovec *iov, - uint64_t vlen, - uint64_t pos_l, - uint64_t pos_h, - int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(pwritev2), fd, iov, vlen, pos_l, pos_h, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(pread_64, [](FEXCore::Core::CpuStateFrame *Frame, int fd, void *buf, size_t count, off_t offset) -> uint64_t { - uint64_t Result = ::pread64(fd, buf, count, offset); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(pwrite_64, [](FEXCore::Core::CpuStateFrame *Frame, int fd, void *buf, size_t count, off_t offset) -> uint64_t { - uint64_t Result = ::pwrite64(fd, buf, count, offset); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(process_vm_readv, [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, const struct iovec *local_iov, unsigned long liovcnt, const struct iovec *remote_iov, unsigned long riovcnt, unsigned long flags) -> uint64_t { - uint64_t Result = ::process_vm_readv(pid, local_iov, liovcnt, remote_iov, riovcnt, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(process_vm_writev, [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, const struct iovec *local_iov, unsigned long liovcnt, const struct iovec *remote_iov, unsigned long riovcnt, unsigned long flags) -> uint64_t { - uint64_t Result = ::process_vm_writev(pid, 
local_iov, liovcnt, remote_iov, riovcnt, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(ppoll, [](FEXCore::Core::CpuStateFrame *Frame, struct pollfd *fds, nfds_t nfds, struct timespec *timeout_ts, const uint64_t *sigmask, size_t sigsetsize) -> uint64_t { - // glibc wrapper doesn't allow timeout_ts to be modified like the kernel does - int Result = ::syscall(SYSCALL_DEF(ppoll), fds, nfds, timeout_ts, sigmask, sigsetsize); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64(getdents, [](FEXCore::Core::CpuStateFrame *Frame, int fd, void *dirp, uint32_t count) -> uint64_t { - return GetDentsEmulation(fd, reinterpret_cast(dirp), count); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(getdents64, [](FEXCore::Core::CpuStateFrame *Frame, int fd, void *dirp, uint32_t count) -> uint64_t { - uint64_t Result = syscall(SYSCALL_DEF(getdents64), - static_cast(fd), - reinterpret_cast(dirp), - static_cast(count)); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(sendfile, [](FEXCore::Core::CpuStateFrame *Frame, int out_fd, int in_fd, off_t *offset, size_t count) -> uint64_t { - uint64_t Result = ::sendfile(out_fd, in_fd, offset, count); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(dup, [](FEXCore::Core::CpuStateFrame *Frame, int oldfd) -> uint64_t { - uint64_t Result = ::dup(oldfd); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(dup2, [](FEXCore::Core::CpuStateFrame *Frame, int oldfd, int newfd) -> uint64_t { - uint64_t Result = ::dup2(oldfd, newfd); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64(statfs, [](FEXCore::Core::CpuStateFrame *Frame, const char *path, struct statfs *buf) -> uint64_t { - uint64_t Result = FEX::HLE::_SyscallHandler->FM.Statfs(path, buf); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(fstatfs, [](FEXCore::Core::CpuStateFrame *Frame, int fd, struct statfs *buf) -> uint64_t { - uint64_t Result = ::fstatfs(fd, buf); - SYSCALL_ERRNO(); - }); - - 
REGISTER_SYSCALL_IMPL_X64_PASS(sync_file_range, [](FEXCore::Core::CpuStateFrame *Frame, int fd, off64_t offset, off64_t nbytes, unsigned int flags) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::syscall(SYSCALL_DEF(sync_file_range), fd, offset, nbytes, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(fallocate, [](FEXCore::Core::CpuStateFrame *Frame, int fd, int mode, off_t offset, off_t len) -> uint64_t { - uint64_t Result = ::fallocate(fd, mode, offset, len); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(timerfd_settime, [](FEXCore::Core::CpuStateFrame *Frame, int fd, int flags, const struct itimerspec *new_value, struct itimerspec *old_value) -> uint64_t { - // Flags don't need remapped - uint64_t Result = ::timerfd_settime(fd, flags, new_value, old_value); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(timerfd_gettime, [](FEXCore::Core::CpuStateFrame *Frame, int fd, struct itimerspec *curr_value) -> uint64_t { - uint64_t Result = ::timerfd_gettime(fd, curr_value); - SYSCALL_ERRNO(); - }); - } + break; + case F_SETFL: Result = ::fcntl(fd, cmd, FEX::HLE::RemapFromX86Flags(arg)); break; + default: Result = ::fcntl(fd, cmd, arg); break; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + futimesat, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, const struct timeval times[2]) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(futimesat), dirfd, pathname, times); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + utimensat, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, const struct timespec times[2], int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(utimensat), dirfd, pathname, times, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(pselect6, + [](FEXCore::Core::CpuStateFrame* Frame, int nfds, fd_set* readfds, fd_set* writefds, fd_set* exceptfds, + const struct timespec* timeout, 
const void* sigmaskpack) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(pselect6), nfds, readfds, writefds, exceptfds, timeout, sigmaskpack); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64(stat, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, FEX::HLE::x64::guest_stat* buf) -> uint64_t { + struct stat host_stat; + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Stat(pathname, &host_stat); + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64(fstat, [](FEXCore::Core::CpuStateFrame* Frame, int fd, FEX::HLE::x64::guest_stat* buf) -> uint64_t { + struct stat host_stat; + uint64_t Result = ::fstat(fd, &host_stat); + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64(lstat, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, FEX::HLE::x64::guest_stat* buf) -> uint64_t { + struct stat host_stat; + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Lstat(path, &host_stat); + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(readv, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec* iov, int iovcnt) -> uint64_t { + uint64_t Result = ::readv(fd, iov, iovcnt); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(writev, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec* iov, int iovcnt) -> uint64_t { + uint64_t Result = ::writev(fd, iov, iovcnt); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(readahead, [](FEXCore::Core::CpuStateFrame* Frame, int fd, off64_t offset, size_t count) -> uint64_t { + uint64_t Result = ::readahead(fd, offset, count); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64( + newfstatat, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, FEX::HLE::x64::guest_stat* buf, int flag) -> uint64_t { + struct stat host_stat; + uint64_t Result = FEX::HLE::_SyscallHandler->FM.NewFSStatAt(dirfd, pathname, 
&host_stat, flag); + if (Result != -1) { + *buf = host_stat; + } + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + vmsplice, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec* iov, unsigned long nr_segs, unsigned int flags) -> uint64_t { + uint64_t Result = ::vmsplice(fd, iov, nr_segs, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + preadv, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec* iov, uint64_t vlen, uint64_t pos_l, uint64_t pos_h) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(preadv), fd, iov, vlen, pos_l, pos_h); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + pwritev, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec* iov, uint64_t vlen, uint64_t pos_l, uint64_t pos_h) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(pwritev), fd, iov, vlen, pos_l, pos_h); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(preadv2, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec* iov, uint64_t vlen, uint64_t pos_l, + uint64_t pos_h, int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(preadv2), fd, iov, vlen, pos_l, pos_h, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(pwritev2, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec* iov, uint64_t vlen, uint64_t pos_l, + uint64_t pos_h, int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(pwritev2), fd, iov, vlen, pos_l, pos_h, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(pread_64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* buf, size_t count, off_t offset) -> uint64_t { + uint64_t Result = ::pread64(fd, buf, count, offset); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(pwrite_64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* buf, size_t count, off_t offset) -> uint64_t { + uint64_t Result = ::pwrite64(fd, buf, count, offset); + SYSCALL_ERRNO(); + }); + + 
REGISTER_SYSCALL_IMPL_X64_PASS(process_vm_readv, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, const struct iovec* local_iov, unsigned long liovcnt, + const struct iovec* remote_iov, unsigned long riovcnt, unsigned long flags) -> uint64_t { + uint64_t Result = ::process_vm_readv(pid, local_iov, liovcnt, remote_iov, riovcnt, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(process_vm_writev, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, const struct iovec* local_iov, unsigned long liovcnt, + const struct iovec* remote_iov, unsigned long riovcnt, unsigned long flags) -> uint64_t { + uint64_t Result = ::process_vm_writev(pid, local_iov, liovcnt, remote_iov, riovcnt, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(ppoll, + [](FEXCore::Core::CpuStateFrame* Frame, struct pollfd* fds, nfds_t nfds, struct timespec* timeout_ts, + const uint64_t* sigmask, size_t sigsetsize) -> uint64_t { + // glibc wrapper doesn't allow timeout_ts to be modified like the kernel does + int Result = ::syscall(SYSCALL_DEF(ppoll), fds, nfds, timeout_ts, sigmask, sigsetsize); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64(getdents, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* dirp, uint32_t count) -> uint64_t { + return GetDentsEmulation(fd, reinterpret_cast(dirp), count); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(getdents64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* dirp, uint32_t count) -> uint64_t { + uint64_t Result = syscall(SYSCALL_DEF(getdents64), static_cast(fd), reinterpret_cast(dirp), static_cast(count)); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(sendfile, [](FEXCore::Core::CpuStateFrame* Frame, int out_fd, int in_fd, off_t* offset, size_t count) -> uint64_t { + uint64_t Result = ::sendfile(out_fd, in_fd, offset, count); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(dup, [](FEXCore::Core::CpuStateFrame* Frame, int oldfd) -> uint64_t { + uint64_t Result = ::dup(oldfd); + 
SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(dup2, [](FEXCore::Core::CpuStateFrame* Frame, int oldfd, int newfd) -> uint64_t { + uint64_t Result = ::dup2(oldfd, newfd); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64(statfs, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, struct statfs* buf) -> uint64_t { + uint64_t Result = FEX::HLE::_SyscallHandler->FM.Statfs(path, buf); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(fstatfs, [](FEXCore::Core::CpuStateFrame* Frame, int fd, struct statfs* buf) -> uint64_t { + uint64_t Result = ::fstatfs(fd, buf); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + sync_file_range, [](FEXCore::Core::CpuStateFrame* Frame, int fd, off64_t offset, off64_t nbytes, unsigned int flags) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::syscall(SYSCALL_DEF(sync_file_range), fd, offset, nbytes, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(fallocate, [](FEXCore::Core::CpuStateFrame* Frame, int fd, int mode, off_t offset, off_t len) -> uint64_t { + uint64_t Result = ::fallocate(fd, mode, offset, len); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + timerfd_settime, + [](FEXCore::Core::CpuStateFrame* Frame, int fd, int flags, const struct itimerspec* new_value, struct itimerspec* old_value) -> uint64_t { + // Flags don't need remapped + uint64_t Result = ::timerfd_settime(fd, flags, new_value, old_value); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(timerfd_gettime, [](FEXCore::Core::CpuStateFrame* Frame, int fd, struct itimerspec* curr_value) -> uint64_t { + uint64_t Result = ::timerfd_gettime(fd, curr_value); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/IO.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/IO.cpp index 5859310aa1..6575b603c6 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/IO.cpp +++ 
b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/IO.cpp @@ -14,19 +14,23 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x64 { - void RegisterIO(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X64_PASS(io_getevents, [](FEXCore::Core::CpuStateFrame *Frame, aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(io_getevents), ctx_id, min_nr, nr, events, timeout); - SYSCALL_ERRNO(); - }); +void RegisterIO(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X64_PASS(io_getevents, + [](FEXCore::Core::CpuStateFrame* Frame, aio_context_t ctx_id, long min_nr, long nr, + struct io_event* events, struct timespec* timeout) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(io_getevents), ctx_id, min_nr, nr, events, timeout); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X64_PASS(io_pgetevents, [](FEXCore::Core::CpuStateFrame *Frame, aio_context_t ctx_id, long min_nr, long nr, struct io_event *events, struct timespec *timeout, const struct io_sigset *usig) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(io_pgetevents), ctx_id, min_nr, nr, events, timeout, usig); - SYSCALL_ERRNO(); - }); - } + REGISTER_SYSCALL_IMPL_X64_PASS(io_pgetevents, + [](FEXCore::Core::CpuStateFrame* Frame, aio_context_t ctx_id, long min_nr, long nr, + struct io_event* events, struct timespec* timeout, const struct io_sigset* usig) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(io_pgetevents), ctx_id, min_nr, nr, events, timeout, usig); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Info.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Info.cpp index c746044870..0d9e9be3a2 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Info.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Info.cpp @@ 
-15,38 +15,37 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEX::HLE::x64 { - void RegisterInfo(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; - - REGISTER_SYSCALL_IMPL_X64_PASS(sysinfo, [](FEXCore::Core::CpuStateFrame *Frame, struct sysinfo *info) -> uint64_t { - uint64_t Result = ::sysinfo(info); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(getrusage, [](FEXCore::Core::CpuStateFrame *Frame, int who, struct rusage *usage) -> uint64_t { - uint64_t Result = ::getrusage(who, usage); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(getrlimit, [](FEXCore::Core::CpuStateFrame *Frame, int resource, struct rlimit *rlim) -> uint64_t { - uint64_t Result = ::getrlimit(resource, rlim); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(setrlimit, [](FEXCore::Core::CpuStateFrame *Frame, int resource, const struct rlimit *rlim) -> uint64_t { - uint64_t Result = ::setrlimit(resource, rlim); - SYSCALL_ERRNO(); +void RegisterInfo(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; + + REGISTER_SYSCALL_IMPL_X64_PASS(sysinfo, [](FEXCore::Core::CpuStateFrame* Frame, struct sysinfo* info) -> uint64_t { + uint64_t Result = ::sysinfo(info); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(getrusage, [](FEXCore::Core::CpuStateFrame* Frame, int who, struct rusage* usage) -> uint64_t { + uint64_t Result = ::getrusage(who, usage); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(getrlimit, [](FEXCore::Core::CpuStateFrame* Frame, int resource, struct rlimit* rlim) -> uint64_t { + uint64_t Result = ::getrlimit(resource, rlim); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(setrlimit, [](FEXCore::Core::CpuStateFrame* Frame, int resource, const struct rlimit* rlim) -> uint64_t { + uint64_t Result = ::setrlimit(resource, rlim); + SYSCALL_ERRNO(); + }); + + if (Handler->IsHostKernelVersionAtLeast(6, 6, 0)) { + REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(map_shadow_stack, 
SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, uint64_t addr, uint64_t size, uint32_t flags) -> uint64_t { + // Claim that shadow stack isn't supported. + return -EOPNOTSUPP; }); - - if (Handler->IsHostKernelVersionAtLeast(6, 6, 0)) { - REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(map_shadow_stack, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, uint64_t addr, uint64_t size, uint32_t flags) -> uint64_t { - // Claim that shadow stack isn't supported. - return -EOPNOTSUPP; - }); - } - else { - REGISTER_SYSCALL_IMPL_X64(map_shadow_stack, UnimplementedSyscallSafe); - } + } else { + REGISTER_SYSCALL_IMPL_X64(map_shadow_stack, UnimplementedSyscallSafe); } } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl.cpp index d2dc68bda5..7550861ee4 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl.cpp @@ -12,14 +12,14 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x64 { - void RegisterIoctl(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X64_PASS(ioctl, [](FEXCore::Core::CpuStateFrame *Frame, int fd, uint64_t request, void *args) -> uint64_t { - uint64_t Result = ::ioctl(fd, request, args); - SYSCALL_ERRNO(); - }); - } +void RegisterIoctl(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X64_PASS(ioctl, [](FEXCore::Core::CpuStateFrame* Frame, int fd, uint64_t request, void* args) -> uint64_t { + uint64_t Result = ::ioctl(fd, request, args); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Memory.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Memory.cpp index 99ac8dfa9e..894b66f7bf 100644 --- 
a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Memory.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Memory.cpp @@ -23,127 +23,128 @@ tags: LinuxSyscalls|syscalls-x86-64 namespace FEX::HLE::x64 { - void *x64SyscallHandler::GuestMmap(FEXCore::Core::InternalThreadState *Thread, void *addr, size_t length, int prot, int flags, int fd, off_t offset) { - uint64_t Result{}; - - bool Map32Bit = flags & FEX::HLE::X86_64_MAP_32BIT; - if (Map32Bit) { - Result = (uint64_t)Get32BitAllocator()->Mmap(addr, length, prot,flags, fd, offset); - if (FEX::HLE::HasSyscallError(Result)) { - errno = -Result; - Result = -1; - } - } else { - Result = reinterpret_cast(::mmap(reinterpret_cast(addr), length, prot, flags, fd, offset)); - } - - if (Result != -1) { - FEX::HLE::_SyscallHandler->TrackMmap(Thread, (uintptr_t)Result, length, prot, flags, fd, offset); +void* x64SyscallHandler::GuestMmap(FEXCore::Core::InternalThreadState* Thread, void* addr, size_t length, int prot, int flags, int fd, off_t offset) { + uint64_t Result {}; + + bool Map32Bit = flags & FEX::HLE::X86_64_MAP_32BIT; + if (Map32Bit) { + Result = (uint64_t)Get32BitAllocator()->Mmap(addr, length, prot, flags, fd, offset); + if (FEX::HLE::HasSyscallError(Result)) { + errno = -Result; + Result = -1; } + } else { + Result = reinterpret_cast(::mmap(reinterpret_cast(addr), length, prot, flags, fd, offset)); + } - return reinterpret_cast(Result); + if (Result != -1) { + FEX::HLE::_SyscallHandler->TrackMmap(Thread, (uintptr_t)Result, length, prot, flags, fd, offset); } - int x64SyscallHandler::GuestMunmap(FEXCore::Core::InternalThreadState *Thread, void *addr, uint64_t length) { - uint64_t Result{}; - if (reinterpret_cast(addr) < 0x1'0000'0000ULL) { - Result = Get32BitAllocator()->Munmap(addr, length); - - if (FEX::HLE::HasSyscallError(Result)) { - errno = -Result; - Result = -1; - } - } else { - Result = ::munmap(addr, length); - } + return reinterpret_cast(Result); +} - if (Result != -1) { - 
FEX::HLE::_SyscallHandler->TrackMunmap(Thread, reinterpret_cast(addr), length); +int x64SyscallHandler::GuestMunmap(FEXCore::Core::InternalThreadState* Thread, void* addr, uint64_t length) { + uint64_t Result {}; + if (reinterpret_cast(addr) < 0x1'0000'0000ULL) { + Result = Get32BitAllocator()->Munmap(addr, length); + + if (FEX::HLE::HasSyscallError(Result)) { + errno = -Result; + Result = -1; } + } else { + Result = ::munmap(addr, length); + } - return Result; + if (Result != -1) { + FEX::HLE::_SyscallHandler->TrackMunmap(Thread, reinterpret_cast(addr), length); } - void RegisterMemory(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; + return Result; +} - REGISTER_SYSCALL_IMPL_X64_FLAGS(mmap, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *addr, size_t length, int prot, int flags, int fd, off_t offset) -> uint64_t { - uint64_t Result = (uint64_t) static_cast(FEX::HLE::_SyscallHandler)-> - GuestMmap(Frame->Thread, addr, length, prot, flags, fd, offset); +void RegisterMemory(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X64_FLAGS( + mmap, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t length, int prot, int flags, int fd, off_t offset) -> uint64_t { + uint64_t Result = + (uint64_t) static_cast(FEX::HLE::_SyscallHandler)->GuestMmap(Frame->Thread, addr, length, prot, flags, fd, offset); - REGISTER_SYSCALL_IMPL_X64_FLAGS(munmap, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *addr, size_t length) -> uint64_t { - uint64_t Result = static_cast(FEX::HLE::_SyscallHandler)-> - GuestMunmap(Frame->Thread, addr, length); + SYSCALL_ERRNO(); + }); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X64_FLAGS(munmap, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + 
[](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t length) -> uint64_t { + uint64_t Result = static_cast(FEX::HLE::_SyscallHandler)->GuestMunmap(Frame->Thread, addr, length); - REGISTER_SYSCALL_IMPL_X64_FLAGS(mremap, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) -> uint64_t { - uint64_t Result = reinterpret_cast(::mremap(old_address, old_size, new_size, flags, new_address)); + SYSCALL_ERRNO(); + }); - if (Result != -1) { - FEX::HLE::_SyscallHandler->TrackMremap(Frame->Thread, (uintptr_t)old_address, old_size, new_size, flags, Result); - } - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X64_FLAGS( + mremap, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, void* old_address, size_t old_size, size_t new_size, int flags, void* new_address) -> uint64_t { + uint64_t Result = reinterpret_cast(::mremap(old_address, old_size, new_size, flags, new_address)); - REGISTER_SYSCALL_IMPL_X64_FLAGS(mprotect, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, void *addr, size_t len, int prot) -> uint64_t { - uint64_t Result = ::mprotect(addr, len, prot); + if (Result != -1) { + FEX::HLE::_SyscallHandler->TrackMremap(Frame->Thread, (uintptr_t)old_address, old_size, new_size, flags, Result); + } + SYSCALL_ERRNO(); + }); - if (Result != -1) { - FEX::HLE::_SyscallHandler->TrackMprotect(Frame->Thread, (uintptr_t)addr, len, prot); - } - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X64_FLAGS(mprotect, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t len, int prot) -> uint64_t { + uint64_t Result = ::mprotect(addr, len, prot); - REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(mlockall, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - 
[](FEXCore::Core::CpuStateFrame *Frame, int flags) -> uint64_t { - uint64_t Result = ::mlockall(flags); - SYSCALL_ERRNO(); - }); + if (Result != -1) { + FEX::HLE::_SyscallHandler->TrackMprotect(Frame->Thread, (uintptr_t)addr, len, prot); + } + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(munlockall, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { - uint64_t Result = ::munlockall(); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(mlockall, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int flags) -> uint64_t { + uint64_t Result = ::mlockall(flags); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X64_FLAGS(_shmat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int shmid, const void *shmaddr, int shmflg) -> uint64_t { - uint64_t Result = reinterpret_cast(shmat(shmid, shmaddr, shmflg)); + REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(munlockall, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { + uint64_t Result = ::munlockall(); + SYSCALL_ERRNO(); + }); - if (Result != -1) { - FEX::HLE::_SyscallHandler->TrackShmat(Frame->Thread, shmid, Result, shmflg); - } - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X64_FLAGS(_shmat, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int shmid, const void* shmaddr, int shmflg) -> uint64_t { + uint64_t Result = reinterpret_cast(shmat(shmid, shmaddr, shmflg)); - REGISTER_SYSCALL_IMPL_X64_FLAGS(_shmdt, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, const void *shmaddr) -> uint64_t { - uint64_t Result = ::shmdt(shmaddr); + if (Result != -1) { + FEX::HLE::_SyscallHandler->TrackShmat(Frame->Thread, shmid, Result, shmflg); + } + 
SYSCALL_ERRNO(); + }); - if (Result != -1) { - FEX::HLE::_SyscallHandler->TrackShmdt(Frame->Thread, (uintptr_t)shmaddr); - } - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X64_FLAGS(_shmdt, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, const void* shmaddr) -> uint64_t { + uint64_t Result = ::shmdt(shmaddr); - if (Handler->IsHostKernelVersionAtLeast(5, 10, 0)) { - REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(process_madvise, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int pidfd, const struct iovec *iovec, size_t vlen, int advice, unsigned int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(process_madvise), pidfd, iovec, vlen, advice, flags); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL_X64(process_madvise, UnimplementedSyscallSafe); + if (Result != -1) { + FEX::HLE::_SyscallHandler->TrackShmdt(Frame->Thread, (uintptr_t)shmaddr); } + SYSCALL_ERRNO(); + }); + + if (Handler->IsHostKernelVersionAtLeast(5, 10, 0)) { + REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS( + process_madvise, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int pidfd, const struct iovec* iovec, size_t vlen, int advice, unsigned int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(process_madvise), pidfd, iovec, vlen, advice, flags); + SYSCALL_ERRNO(); + }); + } else { + REGISTER_SYSCALL_IMPL_X64(process_madvise, UnimplementedSyscallSafe); } } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Msg.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Msg.cpp index dd17eb70b9..2a0740634f 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Msg.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Msg.cpp @@ -15,34 +15,40 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } 
namespace FEX::HLE::x64 { - void RegisterMsg(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X64_PASS(mq_timedsend, [](FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::mqd_t mqdes, const char *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedsend), mqdes, msg_ptr, msg_len, msg_prio, abs_timeout); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(mq_timedreceive, [](FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::mqd_t mqdes, char *msg_ptr, size_t msg_len, unsigned int *msg_prio, const struct timespec *abs_timeout) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedreceive), mqdes, msg_ptr, msg_len, msg_prio, abs_timeout); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(mq_open, [](FEXCore::Core::CpuStateFrame *Frame, const char *name, int oflag, mode_t mode, struct mq_attr *attr) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(mq_open), name, oflag, mode, attr); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(mq_notify, [](FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::mqd_t mqdes, const struct sigevent *sevp) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(mq_notify), mqdes, sevp); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(mq_getsetattr, [](FEXCore::Core::CpuStateFrame *Frame, FEX::HLE::mqd_t mqdes, struct mq_attr *newattr, struct mq_attr *oldattr) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(mq_getsetattr), mqdes, newattr, oldattr); - SYSCALL_ERRNO(); - }); - } +void RegisterMsg(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X64_PASS(mq_timedsend, + [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, const char* msg_ptr, size_t msg_len, + unsigned int msg_prio, const struct timespec* abs_timeout) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedsend), mqdes, msg_ptr, msg_len, msg_prio, abs_timeout); + SYSCALL_ERRNO(); + }); + 
+ REGISTER_SYSCALL_IMPL_X64_PASS(mq_timedreceive, + [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, char* msg_ptr, size_t msg_len, + unsigned int* msg_prio, const struct timespec* abs_timeout) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedreceive), mqdes, msg_ptr, msg_len, msg_prio, abs_timeout); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + mq_open, [](FEXCore::Core::CpuStateFrame* Frame, const char* name, int oflag, mode_t mode, struct mq_attr* attr) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(mq_open), name, oflag, mode, attr); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(mq_notify, [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, const struct sigevent* sevp) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(mq_notify), mqdes, sevp); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + mq_getsetattr, [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, struct mq_attr* newattr, struct mq_attr* oldattr) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(mq_getsetattr), mqdes, newattr, oldattr); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/NotImplemented.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/NotImplemented.cpp index 4e7e80d744..b9d30b5353 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/NotImplemented.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/NotImplemented.cpp @@ -13,26 +13,26 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x64 { -#define REGISTER_SYSCALL_NOT_IMPL_X64(name) REGISTER_SYSCALL_IMPL_X64(name, [](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { \ - LogMan::Msg::DFmt("Using deprecated/removed syscall: " #name); \ - return -ENOSYS; \ -}); -#define REGISTER_SYSCALL_NO_PERM_X64(name) REGISTER_SYSCALL_IMPL_X64(name, 
[](FEXCore::Core::CpuStateFrame *Frame) -> uint64_t { \ - return -EPERM; \ -}); +#define REGISTER_SYSCALL_NOT_IMPL_X64(name) \ + REGISTER_SYSCALL_IMPL_X64(name, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { \ + LogMan::Msg::DFmt("Using deprecated/removed syscall: " #name); \ + return -ENOSYS; \ + }); +#define REGISTER_SYSCALL_NO_PERM_X64(name) \ + REGISTER_SYSCALL_IMPL_X64(name, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { return -EPERM; }); - // these are removed/not implemented in the linux kernel we present - void RegisterNotImplemented(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_NOT_IMPL_X64(tuxcall); - REGISTER_SYSCALL_NOT_IMPL_X64(security); - REGISTER_SYSCALL_NOT_IMPL_X64(set_thread_area); - REGISTER_SYSCALL_NOT_IMPL_X64(get_thread_area); - REGISTER_SYSCALL_NOT_IMPL_X64(epoll_ctl_old); - REGISTER_SYSCALL_NOT_IMPL_X64(epoll_wait_old); - REGISTER_SYSCALL_NO_PERM_X64(kexec_file_load); - } +// these are removed/not implemented in the linux kernel we present +void RegisterNotImplemented(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_NOT_IMPL_X64(tuxcall); + REGISTER_SYSCALL_NOT_IMPL_X64(security); + REGISTER_SYSCALL_NOT_IMPL_X64(set_thread_area); + REGISTER_SYSCALL_NOT_IMPL_X64(get_thread_area); + REGISTER_SYSCALL_NOT_IMPL_X64(epoll_ctl_old); + REGISTER_SYSCALL_NOT_IMPL_X64(epoll_wait_old); + REGISTER_SYSCALL_NO_PERM_X64(kexec_file_load); } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Sched.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Sched.cpp index d1ef574fe1..b434ef797a 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Sched.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Sched.cpp @@ -13,14 +13,14 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x64 { - void RegisterSched(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X64_PASS(sched_rr_get_interval, 
[](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, struct timespec *tp) -> uint64_t { - uint64_t Result = ::sched_rr_get_interval(pid, tp); - SYSCALL_ERRNO(); - }); - } +void RegisterSched(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X64_PASS(sched_rr_get_interval, [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, struct timespec* tp) -> uint64_t { + uint64_t Result = ::sched_rr_get_interval(pid, tp); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Semaphore.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Semaphore.cpp index 415391d9fc..b0a574ae2d 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Semaphore.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Semaphore.cpp @@ -16,77 +16,74 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } ARG_TO_STR(FEX::HLE::x64::semun, "%lx") namespace FEX::HLE::x64 { - void RegisterSemaphore(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X64_PASS(semop, [](FEXCore::Core::CpuStateFrame *Frame, int semid, struct sembuf *sops, size_t nsops) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(semop), semid, sops, nsops); - SYSCALL_ERRNO(); - }); +void RegisterSemaphore(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X64_PASS(semop, [](FEXCore::Core::CpuStateFrame* Frame, int semid, struct sembuf* sops, size_t nsops) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(semop), semid, sops, nsops); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X64_PASS(semtimedop, [](FEXCore::Core::CpuStateFrame *Frame, int semid, struct sembuf *sops, size_t nsops, const struct timespec *timeout) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(semtimedop), semid, sops, nsops, timeout); - SYSCALL_ERRNO(); - }); + REGISTER_SYSCALL_IMPL_X64_PASS( + semtimedop, [](FEXCore::Core::CpuStateFrame* Frame, int semid, struct sembuf* sops, size_t 
nsops, const struct timespec* timeout) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(semtimedop), semid, sops, nsops, timeout); + SYSCALL_ERRNO(); + }); - REGISTER_SYSCALL_IMPL_X64(semctl, [](FEXCore::Core::CpuStateFrame *Frame, int semid, int semnum, int cmd, FEX::HLE::x64::semun semun) -> uint64_t { - uint64_t Result{}; - switch (cmd) { - case IPC_SET: { - struct semid64_ds buf{}; - buf = *semun.buf; - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf); - if (Result != -1) { - *semun.buf = buf; - } - break; - } - case SEM_STAT: - case SEM_STAT_ANY: - case IPC_STAT: { - struct semid64_ds buf{}; - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf); - if (Result != -1) { - *semun.buf = buf; - } - break; - } - case SEM_INFO: - case IPC_INFO: { - struct fex_seminfo si{}; - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &si); - if (Result != -1) { - memcpy(semun.__buf, &si, sizeof(si)); - } - break; - } - case GETALL: - case SETALL: { - // ptr is just a int32_t* in this case - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun.array); - break; - } - case SETVAL: { - // ptr is just a int32_t in this case - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun.val); - break; - } - case IPC_RMID: - case GETPID: - case GETNCNT: - case GETZCNT: - case GETVAL: - Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun); - break; - default: - LOGMAN_MSG_A_FMT("Unhandled semctl cmd: {}", cmd); - return -EINVAL; + REGISTER_SYSCALL_IMPL_X64(semctl, [](FEXCore::Core::CpuStateFrame* Frame, int semid, int semnum, int cmd, FEX::HLE::x64::semun semun) -> uint64_t { + uint64_t Result {}; + switch (cmd) { + case IPC_SET: { + struct semid64_ds buf {}; + buf = *semun.buf; + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf); + if (Result != -1) { + *semun.buf = buf; } - SYSCALL_ERRNO(); - }); - } + break; + } + case SEM_STAT: + case SEM_STAT_ANY: + case IPC_STAT: { + struct semid64_ds 
buf {}; + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf); + if (Result != -1) { + *semun.buf = buf; + } + break; + } + case SEM_INFO: + case IPC_INFO: { + struct fex_seminfo si {}; + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &si); + if (Result != -1) { + memcpy(semun.__buf, &si, sizeof(si)); + } + break; + } + case GETALL: + case SETALL: { + // ptr is just a int32_t* in this case + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun.array); + break; + } + case SETVAL: { + // ptr is just a int32_t in this case + Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun.val); + break; + } + case IPC_RMID: + case GETPID: + case GETNCNT: + case GETZCNT: + case GETVAL: Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun); break; + default: LOGMAN_MSG_A_FMT("Unhandled semctl cmd: {}", cmd); return -EINVAL; + } + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Signals.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Signals.cpp index 4c68ffb50f..c3b2ded878 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Signals.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Signals.cpp @@ -19,38 +19,41 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEX::HLE::x64 { - void RegisterSignals(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X64(rt_sigaction, [](FEXCore::Core::CpuStateFrame *Frame, int signum, const GuestSigAction *act, GuestSigAction *oldact, size_t sigsetsize) -> uint64_t { - if (sigsetsize != 8) { - return -EINVAL; - } - - return FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSignalHandler(signum, act, oldact); - }); - - REGISTER_SYSCALL_IMPL_X64(rt_sigtimedwait, [](FEXCore::Core::CpuStateFrame *Frame, uint64_t *set, siginfo_t *info, const struct timespec* timeout, size_t sigsetsize) -> uint64_t { - return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigTimedWait(set, 
info, timeout, sigsetsize); - }); - - if (Handler->IsHostKernelVersionAtLeast(5, 1, 0)) { - REGISTER_SYSCALL_IMPL_X64_PASS(pidfd_send_signal, [](FEXCore::Core::CpuStateFrame *Frame, int pidfd, int sig, siginfo_t *info, unsigned int flags) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(pidfd_send_signal), pidfd, sig, info, flags); - SYSCALL_ERRNO(); - }); - } - else { - REGISTER_SYSCALL_IMPL_X64(pidfd_send_signal, UnimplementedSyscallSafe); +void RegisterSignals(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X64( + rt_sigaction, [](FEXCore::Core::CpuStateFrame* Frame, int signum, const GuestSigAction* act, GuestSigAction* oldact, size_t sigsetsize) -> uint64_t { + if (sigsetsize != 8) { + return -EINVAL; } - REGISTER_SYSCALL_IMPL_X64_PASS(rt_sigqueueinfo, [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, int sig, siginfo_t *info) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(rt_sigqueueinfo), pid, sig, info); - SYSCALL_ERRNO(); - }); + return FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSignalHandler(signum, act, oldact); + }); + + REGISTER_SYSCALL_IMPL_X64( + rt_sigtimedwait, + [](FEXCore::Core::CpuStateFrame* Frame, uint64_t* set, siginfo_t* info, const struct timespec* timeout, size_t sigsetsize) -> uint64_t { + return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigTimedWait(set, info, timeout, sigsetsize); + }); - REGISTER_SYSCALL_IMPL_X64_PASS(rt_tgsigqueueinfo, [](FEXCore::Core::CpuStateFrame *Frame, pid_t tgid, pid_t tid, int sig, siginfo_t *info) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(rt_tgsigqueueinfo), tgid, tid, sig, info); + if (Handler->IsHostKernelVersionAtLeast(5, 1, 0)) { + REGISTER_SYSCALL_IMPL_X64_PASS( + pidfd_send_signal, [](FEXCore::Core::CpuStateFrame* Frame, int pidfd, int sig, siginfo_t* info, unsigned int flags) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(pidfd_send_signal), pidfd, sig, info, flags); SYSCALL_ERRNO(); }); + } else { + 
REGISTER_SYSCALL_IMPL_X64(pidfd_send_signal, UnimplementedSyscallSafe); } -} + REGISTER_SYSCALL_IMPL_X64_PASS(rt_sigqueueinfo, [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, int sig, siginfo_t* info) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(rt_sigqueueinfo), pid, sig, info); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(rt_tgsigqueueinfo, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t tgid, pid_t tid, int sig, siginfo_t* info) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(rt_tgsigqueueinfo), tgid, tid, sig, info); + SYSCALL_ERRNO(); + }); +} +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Socket.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Socket.cpp index 7de85cae1b..1192cd541d 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Socket.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Socket.cpp @@ -12,44 +12,49 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEXCore::Core { - struct CpuStateFrame; +struct CpuStateFrame; } namespace FEX::HLE::x64 { - void RegisterSocket(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X64_PASS(accept, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, struct sockaddr *addr, socklen_t *addrlen) -> uint64_t { - uint64_t Result = ::accept(sockfd, addr, addrlen); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(recvmmsg, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags, struct timespec *timeout) -> uint64_t { - uint64_t Result = ::recvmmsg(sockfd, msgvec, vlen, flags, timeout); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(sendmmsg, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, struct mmsghdr *msgvec, uint32_t vlen, int flags) -> uint64_t { - uint64_t Result = ::sendmmsg(sockfd, msgvec, vlen, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(sendmsg, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, 
const struct msghdr *msg, int flags) -> uint64_t { - uint64_t Result = ::sendmsg(sockfd, msg, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(recvmsg, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, struct msghdr *msg, int flags) -> uint64_t { - uint64_t Result = ::recvmsg(sockfd, msg, flags); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(setsockopt, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, int level, int optname, const void *optval, socklen_t optlen) -> uint64_t { - uint64_t Result = ::setsockopt(sockfd, level, optname, optval, optlen); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(getsockopt, [](FEXCore::Core::CpuStateFrame *Frame, int sockfd, int level, int optname, void *optval, socklen_t *optlen) -> uint64_t { - uint64_t Result = ::getsockopt(sockfd, level, optname, optval, optlen); - SYSCALL_ERRNO(); - }); - } +void RegisterSocket(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X64_PASS(accept, [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, struct sockaddr* addr, socklen_t* addrlen) -> uint64_t { + uint64_t Result = ::accept(sockfd, addr, addrlen); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(recvmmsg, + [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, struct mmsghdr* msgvec, unsigned int vlen, int flags, + struct timespec* timeout) -> uint64_t { + uint64_t Result = ::recvmmsg(sockfd, msgvec, vlen, flags, timeout); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + sendmmsg, [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, struct mmsghdr* msgvec, uint32_t vlen, int flags) -> uint64_t { + uint64_t Result = ::sendmmsg(sockfd, msgvec, vlen, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(sendmsg, [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, const struct msghdr* msg, int flags) -> uint64_t { + uint64_t Result = ::sendmsg(sockfd, msg, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(recvmsg, 
[](FEXCore::Core::CpuStateFrame* Frame, int sockfd, struct msghdr* msg, int flags) -> uint64_t { + uint64_t Result = ::recvmsg(sockfd, msg, flags); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + setsockopt, [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, int level, int optname, const void* optval, socklen_t optlen) -> uint64_t { + uint64_t Result = ::setsockopt(sockfd, level, optname, optval, optlen); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + getsockopt, [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, int level, int optname, void* optval, socklen_t* optlen) -> uint64_t { + uint64_t Result = ::getsockopt(sockfd, level, optname, optval, optlen); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Syscalls.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Syscalls.cpp index 1b24c4ff5d..c609a9ddb7 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Syscalls.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Syscalls.cpp @@ -12,109 +12,109 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEX::HLE::x64 { - void RegisterEpoll(FEX::HLE::SyscallHandler *Handler); - void RegisterFD(FEX::HLE::SyscallHandler *Handler); - void RegisterInfo(FEX::HLE::SyscallHandler *Handler); - void RegisterIO(FEX::HLE::SyscallHandler *Handler); - void RegisterIoctl(FEX::HLE::SyscallHandler *Handler); - void RegisterMemory(FEX::HLE::SyscallHandler *Handler); - void RegisterMsg(FEX::HLE::SyscallHandler *Handler); - void RegisterSched(FEX::HLE::SyscallHandler *Handler); - void RegisterSocket(FEX::HLE::SyscallHandler *Handler); - void RegisterSemaphore(FEX::HLE::SyscallHandler *Handler); - void RegisterSignals(FEX::HLE::SyscallHandler *Handler); - void RegisterThread(FEX::HLE::SyscallHandler *Handler); - void RegisterTime(FEX::HLE::SyscallHandler *Handler); - void RegisterNotImplemented(FEX::HLE::SyscallHandler *Handler); +void RegisterEpoll(FEX::HLE::SyscallHandler* 
Handler); +void RegisterFD(FEX::HLE::SyscallHandler* Handler); +void RegisterInfo(FEX::HLE::SyscallHandler* Handler); +void RegisterIO(FEX::HLE::SyscallHandler* Handler); +void RegisterIoctl(FEX::HLE::SyscallHandler* Handler); +void RegisterMemory(FEX::HLE::SyscallHandler* Handler); +void RegisterMsg(FEX::HLE::SyscallHandler* Handler); +void RegisterSched(FEX::HLE::SyscallHandler* Handler); +void RegisterSocket(FEX::HLE::SyscallHandler* Handler); +void RegisterSemaphore(FEX::HLE::SyscallHandler* Handler); +void RegisterSignals(FEX::HLE::SyscallHandler* Handler); +void RegisterThread(FEX::HLE::SyscallHandler* Handler); +void RegisterTime(FEX::HLE::SyscallHandler* Handler); +void RegisterNotImplemented(FEX::HLE::SyscallHandler* Handler); - x64SyscallHandler::x64SyscallHandler(FEXCore::Context::Context *ctx, FEX::HLE::SignalDelegator *_SignalDelegation) - : SyscallHandler {ctx, _SignalDelegation} { - OSABI = FEXCore::HLE::SyscallOSABI::OS_LINUX64; +x64SyscallHandler::x64SyscallHandler(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* _SignalDelegation) + : SyscallHandler {ctx, _SignalDelegation} { + OSABI = FEXCore::HLE::SyscallOSABI::OS_LINUX64; - RegisterSyscallHandlers(); - } + RegisterSyscallHandlers(); +} - void x64SyscallHandler::RegisterSyscallHandlers() { - auto cvt = [](auto in) { - union { - decltype(in) val; - void *raw; - } raw; - raw.val = in; - return raw.raw; - }; +void x64SyscallHandler::RegisterSyscallHandlers() { + auto cvt = [](auto in) { + union { + decltype(in) val; + void* raw; + } raw; + raw.val = in; + return raw.raw; + }; - Definitions.resize(FEX::HLE::x64::SYSCALL_x64_MAX, SyscallFunctionDefinition { - .NumArgs = 255, - .Ptr = cvt(&UnimplementedSyscall), - }); + Definitions.resize(FEX::HLE::x64::SYSCALL_x64_MAX, SyscallFunctionDefinition { + .NumArgs = 255, + .Ptr = cvt(&UnimplementedSyscall), + }); - FEX::HLE::RegisterEpoll(this); - FEX::HLE::RegisterFD(this); - FEX::HLE::RegisterFS(this); - FEX::HLE::RegisterInfo(this); - 
FEX::HLE::RegisterIO(this); - FEX::HLE::RegisterIOUring(this); - FEX::HLE::RegisterKey(this); - FEX::HLE::RegisterMemory(this); - FEX::HLE::RegisterMsg(this); - FEX::HLE::RegisterNamespace(this); - FEX::HLE::RegisterSched(this); - FEX::HLE::RegisterSemaphore(this); - FEX::HLE::RegisterSHM(this); - FEX::HLE::RegisterSignals(this); - FEX::HLE::RegisterSocket(this); - FEX::HLE::RegisterThread(this); - FEX::HLE::RegisterTime(this); - FEX::HLE::RegisterTimer(this); - FEX::HLE::RegisterNotImplemented(this); - FEX::HLE::RegisterStubs(this); + FEX::HLE::RegisterEpoll(this); + FEX::HLE::RegisterFD(this); + FEX::HLE::RegisterFS(this); + FEX::HLE::RegisterInfo(this); + FEX::HLE::RegisterIO(this); + FEX::HLE::RegisterIOUring(this); + FEX::HLE::RegisterKey(this); + FEX::HLE::RegisterMemory(this); + FEX::HLE::RegisterMsg(this); + FEX::HLE::RegisterNamespace(this); + FEX::HLE::RegisterSched(this); + FEX::HLE::RegisterSemaphore(this); + FEX::HLE::RegisterSHM(this); + FEX::HLE::RegisterSignals(this); + FEX::HLE::RegisterSocket(this); + FEX::HLE::RegisterThread(this); + FEX::HLE::RegisterTime(this); + FEX::HLE::RegisterTimer(this); + FEX::HLE::RegisterNotImplemented(this); + FEX::HLE::RegisterStubs(this); - // 64bit specific - FEX::HLE::x64::RegisterEpoll(this); - FEX::HLE::x64::RegisterFD(this); - FEX::HLE::x64::RegisterInfo(this); - FEX::HLE::x64::RegisterIO(this); - FEX::HLE::x64::RegisterIoctl(this); - FEX::HLE::x64::RegisterMemory(this); - FEX::HLE::x64::RegisterMsg(this); - FEX::HLE::x64::RegisterSched(this); - FEX::HLE::x64::RegisterSocket(this); - FEX::HLE::x64::RegisterSemaphore(this); - FEX::HLE::x64::RegisterSignals(this); - FEX::HLE::x64::RegisterThread(this); - FEX::HLE::x64::RegisterTime(this); - FEX::HLE::x64::RegisterNotImplemented(this); + // 64bit specific + FEX::HLE::x64::RegisterEpoll(this); + FEX::HLE::x64::RegisterFD(this); + FEX::HLE::x64::RegisterInfo(this); + FEX::HLE::x64::RegisterIO(this); + FEX::HLE::x64::RegisterIoctl(this); + 
FEX::HLE::x64::RegisterMemory(this); + FEX::HLE::x64::RegisterMsg(this); + FEX::HLE::x64::RegisterSched(this); + FEX::HLE::x64::RegisterSocket(this); + FEX::HLE::x64::RegisterSemaphore(this); + FEX::HLE::x64::RegisterSignals(this); + FEX::HLE::x64::RegisterThread(this); + FEX::HLE::x64::RegisterTime(this); + FEX::HLE::x64::RegisterNotImplemented(this); - // x86-64 has a gap of syscalls in the range of [335, 424) where there aren't any - // These are defined that these must return -ENOSYS - // This allows x86-64 to start using the common syscall numbers - // Fill the gap to ensure that FEX doesn't assert - constexpr int SYSCALL_GAP_BEGIN = 335; - constexpr int SYSCALL_GAP_END = 424; - for (int SyscallNumber = SYSCALL_GAP_BEGIN; SyscallNumber < SYSCALL_GAP_END; ++SyscallNumber) { - auto &Def = Definitions.at(SyscallNumber); - Def.Ptr = cvt(&UnimplementedSyscallSafe); - Def.NumArgs = 0; - Def.Flags = FEXCore::IR::SyscallFlags::DEFAULT; - // This will allow our syscall optimization code to make this code more optimal - // Unlikely to hit a hot path though - Def.HostSyscallNumber = SYSCALL_DEF(MAX); + // x86-64 has a gap of syscalls in the range of [335, 424) where there aren't any + // These are defined that these must return -ENOSYS + // This allows x86-64 to start using the common syscall numbers + // Fill the gap to ensure that FEX doesn't assert + constexpr int SYSCALL_GAP_BEGIN = 335; + constexpr int SYSCALL_GAP_END = 424; + for (int SyscallNumber = SYSCALL_GAP_BEGIN; SyscallNumber < SYSCALL_GAP_END; ++SyscallNumber) { + auto& Def = Definitions.at(SyscallNumber); + Def.Ptr = cvt(&UnimplementedSyscallSafe); + Def.NumArgs = 0; + Def.Flags = FEXCore::IR::SyscallFlags::DEFAULT; + // This will allow our syscall optimization code to make this code more optimal + // Unlikely to hit a hot path though + Def.HostSyscallNumber = SYSCALL_DEF(MAX); #ifdef DEBUG_STRACE - Def.StraceFmt = "Invalid"; + Def.StraceFmt = "Invalid"; #endif - } + } #if PRINT_MISSING_SYSCALLS - for 
(auto &Syscall: SyscallNames) { - if (Definitions[Syscall.first].Ptr == cvt(&UnimplementedSyscall)) { - LogMan::Msg::DFmt("Unimplemented syscall: {}", Syscall.second); - } + for (auto& Syscall : SyscallNames) { + if (Definitions[Syscall.first].Ptr == cvt(&UnimplementedSyscall)) { + LogMan::Msg::DFmt("Unimplemented syscall: {}", Syscall.second); } -#endif } +#endif +} - fextl::unique_ptr CreateHandler(FEXCore::Context::Context *ctx, FEX::HLE::SignalDelegator *_SignalDelegation) { - return fextl::make_unique(ctx, _SignalDelegation); - } +fextl::unique_ptr CreateHandler(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* _SignalDelegation) { + return fextl::make_unique(ctx, _SignalDelegation); } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Thread.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Thread.cpp index 1947d2da34..b2a7f80d83 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Thread.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Thread.cpp @@ -24,25 +24,27 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEX::HLE::x64 { - uint64_t SetThreadArea(FEXCore::Core::CpuStateFrame *Frame, void *tls) { - Frame->State.fs_cached = reinterpret_cast(tls); - return 0; - } +uint64_t SetThreadArea(FEXCore::Core::CpuStateFrame* Frame, void* tls) { + Frame->State.fs_cached = reinterpret_cast(tls); + return 0; +} - void AdjustRipForNewThread(FEXCore::Core::CpuStateFrame *Frame) { - Frame->State.rip += 2; - } +void AdjustRipForNewThread(FEXCore::Core::CpuStateFrame* Frame) { + Frame->State.rip += 2; +} - void RegisterThread(FEX::HLE::SyscallHandler *Handler) { - using namespace FEXCore::IR; +void RegisterThread(FEX::HLE::SyscallHandler* Handler) { + using namespace FEXCore::IR; - REGISTER_SYSCALL_IMPL_X64_FLAGS(clone, SyscallFlags::DEFAULT, - ([](FEXCore::Core::CpuStateFrame *Frame, uint32_t flags, void *stack, pid_t *parent_tid, pid_t *child_tid, void *tls) -> uint64_t { - FEX::HLE::clone3_args args 
{ - .Type = TypeOfClone::TYPE_CLONE2, - .args = { + REGISTER_SYSCALL_IMPL_X64_FLAGS( + clone, SyscallFlags::DEFAULT, + ([](FEXCore::Core::CpuStateFrame* Frame, uint32_t flags, void* stack, pid_t* parent_tid, pid_t* child_tid, void* tls) -> uint64_t { + FEX::HLE::clone3_args args { + .Type = TypeOfClone::TYPE_CLONE2, + .args = + { .flags = flags, // CSIGNAL is contained in here - .pidfd = 0, // For clone, pidfd is duplicated here + .pidfd = 0, // For clone, pidfd is duplicated here .child_tid = reinterpret_cast(child_tid), .parent_tid = reinterpret_cast(parent_tid), .exit_signal = flags & CSIGNAL, @@ -53,120 +55,117 @@ namespace FEX::HLE::x64 { .set_tid_size = 0, .cgroup = 0, // This syscall can't select cgroups }, - }; - return CloneHandler(Frame, &args); - })); - - REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(futex, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int *uaddr, int futex_op, int val, const struct timespec *timeout, int *uaddr2, uint32_t val3) -> uint64_t { - uint64_t Result = syscall(SYSCALL_DEF(futex), - uaddr, - futex_op, - val, - timeout, - uaddr2, - val3); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_FLAGS(set_robust_list, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, struct robust_list_head *head, size_t len) -> uint64_t { - auto Thread = Frame->Thread; - Thread->ThreadManager.robust_list_head = reinterpret_cast(head); + }; + return CloneHandler(Frame, &args); + })); + + REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(futex, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int* uaddr, int futex_op, int val, + const struct timespec* timeout, int* uaddr2, uint32_t val3) -> uint64_t { + uint64_t Result = syscall(SYSCALL_DEF(futex), uaddr, futex_op, val, timeout, uaddr2, val3); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_FLAGS(set_robust_list, 
SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, struct robust_list_head* head, size_t len) -> uint64_t { + auto Thread = Frame->Thread; + Thread->ThreadManager.robust_list_head = reinterpret_cast(head); #ifdef TERMUX_BUILD - // Termux/Android doesn't support `set_robust_list` syscall. - // The seccomp filter that the OS installs explicitly blocks this syscall from working - // glibc uses this syscall for tls and thread data so almost every application uses it - // Return success since we have stored the pointer ourselves. - return 0; + // Termux/Android doesn't support `set_robust_list` syscall. + // The seccomp filter that the OS installs explicitly blocks this syscall from working + // glibc uses this syscall for tls and thread data so almost every application uses it + // Return success since we have stored the pointer ourselves. + return 0; #else uint64_t Result = ::syscall(SYSCALL_DEF(set_robust_list), head, len); SYSCALL_ERRNO(); #endif - }); - - REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(get_robust_list, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int pid, struct robust_list_head **head, size_t *len_ptr) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(get_robust_list), pid, head, len_ptr); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64(sigaltstack, [](FEXCore::Core::CpuStateFrame *Frame, const stack_t *ss, stack_t *old_ss) -> uint64_t { - return FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSigAltStack(ss, old_ss); - }); - - // launch a new process under fex - // currently does not propagate argv[0] correctly - REGISTER_SYSCALL_IMPL_X64_FLAGS(execve, SyscallFlags::DEFAULT, - [](FEXCore::Core::CpuStateFrame *Frame, const char *pathname, char *const argv[], char *const envp[]) -> uint64_t { - fextl::vector Args; - fextl::vector Envp; - - if (argv) { - for (int i = 0; argv[i]; i++) { - Args.push_back(argv[i]); - } 
- - Args.push_back(nullptr); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(get_robust_list, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int pid, struct robust_list_head** head, size_t* len_ptr) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(get_robust_list), pid, head, len_ptr); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64(sigaltstack, [](FEXCore::Core::CpuStateFrame* Frame, const stack_t* ss, stack_t* old_ss) -> uint64_t { + return FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSigAltStack(ss, old_ss); + }); + + // launch a new process under fex + // currently does not propagate argv[0] correctly + REGISTER_SYSCALL_IMPL_X64_FLAGS(execve, SyscallFlags::DEFAULT, + [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, char* const argv[], char* const envp[]) -> uint64_t { + fextl::vector Args; + fextl::vector Envp; + + if (argv) { + for (int i = 0; argv[i]; i++) { + Args.push_back(argv[i]); } - if (envp) { - for (int i = 0; envp[i]; i++) { - Envp.push_back(envp[i]); - } + Args.push_back(nullptr); + } - Envp.push_back(nullptr); + if (envp) { + for (int i = 0; envp[i]; i++) { + Envp.push_back(envp[i]); } - auto* const* ArgsPtr = argv ? const_cast(Args.data()) : nullptr; - auto* const* EnvpPtr = envp ? const_cast(Envp.data()) : nullptr; + Envp.push_back(nullptr); + } - FEX::HLE::ExecveAtArgs AtArgs = FEX::HLE::ExecveAtArgs::Empty(); + auto* const* ArgsPtr = argv ? const_cast(Args.data()) : nullptr; + auto* const* EnvpPtr = envp ? 
const_cast(Envp.data()) : nullptr; - return FEX::HLE::ExecveHandler(pathname, ArgsPtr, EnvpPtr, AtArgs); - }); + FEX::HLE::ExecveAtArgs AtArgs = FEX::HLE::ExecveAtArgs::Empty(); - REGISTER_SYSCALL_IMPL_X64_FLAGS(execveat, SyscallFlags::DEFAULT, - ([](FEXCore::Core::CpuStateFrame *Frame, int dirfd, const char *pathname, char *const argv[], char *const envp[], int flags) -> uint64_t { - fextl::vector Args; - fextl::vector Envp; + return FEX::HLE::ExecveHandler(pathname, ArgsPtr, EnvpPtr, AtArgs); + }); - if (argv) { - for (int i = 0; argv[i]; i++) { - Args.push_back(argv[i]); - } + REGISTER_SYSCALL_IMPL_X64_FLAGS( + execveat, SyscallFlags::DEFAULT, + ([](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, char* const argv[], char* const envp[], int flags) -> uint64_t { + fextl::vector Args; + fextl::vector Envp; - Args.push_back(nullptr); + if (argv) { + for (int i = 0; argv[i]; i++) { + Args.push_back(argv[i]); } - if (envp) { - for (int i = 0; envp[i]; i++) { - Envp.push_back(envp[i]); - } + Args.push_back(nullptr); + } - Envp.push_back(nullptr); + if (envp) { + for (int i = 0; envp[i]; i++) { + Envp.push_back(envp[i]); } - FEX::HLE::ExecveAtArgs AtArgs { - .dirfd = dirfd, - .flags = flags, - }; - - auto* const* ArgsPtr = argv ? const_cast(Args.data()) : nullptr; - auto* const* EnvpPtr = envp ? 
const_cast(Envp.data()) : nullptr; - return FEX::HLE::ExecveHandler(pathname, ArgsPtr, EnvpPtr, AtArgs); - })); - - REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(wait4, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, pid_t pid, int *wstatus, int options, struct rusage *rusage) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(wait4), pid, wstatus, options, rusage); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(waitid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, - [](FEXCore::Core::CpuStateFrame *Frame, int which, pid_t upid, siginfo_t *infop, int options, struct rusage *rusage) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(waitid), which, upid, infop, options, rusage); - SYSCALL_ERRNO(); - }); - } + Envp.push_back(nullptr); + } + + FEX::HLE::ExecveAtArgs AtArgs { + .dirfd = dirfd, + .flags = flags, + }; + + auto* const* ArgsPtr = argv ? const_cast(Args.data()) : nullptr; + auto* const* EnvpPtr = envp ? 
const_cast(Envp.data()) : nullptr; + return FEX::HLE::ExecveHandler(pathname, ArgsPtr, EnvpPtr, AtArgs); + })); + + REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS(wait4, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, int* wstatus, int options, struct rusage* rusage) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(wait4), pid, wstatus, options, rusage); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS_FLAGS( + waitid, SyscallFlags::OPTIMIZETHROUGH | SyscallFlags::NOSYNCSTATEONENTRY, + [](FEXCore::Core::CpuStateFrame* Frame, int which, pid_t upid, siginfo_t* infop, int options, struct rusage* rusage) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(waitid), which, upid, infop, options, rusage); + SYSCALL_ERRNO(); + }); } +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Time.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Time.cpp index 4df537c678..9ff35edb94 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Time.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Time.cpp @@ -20,96 +20,102 @@ tags: LinuxSyscalls|syscalls-x86-64 #include namespace FEX::HLE::x64 { - void RegisterTime(FEX::HLE::SyscallHandler *Handler) { - REGISTER_SYSCALL_IMPL_X64_PASS(time, [](FEXCore::Core::CpuStateFrame *Frame, time_t *tloc) -> uint64_t { - uint64_t Result = ::time(tloc); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(times, [](FEXCore::Core::CpuStateFrame *Frame, struct tms *buf) -> uint64_t { - uint64_t Result = ::times(buf); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(utime, [](FEXCore::Core::CpuStateFrame *Frame, char* filename, const struct utimbuf* times) -> uint64_t { - uint64_t Result = ::utime(filename, times); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(gettimeofday, [](FEXCore::Core::CpuStateFrame *Frame, struct timeval *tv, struct timezone *tz) -> uint64_t { - uint64_t Result = 
::syscall(SYSCALL_DEF(gettimeofday), tv, tz); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(nanosleep, [](FEXCore::Core::CpuStateFrame *Frame, const struct timespec *req, struct timespec *rem) -> uint64_t { - uint64_t Result = ::nanosleep(req, rem); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(clock_gettime, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clk_id, struct timespec *tp) -> uint64_t { - uint64_t Result = ::clock_gettime(clk_id, tp); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(clock_getres, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clk_id, struct timespec *tp) -> uint64_t { - uint64_t Result = ::clock_getres(clk_id, tp); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(clock_nanosleep, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clockid, int flags, const struct timespec *request, struct timespec *remain) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(clock_nanosleep), clockid, flags, request, remain); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(clock_settime, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clockid, const struct timespec *tp) -> uint64_t { - uint64_t Result = ::clock_settime(clockid, tp); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(settimeofday, [](FEXCore::Core::CpuStateFrame *Frame, const struct timeval *tv, const struct timezone *tz) -> uint64_t { - uint64_t Result = ::settimeofday(tv, tz); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(utimes, [](FEXCore::Core::CpuStateFrame *Frame, const char *filename, const struct timeval times[2]) -> uint64_t { - uint64_t Result = ::utimes(filename, times); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(getitimer, [](FEXCore::Core::CpuStateFrame *Frame, int which, struct itimerval *curr_value) -> uint64_t { - uint64_t Result = ::getitimer(which, curr_value); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(setitimer, 
[](FEXCore::Core::CpuStateFrame *Frame, int which, const struct itimerval *new_value, struct itimerval *old_value) -> uint64_t { - uint64_t Result = ::setitimer(which, new_value, old_value); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(timer_settime, [](FEXCore::Core::CpuStateFrame *Frame, kernel_timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(timer_settime), timerid, flags, new_value, old_value); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(timer_gettime, [](FEXCore::Core::CpuStateFrame *Frame, kernel_timer_t timerid, struct itimerspec *curr_value) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(timer_gettime), timerid, curr_value); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(adjtimex, [](FEXCore::Core::CpuStateFrame *Frame, struct timex *buf) -> uint64_t { - uint64_t Result = ::adjtimex(buf); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(clock_adjtime, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clk_id, struct timex *buf) -> uint64_t { - uint64_t Result = ::clock_adjtime(clk_id, buf); - SYSCALL_ERRNO(); - }); - - REGISTER_SYSCALL_IMPL_X64_PASS(timer_create, [](FEXCore::Core::CpuStateFrame *Frame, clockid_t clockid, struct sigevent *sevp, kernel_timer_t *timerid) -> uint64_t { - uint64_t Result = ::syscall(SYSCALL_DEF(timer_create), clockid, sevp, timerid); - SYSCALL_ERRNO(); - }); - } +void RegisterTime(FEX::HLE::SyscallHandler* Handler) { + REGISTER_SYSCALL_IMPL_X64_PASS(time, [](FEXCore::Core::CpuStateFrame* Frame, time_t* tloc) -> uint64_t { + uint64_t Result = ::time(tloc); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(times, [](FEXCore::Core::CpuStateFrame* Frame, struct tms* buf) -> uint64_t { + uint64_t Result = ::times(buf); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(utime, [](FEXCore::Core::CpuStateFrame* Frame, char* filename, const struct utimbuf* 
times) -> uint64_t { + uint64_t Result = ::utime(filename, times); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(gettimeofday, [](FEXCore::Core::CpuStateFrame* Frame, struct timeval* tv, struct timezone* tz) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(gettimeofday), tv, tz); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(nanosleep, [](FEXCore::Core::CpuStateFrame* Frame, const struct timespec* req, struct timespec* rem) -> uint64_t { + uint64_t Result = ::nanosleep(req, rem); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(clock_gettime, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clk_id, struct timespec* tp) -> uint64_t { + uint64_t Result = ::clock_gettime(clk_id, tp); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(clock_getres, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clk_id, struct timespec* tp) -> uint64_t { + uint64_t Result = ::clock_getres(clk_id, tp); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + clock_nanosleep, + [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clockid, int flags, const struct timespec* request, struct timespec* remain) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(clock_nanosleep), clockid, flags, request, remain); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(clock_settime, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clockid, const struct timespec* tp) -> uint64_t { + uint64_t Result = ::clock_settime(clockid, tp); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(settimeofday, [](FEXCore::Core::CpuStateFrame* Frame, const struct timeval* tv, const struct timezone* tz) -> uint64_t { + uint64_t Result = ::settimeofday(tv, tz); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(utimes, [](FEXCore::Core::CpuStateFrame* Frame, const char* filename, const struct timeval times[2]) -> uint64_t { + uint64_t Result = ::utimes(filename, times); + SYSCALL_ERRNO(); + }); + + 
REGISTER_SYSCALL_IMPL_X64_PASS(getitimer, [](FEXCore::Core::CpuStateFrame* Frame, int which, struct itimerval* curr_value) -> uint64_t { + uint64_t Result = ::getitimer(which, curr_value); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + setitimer, [](FEXCore::Core::CpuStateFrame* Frame, int which, const struct itimerval* new_value, struct itimerval* old_value) -> uint64_t { + uint64_t Result = ::setitimer(which, new_value, old_value); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(timer_settime, + [](FEXCore::Core::CpuStateFrame* Frame, kernel_timer_t timerid, int flags, + const struct itimerspec* new_value, struct itimerspec* old_value) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(timer_settime), timerid, flags, new_value, old_value); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + timer_gettime, [](FEXCore::Core::CpuStateFrame* Frame, kernel_timer_t timerid, struct itimerspec* curr_value) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(timer_gettime), timerid, curr_value); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(adjtimex, [](FEXCore::Core::CpuStateFrame* Frame, struct timex* buf) -> uint64_t { + uint64_t Result = ::adjtimex(buf); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS(clock_adjtime, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clk_id, struct timex* buf) -> uint64_t { + uint64_t Result = ::clock_adjtime(clk_id, buf); + SYSCALL_ERRNO(); + }); + + REGISTER_SYSCALL_IMPL_X64_PASS( + timer_create, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clockid, struct sigevent* sevp, kernel_timer_t* timerid) -> uint64_t { + uint64_t Result = ::syscall(SYSCALL_DEF(timer_create), clockid, sevp, timerid); + SYSCALL_ERRNO(); + }); } - +} // namespace FEX::HLE::x64 diff --git a/Source/Tools/LinuxEmulation/VDSO_Emulation.cpp b/Source/Tools/LinuxEmulation/VDSO_Emulation.cpp index 75f05c82ad..402c1a114d 100644 --- a/Source/Tools/LinuxEmulation/VDSO_Emulation.cpp +++ 
b/Source/Tools/LinuxEmulation/VDSO_Emulation.cpp @@ -19,586 +19,586 @@ #include namespace FEX::VDSO { - FEXCore::Context::VDSOSigReturn VDSOPointers{}; - namespace VDSOHandlers { - using TimeType = decltype(::time)*; - using GetTimeOfDayType = decltype(::gettimeofday)*; - using ClockGetTimeType = decltype(::clock_gettime)*; - using ClockGetResType = decltype(::clock_getres)*; - using GetCPUType = decltype(FHU::Syscalls::getcpu)*; - - TimeType TimePtr; - GetTimeOfDayType GetTimeOfDayPtr; - ClockGetTimeType ClockGetTimePtr; - ClockGetResType ClockGetResPtr; - GetCPUType GetCPUPtr; +FEXCore::Context::VDSOSigReturn VDSOPointers {}; +namespace VDSOHandlers { + using TimeType = decltype(::time)*; + using GetTimeOfDayType = decltype(::gettimeofday)*; + using ClockGetTimeType = decltype(::clock_gettime)*; + using ClockGetResType = decltype(::clock_getres)*; + using GetCPUType = decltype(FHU::Syscalls::getcpu)*; + + TimeType TimePtr; + GetTimeOfDayType GetTimeOfDayPtr; + ClockGetTimeType ClockGetTimePtr; + ClockGetResType ClockGetResPtr; + GetCPUType GetCPUPtr; +} // namespace VDSOHandlers + +using HandlerPtr = void (*)(void*); +namespace x64 { + static uint64_t SyscallRet(uint64_t Result) { + if (Result == -1) { + return -errno; + } + return Result; } - - using HandlerPtr = void(*)(void*); - namespace x64 { - static uint64_t SyscallRet(uint64_t Result) { - if (Result == -1) { - return -errno; - } - return Result; + // glibc handlers + namespace glibc { + static void time(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + time_t* a_0; + uint64_t rv; + }* args = reinterpret_cast(ArgsRV); + + uint64_t Result = ::time(args->a_0); + args->rv = SyscallRet(Result); } - // glibc handlers - namespace glibc { - static void time(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - time_t *a_0; - uint64_t rv; - } *args = reinterpret_cast(ArgsRV); - - uint64_t Result = ::time(args->a_0); - args->rv = SyscallRet(Result); - } - static void gettimeofday(void* ArgsRV) { 
- struct __attribute__((packed)) ArgsRV_t { - struct timeval *tv; - struct timezone *tz; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void gettimeofday(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + struct timeval* tv; + struct timezone* tz; + int rv; + }* args = reinterpret_cast(ArgsRV); - int Result = ::gettimeofday(args->tv, args->tz); - args->rv = SyscallRet(Result); - } + int Result = ::gettimeofday(args->tv, args->tz); + args->rv = SyscallRet(Result); + } - static void clock_gettime(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - clockid_t clk_id; - struct timespec *tp; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void clock_gettime(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + clockid_t clk_id; + struct timespec* tp; + int rv; + }* args = reinterpret_cast(ArgsRV); - int Result = ::clock_gettime(args->clk_id, args->tp); - args->rv = SyscallRet(Result); - } + int Result = ::clock_gettime(args->clk_id, args->tp); + args->rv = SyscallRet(Result); + } - static void clock_getres(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - clockid_t clk_id; - struct timespec *tp; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void clock_getres(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + clockid_t clk_id; + struct timespec* tp; + int rv; + }* args = reinterpret_cast(ArgsRV); - int Result = ::clock_getres(args->clk_id, args->tp); - args->rv = SyscallRet(Result); - } + int Result = ::clock_getres(args->clk_id, args->tp); + args->rv = SyscallRet(Result); + } - static void getcpu(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - uint32_t *cpu; - uint32_t *node; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void getcpu(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + uint32_t* cpu; + uint32_t* node; + int rv; + }* args = reinterpret_cast(ArgsRV); - int Result = FHU::Syscalls::getcpu(args->cpu, args->node); - args->rv = SyscallRet(Result); 
- } + int Result = FHU::Syscalls::getcpu(args->cpu, args->node); + args->rv = SyscallRet(Result); } + } // namespace glibc - namespace VDSO { - // VDSO handlers - static void time(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - time_t *a_0; - uint64_t rv; - } *args = reinterpret_cast(ArgsRV); + namespace VDSO { + // VDSO handlers + static void time(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + time_t* a_0; + uint64_t rv; + }* args = reinterpret_cast(ArgsRV); - args->rv = VDSOHandlers::TimePtr(args->a_0); - } + args->rv = VDSOHandlers::TimePtr(args->a_0); + } - static void gettimeofday(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - struct timeval *tv; - struct timezone *tz; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void gettimeofday(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + struct timeval* tv; + struct timezone* tz; + int rv; + }* args = reinterpret_cast(ArgsRV); - args->rv = VDSOHandlers::GetTimeOfDayPtr(args->tv, args->tz); - } + args->rv = VDSOHandlers::GetTimeOfDayPtr(args->tv, args->tz); + } - static void clock_gettime(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - clockid_t clk_id; - struct timespec *tp; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void clock_gettime(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + clockid_t clk_id; + struct timespec* tp; + int rv; + }* args = reinterpret_cast(ArgsRV); - args->rv = VDSOHandlers::ClockGetTimePtr(args->clk_id, args->tp); - } + args->rv = VDSOHandlers::ClockGetTimePtr(args->clk_id, args->tp); + } - static void clock_getres(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - clockid_t clk_id; - struct timespec *tp; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void clock_getres(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + clockid_t clk_id; + struct timespec* tp; + int rv; + }* args = reinterpret_cast(ArgsRV); - args->rv = 
VDSOHandlers::ClockGetResPtr(args->clk_id, args->tp); - } + args->rv = VDSOHandlers::ClockGetResPtr(args->clk_id, args->tp); + } - static void getcpu(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - uint32_t *cpu; - uint32_t *node; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void getcpu(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + uint32_t* cpu; + uint32_t* node; + int rv; + }* args = reinterpret_cast(ArgsRV); - args->rv = VDSOHandlers::GetCPUPtr(args->cpu, args->node); + args->rv = VDSOHandlers::GetCPUPtr(args->cpu, args->node); + } + } // namespace VDSO + + HandlerPtr Handler_time = FEX::VDSO::x64::glibc::time; + HandlerPtr Handler_gettimeofday = FEX::VDSO::x64::glibc::gettimeofday; + HandlerPtr Handler_clock_gettime = FEX::VDSO::x64::glibc::clock_gettime; + HandlerPtr Handler_clock_getres = FEX::VDSO::x64::glibc::clock_getres; + HandlerPtr Handler_getcpu = FEX::VDSO::x64::glibc::getcpu; +} // namespace x64 +namespace x32 { + namespace glibc { + static int SyscallRet(int Result) { + if (Result == -1) { + return -errno; } + return Result; } - HandlerPtr Handler_time = FEX::VDSO::x64::glibc::time; - HandlerPtr Handler_gettimeofday = FEX::VDSO::x64::glibc::gettimeofday; - HandlerPtr Handler_clock_gettime = FEX::VDSO::x64::glibc::clock_gettime; - HandlerPtr Handler_clock_getres = FEX::VDSO::x64::glibc::clock_getres; - HandlerPtr Handler_getcpu = FEX::VDSO::x64::glibc::getcpu; - } - namespace x32 { - namespace glibc { - static int SyscallRet(int Result) { - if (Result == -1) { - return -errno; - } - return Result; + // glibc handlers + static void time(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + HLE::x32::compat_ptr a_0; + int rv; + }* args = reinterpret_cast(ArgsRV); + + time_t Host {}; + int Result = ::time(&Host); + args->rv = SyscallRet(Result); + if (Result != -1 && args->a_0) { + *args->a_0 = Host; } + } - // glibc handlers - static void time(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { 
- HLE::x32::compat_ptr a_0; - int rv; - } *args = reinterpret_cast(ArgsRV); - - time_t Host{}; - int Result = ::time(&Host); - args->rv = SyscallRet(Result); - if (Result != -1 && args->a_0) { - *args->a_0 = Host; - } + static void gettimeofday(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + HLE::x32::compat_ptr tv; + HLE::x32::compat_ptr tz; + int rv; + }* args = reinterpret_cast(ArgsRV); + + struct timeval tv64 {}; + struct timeval* tv_ptr {}; + if (args->tv) { + tv_ptr = &tv64; } - static void gettimeofday(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - HLE::x32::compat_ptr tv; - HLE::x32::compat_ptr tz; - int rv; - } *args = reinterpret_cast(ArgsRV); - - struct timeval tv64{}; - struct timeval *tv_ptr{}; - if (args->tv) { - tv_ptr = &tv64; - } - - int Result = ::gettimeofday(tv_ptr, args->tz); - args->rv = SyscallRet(Result); + int Result = ::gettimeofday(tv_ptr, args->tz); + args->rv = SyscallRet(Result); - if (Result != -1 && args->tv) { - *args->tv = tv64; - } + if (Result != -1 && args->tv) { + *args->tv = tv64; } + } - static void clock_gettime(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - clockid_t clk_id; - HLE::x32::compat_ptr tp; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void clock_gettime(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + clockid_t clk_id; + HLE::x32::compat_ptr tp; + int rv; + }* args = reinterpret_cast(ArgsRV); - struct timespec tp64{}; - int Result = ::clock_gettime(args->clk_id, &tp64); - args->rv = SyscallRet(Result); + struct timespec tp64 {}; + int Result = ::clock_gettime(args->clk_id, &tp64); + args->rv = SyscallRet(Result); - if (Result != -1 && args->tp) { - *args->tp = tp64; - } + if (Result != -1 && args->tp) { + *args->tp = tp64; } + } - static void clock_gettime64(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - clockid_t clk_id; - HLE::x32::compat_ptr tp; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void clock_gettime64(void* 
ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + clockid_t clk_id; + HLE::x32::compat_ptr tp; + int rv; + }* args = reinterpret_cast(ArgsRV); - int Result = ::clock_gettime(args->clk_id, args->tp); - args->rv = SyscallRet(Result); - } + int Result = ::clock_gettime(args->clk_id, args->tp); + args->rv = SyscallRet(Result); + } - static void clock_getres(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - clockid_t clk_id; - HLE::x32::compat_ptr tp; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void clock_getres(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + clockid_t clk_id; + HLE::x32::compat_ptr tp; + int rv; + }* args = reinterpret_cast(ArgsRV); - struct timespec tp64{}; + struct timespec tp64 {}; - int Result = ::clock_getres(args->clk_id, &tp64); - args->rv = SyscallRet(Result); + int Result = ::clock_getres(args->clk_id, &tp64); + args->rv = SyscallRet(Result); - if (Result != -1 && args->tp) { - *args->tp = tp64; - } + if (Result != -1 && args->tp) { + *args->tp = tp64; } + } - static void getcpu(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - HLE::x32::compat_ptr cpu; - HLE::x32::compat_ptr node; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void getcpu(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + HLE::x32::compat_ptr cpu; + HLE::x32::compat_ptr node; + int rv; + }* args = reinterpret_cast(ArgsRV); - int Result = ::getcpu(args->cpu, args->node); - args->rv = SyscallRet(Result); - } + int Result = ::getcpu(args->cpu, args->node); + args->rv = SyscallRet(Result); } + } // namespace glibc - namespace VDSO { - static bool SyscallErr(uint64_t Result) { - return Result >= -4095; - } + namespace VDSO { + static bool SyscallErr(uint64_t Result) { + return Result >= -4095; + } - // VDSO handlers - static void time(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - HLE::x32::compat_ptr a_0; - int rv; - } *args = reinterpret_cast(ArgsRV); - - time_t Host{}; - uint64_t Result 
= VDSOHandlers::TimePtr(&Host); - args->rv = Result; - if (!SyscallErr(Result) && args->a_0) { - *args->a_0 = Host; - } + // VDSO handlers + static void time(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + HLE::x32::compat_ptr a_0; + int rv; + }* args = reinterpret_cast(ArgsRV); + + time_t Host {}; + uint64_t Result = VDSOHandlers::TimePtr(&Host); + args->rv = Result; + if (!SyscallErr(Result) && args->a_0) { + *args->a_0 = Host; } + } - static void gettimeofday(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - HLE::x32::compat_ptr tv; - HLE::x32::compat_ptr tz; - int rv; - } *args = reinterpret_cast(ArgsRV); - - struct timeval tv64{}; - struct timeval *tv_ptr{}; - if (args->tv) { - tv_ptr = &tv64; - } - - uint64_t Result = VDSOHandlers::GetTimeOfDayPtr(tv_ptr, args->tz); - args->rv = Result; - - if (!SyscallErr(Result) && args->tv) { - *args->tv = tv64; - } + static void gettimeofday(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + HLE::x32::compat_ptr tv; + HLE::x32::compat_ptr tz; + int rv; + }* args = reinterpret_cast(ArgsRV); + + struct timeval tv64 {}; + struct timeval* tv_ptr {}; + if (args->tv) { + tv_ptr = &tv64; } - static void clock_gettime(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - clockid_t clk_id; - HLE::x32::compat_ptr tp; - int rv; - } *args = reinterpret_cast(ArgsRV); + uint64_t Result = VDSOHandlers::GetTimeOfDayPtr(tv_ptr, args->tz); + args->rv = Result; - struct timespec tp64{}; - uint64_t Result = VDSOHandlers::ClockGetTimePtr(args->clk_id, &tp64); - args->rv = Result; - - if (!SyscallErr(Result) && args->tp) { - *args->tp = tp64; - } + if (!SyscallErr(Result) && args->tv) { + *args->tv = tv64; } + } - static void clock_gettime64(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - clockid_t clk_id; - HLE::x32::compat_ptr tp; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void clock_gettime(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + clockid_t clk_id; 
+ HLE::x32::compat_ptr tp; + int rv; + }* args = reinterpret_cast(ArgsRV); - args->rv = VDSOHandlers::ClockGetTimePtr(args->clk_id, args->tp); + struct timespec tp64 {}; + uint64_t Result = VDSOHandlers::ClockGetTimePtr(args->clk_id, &tp64); + args->rv = Result; + + if (!SyscallErr(Result) && args->tp) { + *args->tp = tp64; } + } - static void clock_getres(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - clockid_t clk_id; - HLE::x32::compat_ptr tp; - int rv; - } *args = reinterpret_cast(ArgsRV); + static void clock_gettime64(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + clockid_t clk_id; + HLE::x32::compat_ptr tp; + int rv; + }* args = reinterpret_cast(ArgsRV); - struct timespec tp64{}; + args->rv = VDSOHandlers::ClockGetTimePtr(args->clk_id, args->tp); + } - uint64_t Result = VDSOHandlers::ClockGetResPtr(args->clk_id, &tp64); - args->rv = Result; + static void clock_getres(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + clockid_t clk_id; + HLE::x32::compat_ptr tp; + int rv; + }* args = reinterpret_cast(ArgsRV); - if (!SyscallErr(Result) && args->tp) { - *args->tp = tp64; - } - } + struct timespec tp64 {}; - static void getcpu(void* ArgsRV) { - struct __attribute__((packed)) ArgsRV_t { - HLE::x32::compat_ptr cpu; - HLE::x32::compat_ptr node; - int rv; - } *args = reinterpret_cast(ArgsRV); + uint64_t Result = VDSOHandlers::ClockGetResPtr(args->clk_id, &tp64); + args->rv = Result; - args->rv = VDSOHandlers::GetCPUPtr(args->cpu, args->node); + if (!SyscallErr(Result) && args->tp) { + *args->tp = tp64; } } - HandlerPtr Handler_time = FEX::VDSO::x32::glibc::time; - HandlerPtr Handler_gettimeofday = FEX::VDSO::x32::glibc::gettimeofday; - HandlerPtr Handler_clock_gettime = FEX::VDSO::x32::glibc::clock_gettime; - HandlerPtr Handler_clock_gettime64 = FEX::VDSO::x32::glibc::clock_gettime64; - HandlerPtr Handler_clock_getres = FEX::VDSO::x32::glibc::clock_getres; - HandlerPtr Handler_getcpu = FEX::VDSO::x32::glibc::getcpu; - } + 
static void getcpu(void* ArgsRV) { + struct __attribute__((packed)) ArgsRV_t { + HLE::x32::compat_ptr cpu; + HLE::x32::compat_ptr node; + int rv; + }* args = reinterpret_cast(ArgsRV); - void LoadHostVDSO() { - // dlopen does allocations that FEX can't track. - // Ensure we don't run afoul of the glibc fault allocator. - FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator glibc; - void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); - if (!vdso) { - vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + args->rv = VDSOHandlers::GetCPUPtr(args->cpu, args->node); } + } // namespace VDSO + + HandlerPtr Handler_time = FEX::VDSO::x32::glibc::time; + HandlerPtr Handler_gettimeofday = FEX::VDSO::x32::glibc::gettimeofday; + HandlerPtr Handler_clock_gettime = FEX::VDSO::x32::glibc::clock_gettime; + HandlerPtr Handler_clock_gettime64 = FEX::VDSO::x32::glibc::clock_gettime64; + HandlerPtr Handler_clock_getres = FEX::VDSO::x32::glibc::clock_getres; + HandlerPtr Handler_getcpu = FEX::VDSO::x32::glibc::getcpu; +} // namespace x32 + +void LoadHostVDSO() { + // dlopen does allocations that FEX can't track. + // Ensure we don't run afoul of the glibc fault allocator. + FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator glibc; + void* vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) { + vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + } - if (!vdso) { - // We couldn't load VDSO, fallback to C implementations. Which will still be faster than emulated libc versions. - LogMan::Msg::IFmt("linux-vdso implementation falling back to libc. Consider enabling VDSO in your kernel."); - return; - } + if (!vdso) { + // We couldn't load VDSO, fallback to C implementations. Which will still be faster than emulated libc versions. + LogMan::Msg::IFmt("linux-vdso implementation falling back to libc. 
Consider enabling VDSO in your kernel."); + return; + } - auto SymbolPtr = dlsym(vdso, "__kernel_time"); - if (!SymbolPtr) { - SymbolPtr = dlsym(vdso, "__vdso_time"); - } - if (SymbolPtr) { - VDSOHandlers::TimePtr = reinterpret_cast(SymbolPtr); - x64::Handler_time = x64::VDSO::time; - x32::Handler_time = x32::VDSO::time; - } + auto SymbolPtr = dlsym(vdso, "__kernel_time"); + if (!SymbolPtr) { + SymbolPtr = dlsym(vdso, "__vdso_time"); + } + if (SymbolPtr) { + VDSOHandlers::TimePtr = reinterpret_cast(SymbolPtr); + x64::Handler_time = x64::VDSO::time; + x32::Handler_time = x32::VDSO::time; + } - SymbolPtr = dlsym(vdso, "__kernel_gettimeofday"); - if (!SymbolPtr) { - SymbolPtr = dlsym(vdso, "__vdso_gettimeofday"); - } + SymbolPtr = dlsym(vdso, "__kernel_gettimeofday"); + if (!SymbolPtr) { + SymbolPtr = dlsym(vdso, "__vdso_gettimeofday"); + } - if (SymbolPtr) { - VDSOHandlers::GetTimeOfDayPtr = reinterpret_cast(SymbolPtr); - x64::Handler_gettimeofday = x64::VDSO::gettimeofday; - x32::Handler_gettimeofday = x32::VDSO::gettimeofday; - } + if (SymbolPtr) { + VDSOHandlers::GetTimeOfDayPtr = reinterpret_cast(SymbolPtr); + x64::Handler_gettimeofday = x64::VDSO::gettimeofday; + x32::Handler_gettimeofday = x32::VDSO::gettimeofday; + } - SymbolPtr = dlsym(vdso, "__kernel_clock_gettime"); - if (!SymbolPtr) { - SymbolPtr = dlsym(vdso, "__vdso_clock_gettime"); - } + SymbolPtr = dlsym(vdso, "__kernel_clock_gettime"); + if (!SymbolPtr) { + SymbolPtr = dlsym(vdso, "__vdso_clock_gettime"); + } - if (SymbolPtr) { - VDSOHandlers::ClockGetTimePtr = reinterpret_cast(SymbolPtr); - x64::Handler_clock_gettime = x64::VDSO::clock_gettime; - x32::Handler_clock_gettime = x32::VDSO::clock_gettime; - x32::Handler_clock_gettime64 = x32::VDSO::clock_gettime64; - } + if (SymbolPtr) { + VDSOHandlers::ClockGetTimePtr = reinterpret_cast(SymbolPtr); + x64::Handler_clock_gettime = x64::VDSO::clock_gettime; + x32::Handler_clock_gettime = x32::VDSO::clock_gettime; + x32::Handler_clock_gettime64 = 
x32::VDSO::clock_gettime64; + } - SymbolPtr = dlsym(vdso, "__kernel_clock_getres"); - if (!SymbolPtr) { - SymbolPtr = dlsym(vdso, "__vdso_clock_getres"); - } + SymbolPtr = dlsym(vdso, "__kernel_clock_getres"); + if (!SymbolPtr) { + SymbolPtr = dlsym(vdso, "__vdso_clock_getres"); + } - if (SymbolPtr) { - VDSOHandlers::ClockGetResPtr = reinterpret_cast(SymbolPtr); - x64::Handler_clock_getres = x64::VDSO::clock_getres; - x32::Handler_clock_getres = x32::VDSO::clock_getres; - } + if (SymbolPtr) { + VDSOHandlers::ClockGetResPtr = reinterpret_cast(SymbolPtr); + x64::Handler_clock_getres = x64::VDSO::clock_getres; + x32::Handler_clock_getres = x32::VDSO::clock_getres; + } - SymbolPtr = dlsym(vdso, "__kernel_getcpu"); - if (!SymbolPtr) { - SymbolPtr = dlsym(vdso, "__vdso_getcpu"); - } + SymbolPtr = dlsym(vdso, "__kernel_getcpu"); + if (!SymbolPtr) { + SymbolPtr = dlsym(vdso, "__vdso_getcpu"); + } - if (SymbolPtr) { - VDSOHandlers::GetCPUPtr = reinterpret_cast(SymbolPtr); - x64::Handler_getcpu = x64::VDSO::getcpu; - x32::Handler_getcpu = x32::VDSO::getcpu; - } - dlclose(vdso); + if (SymbolPtr) { + VDSOHandlers::GetCPUPtr = reinterpret_cast(SymbolPtr); + x64::Handler_getcpu = x64::VDSO::getcpu; + x32::Handler_getcpu = x32::VDSO::getcpu; } + dlclose(vdso); +} - static fextl::vector VDSODefinitions = { - { - // sha256(libVDSO:time) - { 0x37, 0x63, 0x46, 0xb0, 0x79, 0x06, 0x5f, 0x9d, 0x00, 0xb6, 0x8d, 0xfd, 0x9e, 0x4a, 0x62, 0xcd, 0x1e, 0x6c, 0xcc, 0x22, 0xcd, 0xb2, 0xc0, 0x17, 0x7d, 0x42, 0x6a, 0x40, 0xd1, 0xeb, 0xfa, 0xe0 }, - nullptr, - }, - { - // sha256(libVDSO:gettimeofday) - { 0x77, 0x2a, 0xde, 0x1c, 0x13, 0x2d, 0xe9, 0x48, 0xaf, 0xe0, 0xba, 0xcc, 0x6a, 0x89, 0xff, 0xca, 0x4a, 0xdc, 0xd5, 0x63, 0x2c, 0xc5, 0x62, 0x8b, 0x5d, 0xde, 0x0b, 0x15, 0x35, 0xc6, 0xc7, 0x14 }, - nullptr, - }, - { - // sha256(libVDSO:clock_gettime) - { 0x3c, 0x96, 0x9b, 0x2d, 0xc3, 0xad, 0x2b, 0x3b, 0x9c, 0x4e, 0x4d, 0xca, 0x1c, 0xe8, 0x18, 0x4a, 0x12, 0x8a, 0xe4, 0xc1, 0x56, 0x92, 0x73, 0xce, 
0x65, 0x85, 0x5f, 0x65, 0x7e, 0x94, 0x26, 0xbe }, - nullptr, - }, - - { - // sha256(libVDSO:clock_gettime64) - { 0xba, 0xe9, 0x6d, 0x30, 0xc0, 0x68, 0xc6, 0xd7, 0x59, 0x04, 0xf7, 0x10, 0x06, 0x72, 0x88, 0xfd, 0x4c, 0x57, 0x0f, 0x31, 0xa5, 0xea, 0xa9, 0xb9, 0xd3, 0x8d, 0x03, 0x81, 0x50, 0x16, 0x22, 0x71 }, - nullptr, - }, - - { - // sha256(libVDSO:clock_getres) - { 0xe4, 0xa1, 0xf6, 0x23, 0x35, 0xae, 0xb7, 0xb6, 0xb0, 0x37, 0xc5, 0xc3, 0xa3, 0xfd, 0xbf, 0xa2, 0xa1, 0xc8, 0x95, 0x78, 0xe5, 0x76, 0x86, 0xdb, 0x3e, 0x6c, 0x54, 0xd5, 0x02, 0x60, 0xd8, 0x6d }, - nullptr, - }, - { - // sha256(libVDSO:getcpu) - { 0x39, 0x83, 0x39, 0x36, 0x0f, 0x68, 0xd6, 0xfc, 0xc2, 0x3a, 0x97, 0x11, 0x85, 0x09, 0xc7, 0x25, 0xbb, 0x50, 0x49, 0x55, 0x6b, 0x0c, 0x9f, 0x50, 0x37, 0xf5, 0x9d, 0xb0, 0x38, 0x58, 0x57, 0x12 }, - nullptr, - }, - }; - - void LoadGuestVDSOSymbols(bool Is64Bit, char *VDSOBase) { - // We need to load symbols we care about. - if (Is64Bit) { - // We don't care about any 64-bit symbols right now. 
- return; - } +static fextl::vector VDSODefinitions = { + { + // sha256(libVDSO:time) + {0x37, 0x63, 0x46, 0xb0, 0x79, 0x06, 0x5f, 0x9d, 0x00, 0xb6, 0x8d, 0xfd, 0x9e, 0x4a, 0x62, 0xcd, + 0x1e, 0x6c, 0xcc, 0x22, 0xcd, 0xb2, 0xc0, 0x17, 0x7d, 0x42, 0x6a, 0x40, 0xd1, 0xeb, 0xfa, 0xe0}, + nullptr, + }, + { + // sha256(libVDSO:gettimeofday) + {0x77, 0x2a, 0xde, 0x1c, 0x13, 0x2d, 0xe9, 0x48, 0xaf, 0xe0, 0xba, 0xcc, 0x6a, 0x89, 0xff, 0xca, + 0x4a, 0xdc, 0xd5, 0x63, 0x2c, 0xc5, 0x62, 0x8b, 0x5d, 0xde, 0x0b, 0x15, 0x35, 0xc6, 0xc7, 0x14}, + nullptr, + }, + { + // sha256(libVDSO:clock_gettime) + {0x3c, 0x96, 0x9b, 0x2d, 0xc3, 0xad, 0x2b, 0x3b, 0x9c, 0x4e, 0x4d, 0xca, 0x1c, 0xe8, 0x18, 0x4a, + 0x12, 0x8a, 0xe4, 0xc1, 0x56, 0x92, 0x73, 0xce, 0x65, 0x85, 0x5f, 0x65, 0x7e, 0x94, 0x26, 0xbe}, + nullptr, + }, + + { + // sha256(libVDSO:clock_gettime64) + {0xba, 0xe9, 0x6d, 0x30, 0xc0, 0x68, 0xc6, 0xd7, 0x59, 0x04, 0xf7, 0x10, 0x06, 0x72, 0x88, 0xfd, + 0x4c, 0x57, 0x0f, 0x31, 0xa5, 0xea, 0xa9, 0xb9, 0xd3, 0x8d, 0x03, 0x81, 0x50, 0x16, 0x22, 0x71}, + nullptr, + }, + + { + // sha256(libVDSO:clock_getres) + {0xe4, 0xa1, 0xf6, 0x23, 0x35, 0xae, 0xb7, 0xb6, 0xb0, 0x37, 0xc5, 0xc3, 0xa3, 0xfd, 0xbf, 0xa2, + 0xa1, 0xc8, 0x95, 0x78, 0xe5, 0x76, 0x86, 0xdb, 0x3e, 0x6c, 0x54, 0xd5, 0x02, 0x60, 0xd8, 0x6d}, + nullptr, + }, + { + // sha256(libVDSO:getcpu) + {0x39, 0x83, 0x39, 0x36, 0x0f, 0x68, 0xd6, 0xfc, 0xc2, 0x3a, 0x97, 0x11, 0x85, 0x09, 0xc7, 0x25, + 0xbb, 0x50, 0x49, 0x55, 0x6b, 0x0c, 0x9f, 0x50, 0x37, 0xf5, 0x9d, 0xb0, 0x38, 0x58, 0x57, 0x12}, + nullptr, + }, +}; + +void LoadGuestVDSOSymbols(bool Is64Bit, char* VDSOBase) { + // We need to load symbols we care about. + if (Is64Bit) { + // We don't care about any 64-bit symbols right now. + return; + } - // 32-bit symbol loading. - const Elf32_Ehdr *Header = reinterpret_cast(VDSOBase); + // 32-bit symbol loading. + const Elf32_Ehdr* Header = reinterpret_cast(VDSOBase); - // First walk the section headers to find the symbol table. 
- const Elf32_Shdr *RawShdrs = - reinterpret_cast(VDSOBase + Header->e_shoff); + // First walk the section headers to find the symbol table. + const Elf32_Shdr* RawShdrs = reinterpret_cast(VDSOBase + Header->e_shoff); - const Elf32_Shdr *StrHeader = &RawShdrs[Header->e_shstrndx]; - char const *SHStrings = VDSOBase + StrHeader->sh_offset; + const Elf32_Shdr* StrHeader = &RawShdrs[Header->e_shstrndx]; + const char* SHStrings = VDSOBase + StrHeader->sh_offset; - const Elf32_Shdr *SymTableHeader{}; - const Elf32_Shdr *StringTableHeader{}; + const Elf32_Shdr* SymTableHeader {}; + const Elf32_Shdr* StringTableHeader {}; - for (size_t i = 0; i < Header->e_shnum; ++i) { - const auto &Header = RawShdrs[i]; - if (Header.sh_type == SHT_SYMTAB && strcmp(&SHStrings[Header.sh_name], ".symtab") == 0) { - SymTableHeader = &Header; - StringTableHeader = &RawShdrs[SymTableHeader->sh_link]; - break; - } + for (size_t i = 0; i < Header->e_shnum; ++i) { + const auto& Header = RawShdrs[i]; + if (Header.sh_type == SHT_SYMTAB && strcmp(&SHStrings[Header.sh_name], ".symtab") == 0) { + SymTableHeader = &Header; + StringTableHeader = &RawShdrs[SymTableHeader->sh_link]; + break; } + } - if (!SymTableHeader) { - // Couldn't find symbol table - return; - } + if (!SymTableHeader) { + // Couldn't find symbol table + return; + } - char const *StrTab = VDSOBase + StringTableHeader->sh_offset; - size_t NumSymbols = SymTableHeader->sh_size / SymTableHeader->sh_entsize; - - for (size_t i = 0; i < NumSymbols; ++i) { - uint64_t offset = SymTableHeader->sh_offset + i * SymTableHeader->sh_entsize; - Elf32_Sym const *Symbol = - reinterpret_cast(VDSOBase + offset); - if (ELF32_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && - Symbol->st_value != 0) { - char const * Name = &StrTab[Symbol->st_name]; - if (Name[0] != '\0') { - if (strcmp(Name, "__kernel_sigreturn") == 0) { - VDSOPointers.VDSO_kernel_sigreturn = VDSOBase + Symbol->st_value; - } - else if (strcmp(Name, "__kernel_rt_sigreturn") == 0) { - 
VDSOPointers.VDSO_kernel_rt_sigreturn = VDSOBase + Symbol->st_value; - } + const char* StrTab = VDSOBase + StringTableHeader->sh_offset; + size_t NumSymbols = SymTableHeader->sh_size / SymTableHeader->sh_entsize; + + for (size_t i = 0; i < NumSymbols; ++i) { + uint64_t offset = SymTableHeader->sh_offset + i * SymTableHeader->sh_entsize; + const Elf32_Sym* Symbol = reinterpret_cast< const Elf32_Sym*>(VDSOBase + offset); + if (ELF32_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && Symbol->st_value != 0) { + const char* Name = &StrTab[Symbol->st_name]; + if (Name[0] != '\0') { + if (strcmp(Name, "__kernel_sigreturn") == 0) { + VDSOPointers.VDSO_kernel_sigreturn = VDSOBase + Symbol->st_value; + } else if (strcmp(Name, "__kernel_rt_sigreturn") == 0) { + VDSOPointers.VDSO_kernel_rt_sigreturn = VDSOBase + Symbol->st_value; } } } } +} - void* LoadVDSOThunks(bool Is64Bit, FEX::HLE::SyscallHandler *const Handler) { - void* VDSOBase{}; - FEX_CONFIG_OPT(ThunkGuestLibs, THUNKGUESTLIBS); - FEX_CONFIG_OPT(ThunkGuestLibs32, THUNKGUESTLIBS32); - - fextl::string ThunkGuestPath{}; - if (Is64Bit) { - ThunkGuestPath = fextl::fmt::format("{}/libVDSO-guest.so", ThunkGuestLibs()); - - // Set the Thunk definition pointers for x86-64 - VDSODefinitions[0].ThunkFunction = FEX::VDSO::x64::Handler_time; - VDSODefinitions[1].ThunkFunction = FEX::VDSO::x64::Handler_gettimeofday; - VDSODefinitions[2].ThunkFunction = FEX::VDSO::x64::Handler_clock_gettime; - VDSODefinitions[3].ThunkFunction = FEX::VDSO::x64::Handler_clock_gettime; - VDSODefinitions[4].ThunkFunction = FEX::VDSO::x64::Handler_clock_getres; - VDSODefinitions[5].ThunkFunction = FEX::VDSO::x64::Handler_getcpu; - } - else { - ThunkGuestPath = fextl::fmt::format("{}/libVDSO-guest.so", ThunkGuestLibs32()); - - // Set the Thunk definition pointers for x86 - VDSODefinitions[0].ThunkFunction = FEX::VDSO::x32::Handler_time; - VDSODefinitions[1].ThunkFunction = FEX::VDSO::x32::Handler_gettimeofday; - VDSODefinitions[2].ThunkFunction = 
FEX::VDSO::x32::Handler_clock_gettime; - VDSODefinitions[3].ThunkFunction = FEX::VDSO::x32::Handler_clock_gettime64; - VDSODefinitions[4].ThunkFunction = FEX::VDSO::x32::Handler_clock_getres; - VDSODefinitions[5].ThunkFunction = FEX::VDSO::x32::Handler_getcpu; - } - - // Load VDSO if we can - int VDSOFD = ::open(ThunkGuestPath.c_str(), O_RDONLY); +void* LoadVDSOThunks(bool Is64Bit, FEX::HLE::SyscallHandler* const Handler) { + void* VDSOBase {}; + FEX_CONFIG_OPT(ThunkGuestLibs, THUNKGUESTLIBS); + FEX_CONFIG_OPT(ThunkGuestLibs32, THUNKGUESTLIBS32); + + fextl::string ThunkGuestPath {}; + if (Is64Bit) { + ThunkGuestPath = fextl::fmt::format("{}/libVDSO-guest.so", ThunkGuestLibs()); + + // Set the Thunk definition pointers for x86-64 + VDSODefinitions[0].ThunkFunction = FEX::VDSO::x64::Handler_time; + VDSODefinitions[1].ThunkFunction = FEX::VDSO::x64::Handler_gettimeofday; + VDSODefinitions[2].ThunkFunction = FEX::VDSO::x64::Handler_clock_gettime; + VDSODefinitions[3].ThunkFunction = FEX::VDSO::x64::Handler_clock_gettime; + VDSODefinitions[4].ThunkFunction = FEX::VDSO::x64::Handler_clock_getres; + VDSODefinitions[5].ThunkFunction = FEX::VDSO::x64::Handler_getcpu; + } else { + ThunkGuestPath = fextl::fmt::format("{}/libVDSO-guest.so", ThunkGuestLibs32()); + + // Set the Thunk definition pointers for x86 + VDSODefinitions[0].ThunkFunction = FEX::VDSO::x32::Handler_time; + VDSODefinitions[1].ThunkFunction = FEX::VDSO::x32::Handler_gettimeofday; + VDSODefinitions[2].ThunkFunction = FEX::VDSO::x32::Handler_clock_gettime; + VDSODefinitions[3].ThunkFunction = FEX::VDSO::x32::Handler_clock_gettime64; + VDSODefinitions[4].ThunkFunction = FEX::VDSO::x32::Handler_clock_getres; + VDSODefinitions[5].ThunkFunction = FEX::VDSO::x32::Handler_getcpu; + } - if (VDSOFD != -1) { - // Get file size - size_t VDSOSize = lseek(VDSOFD, 0, SEEK_END); + // Load VDSO if we can + int VDSOFD = ::open(ThunkGuestPath.c_str(), O_RDONLY); - if (VDSOSize >= 4) { - // Reset to beginning - lseek(VDSOFD, 0, 
SEEK_SET); - VDSOSize = FEXCore::AlignUp(VDSOSize, 4096); + if (VDSOFD != -1) { + // Get file size + size_t VDSOSize = lseek(VDSOFD, 0, SEEK_END); - // Map the VDSO file to memory - VDSOBase = Handler->GuestMmap(nullptr, nullptr, VDSOSize, PROT_READ, MAP_PRIVATE, VDSOFD, 0); + if (VDSOSize >= 4) { + // Reset to beginning + lseek(VDSOFD, 0, SEEK_SET); + VDSOSize = FEXCore::AlignUp(VDSOSize, 4096); - // Since we found our VDSO thunk library, find our host VDSO function implementations. - LoadHostVDSO(); + // Map the VDSO file to memory + VDSOBase = Handler->GuestMmap(nullptr, nullptr, VDSOSize, PROT_READ, MAP_PRIVATE, VDSOFD, 0); - } - close(VDSOFD); - LoadGuestVDSOSymbols(Is64Bit, reinterpret_cast(VDSOBase)); + // Since we found our VDSO thunk library, find our host VDSO function implementations. + LoadHostVDSO(); } - - return VDSOBase; + close(VDSOFD); + LoadGuestVDSOSymbols(Is64Bit, reinterpret_cast(VDSOBase)); } - uint64_t GetVSyscallEntry(const void* VDSOBase) { - if (!VDSOBase) { - return 0; - } - - // Extract the vsyscall location from the VDSO header. - auto Header = reinterpret_cast(VDSOBase); - - if (Header->e_entry) { - return reinterpret_cast(VDSOBase) + Header->e_entry; - } + return VDSOBase; +} +uint64_t GetVSyscallEntry(const void* VDSOBase) { + if (!VDSOBase) { return 0; } - fextl::vector const& GetVDSOThunkDefinitions() { - return VDSODefinitions; - } + // Extract the vsyscall location from the VDSO header. 
+ auto Header = reinterpret_cast(VDSOBase); - FEXCore::Context::VDSOSigReturn const& GetVDSOSymbols() { - return VDSOPointers; + if (Header->e_entry) { + return reinterpret_cast(VDSOBase) + Header->e_entry; } + + return 0; +} + +const fextl::vector& GetVDSOThunkDefinitions() { + return VDSODefinitions; +} + +const FEXCore::Context::VDSOSigReturn& GetVDSOSymbols() { + return VDSOPointers; } +} // namespace FEX::VDSO diff --git a/Source/Tools/TestHarnessRunner/TestHarnessRunner.cpp b/Source/Tools/TestHarnessRunner/TestHarnessRunner.cpp index 8b8c07dd0f..aa3893b299 100644 --- a/Source/Tools/TestHarnessRunner/TestHarnessRunner.cpp +++ b/Source/Tools/TestHarnessRunner/TestHarnessRunner.cpp @@ -44,11 +44,11 @@ desc: Used to run Assembly tests #include #include -void MsgHandler(LogMan::DebugLevels Level, char const *Message) { +void MsgHandler(LogMan::DebugLevels Level, const char* Message) { fextl::fmt::print("[{}] {}\n", LogMan::DebugLevelStr(Level), Message); } -void AssertHandler(char const *Message) { +void AssertHandler(const char* Message) { fextl::fmt::print("[ASSERT] {}\n", Message); // make sure buffers are flushed @@ -72,7 +72,7 @@ class TestEnvLoader final : public FEXCore::Config::Layer { void Load() override { fextl::unordered_map EnvMap; - for (auto &Option : Env) { + for (auto& Option : Env) { std::string_view Key = Option.first; std::string_view Value_View = Option.second; std::optional Value; @@ -82,21 +82,21 @@ class TestEnvLoader final : public FEXCore::Config::Layer { if (Value) { EnvMap.insert_or_assign(Key, *Value); - } - else { + } else { EnvMap.insert_or_assign(Key, Value_View); } } auto GetVar = [&](const std::string_view id) -> std::optional { const auto it = EnvMap.find(id); - if (it == EnvMap.end()) + if (it == EnvMap.end()) { return std::nullopt; + } return it->second; }; - for (auto &it : EnvConfigLookup) { + for (auto& it : EnvConfigLookup) { if (auto Value = GetVar(it.first); Value) { Set(it.second, *Value); } @@ -106,85 +106,85 @@ class 
TestEnvLoader final : public FEXCore::Config::Layer { private: fextl::vector> Env; }; -} +} // namespace namespace LongJumpHandler { - static jmp_buf LongJump{}; - static bool DidFault{}; +static jmp_buf LongJump {}; +static bool DidFault {}; #ifndef _WIN32 - void RegisterLongJumpHandler(FEX::HLE::SignalDelegator *Handler) { - Handler->RegisterFrontendHostSignalHandler(SIGSEGV, [](FEXCore::Core::InternalThreadState *Thread, int Signal, void *info, void *ucontext) { - constexpr uint8_t HLT = 0xF4; - if (reinterpret_cast(Thread->CurrentFrame->State.rip)[0] != HLT) { - DidFault = true; - return false; - } - - longjmp(LongJumpHandler::LongJump, 1); +void RegisterLongJumpHandler(FEX::HLE::SignalDelegator* Handler) { + Handler->RegisterFrontendHostSignalHandler( + SIGSEGV, + [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) { + constexpr uint8_t HLT = 0xF4; + if (reinterpret_cast(Thread->CurrentFrame->State.rip)[0] != HLT) { + DidFault = true; return false; - }, true); - } + } + + longjmp(LongJumpHandler::LongJump, 1); + return false; + }, + true); +} #else - FEX::DummyHandlers::DummySignalDelegator *Handler; +FEX::DummyHandlers::DummySignalDelegator* Handler; - static void LongJumpHandler() { - longjmp(LongJump, 1); - } +static void LongJumpHandler() { + longjmp(LongJump, 1); +} - LONG WINAPI - VectoredExceptionHandler(struct _EXCEPTION_POINTERS *ExceptionInfo) { - auto Thread = Handler->GetBackingTLSThread(); - PCONTEXT Context; - Context = ExceptionInfo->ContextRecord; - - switch (ExceptionInfo->ExceptionRecord->ExceptionCode) { - case STATUS_DATATYPE_MISALIGNMENT: { - const auto PC = FEX::ArchHelpers::Context::GetPc(Context); - if (!Thread->CPUBackend->IsAddressInCodeBuffer(PC)) { - // Wasn't a sigbus in JIT code - return EXCEPTION_CONTINUE_SEARCH; - } - - const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(true, PC, FEX::ArchHelpers::Context::GetArmGPRs(Context)); - FEX::ArchHelpers::Context::SetPc(Context, PC 
+ Result.second); - return Result.first ? - EXCEPTION_CONTINUE_EXECUTION : - EXCEPTION_CONTINUE_SEARCH; - } - case STATUS_ACCESS_VIOLATION: { - constexpr uint8_t HLT = 0xF4; - if (reinterpret_cast(Thread->CurrentFrame->State.rip)[0] != HLT) { - DidFault = true; - return EXCEPTION_CONTINUE_SEARCH; - } - - FEX::ArchHelpers::Context::SetPc(Context, reinterpret_cast(LongJumpHandler)); - return EXCEPTION_CONTINUE_EXECUTION; - } - default: break; +LONG WINAPI VectoredExceptionHandler(struct _EXCEPTION_POINTERS* ExceptionInfo) { + auto Thread = Handler->GetBackingTLSThread(); + PCONTEXT Context; + Context = ExceptionInfo->ContextRecord; + + switch (ExceptionInfo->ExceptionRecord->ExceptionCode) { + case STATUS_DATATYPE_MISALIGNMENT: { + const auto PC = FEX::ArchHelpers::Context::GetPc(Context); + if (!Thread->CPUBackend->IsAddressInCodeBuffer(PC)) { + // Wasn't a sigbus in JIT code + return EXCEPTION_CONTINUE_SEARCH; } - printf("!Fault!\n"); - printf("\tExceptionCode: 0x%lx\n", ExceptionInfo->ExceptionRecord->ExceptionCode); - printf("\tExceptionFlags: 0x%lx\n", ExceptionInfo->ExceptionRecord->ExceptionFlags); - printf("\tExceptionRecord: 0x%p\n", ExceptionInfo->ExceptionRecord->ExceptionRecord); - printf("\tExceptionAddress: 0x%p\n", ExceptionInfo->ExceptionRecord->ExceptionAddress); - printf("\tNumberParameters: 0x%lx\n", ExceptionInfo->ExceptionRecord->NumberParameters); + const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(true, PC, FEX::ArchHelpers::Context::GetArmGPRs(Context)); + FEX::ArchHelpers::Context::SetPc(Context, PC + Result.second); + return Result.first ? 
EXCEPTION_CONTINUE_EXECUTION : EXCEPTION_CONTINUE_SEARCH; + } + case STATUS_ACCESS_VIOLATION: { + constexpr uint8_t HLT = 0xF4; + if (reinterpret_cast(Thread->CurrentFrame->State.rip)[0] != HLT) { + DidFault = true; + return EXCEPTION_CONTINUE_SEARCH; + } - return EXCEPTION_CONTINUE_SEARCH; + FEX::ArchHelpers::Context::SetPc(Context, reinterpret_cast(LongJumpHandler)); + return EXCEPTION_CONTINUE_EXECUTION; + } + default: break; } - void RegisterLongJumpHandler(FEX::DummyHandlers::DummySignalDelegator *Handler) { - // Install VEH handler. - AddVectoredExceptionHandler(0, VectoredExceptionHandler); + printf("!Fault!\n"); + printf("\tExceptionCode: 0x%lx\n", ExceptionInfo->ExceptionRecord->ExceptionCode); + printf("\tExceptionFlags: 0x%lx\n", ExceptionInfo->ExceptionRecord->ExceptionFlags); + printf("\tExceptionRecord: 0x%p\n", ExceptionInfo->ExceptionRecord->ExceptionRecord); + printf("\tExceptionAddress: 0x%p\n", ExceptionInfo->ExceptionRecord->ExceptionAddress); + printf("\tNumberParameters: 0x%lx\n", ExceptionInfo->ExceptionRecord->NumberParameters); - LongJumpHandler::Handler = Handler; - } -#endif + return EXCEPTION_CONTINUE_SEARCH; } -int main(int argc, char **argv, char **const envp) { +void RegisterLongJumpHandler(FEX::DummyHandlers::DummySignalDelegator* Handler) { + // Install VEH handler. 
+ AddVectoredExceptionHandler(0, VectoredExceptionHandler); + + LongJumpHandler::Handler = Handler; +} +#endif +} // namespace LongJumpHandler + +int main(int argc, char** argv, char** const envp) { #ifndef _WIN32 auto SBRKPointer = FEXCore::Allocator::DisableSBRKAllocations(); #endif @@ -217,7 +217,7 @@ int main(int argc, char **argv, char **const envp) { return -1; } - FEX::HarnessHelper::HarnessCodeLoader Loader{Filename, ConfigFile}; + FEX::HarnessHelper::HarnessCodeLoader Loader {Filename, ConfigFile}; // Adds in environment options from the test harness config FEXCore::Config::AddLayer(fextl::make_unique(Loader.GetEnvironmentOptions())); @@ -264,20 +264,14 @@ int main(int argc, char **argv, char **const envp) { SupportsAVX = HostFeatures.SupportsAVX; SupportsAVX2 = HostFeatures.SupportsAVX2; - bool TestUnsupported = - (!HostFeatures.Supports3DNow && Loader.Requires3DNow()) || - (!HostFeatures.SupportsSSE4A && Loader.RequiresSSE4A()) || - (!SupportsAVX && Loader.RequiresAVX()) || - (!SupportsAVX2 && Loader.RequiresAVX2()) || - (!HostFeatures.SupportsRAND && Loader.RequiresRAND()) || - (!HostFeatures.SupportsSHA && Loader.RequiresSHA()) || - (!HostFeatures.SupportsCLZERO && Loader.RequiresCLZERO()) || - (!HostFeatures.SupportsBMI1 && Loader.RequiresBMI1()) || - (!HostFeatures.SupportsBMI2 && Loader.RequiresBMI2()) || - (!HostFeatures.SupportsCLWB && Loader.RequiresCLWB()); + bool TestUnsupported = (!HostFeatures.Supports3DNow && Loader.Requires3DNow()) || (!HostFeatures.SupportsSSE4A && Loader.RequiresSSE4A()) || + (!SupportsAVX && Loader.RequiresAVX()) || (!SupportsAVX2 && Loader.RequiresAVX2()) || + (!HostFeatures.SupportsRAND && Loader.RequiresRAND()) || (!HostFeatures.SupportsSHA && Loader.RequiresSHA()) || + (!HostFeatures.SupportsCLZERO && Loader.RequiresCLZERO()) || (!HostFeatures.SupportsBMI1 && Loader.RequiresBMI1()) || + (!HostFeatures.SupportsBMI2 && Loader.RequiresBMI2()) || (!HostFeatures.SupportsCLWB && Loader.RequiresCLWB()); #ifdef _WIN32 - 
TestUnsupported |= Loader.RequiresLinux(); + TestUnsupported |= Loader.RequiresLinux(); #endif if (TestUnsupported) { @@ -286,8 +280,8 @@ int main(int argc, char **argv, char **const envp) { if (Core != FEXCore::Config::CONFIG_CUSTOM) { #ifndef _WIN32 - auto SyscallHandler = Loader.Is64BitMode() ? FEX::HLE::x64::CreateHandler(CTX.get(), SignalDelegation.get()) - : FEX::HLE::x32::CreateHandler(CTX.get(), SignalDelegation.get(), std::move(Allocator)); + auto SyscallHandler = Loader.Is64BitMode() ? FEX::HLE::x64::CreateHandler(CTX.get(), SignalDelegation.get()) : + FEX::HLE::x32::CreateHandler(CTX.get(), SignalDelegation.get(), std::move(Allocator)); #else auto SyscallHandler = FEX::WindowsHandlers::CreateSyscallHandler(); @@ -362,4 +356,3 @@ int main(int argc, char **argv, char **const envp) { return Passed ? 0 : -1; } - diff --git a/Source/Tools/TestHarnessRunner/TestHarnessRunner/HostRunner.cpp b/Source/Tools/TestHarnessRunner/TestHarnessRunner/HostRunner.cpp index 83130700e1..1e29089e56 100644 --- a/Source/Tools/TestHarnessRunner/TestHarnessRunner/HostRunner.cpp +++ b/Source/Tools/TestHarnessRunner/TestHarnessRunner/HostRunner.cpp @@ -30,14 +30,17 @@ using namespace Xbyak; #ifdef _M_X86_64 -static inline int modify_ldt(int func, void *ldt) { return ::syscall(SYS_modify_ldt, func, ldt, sizeof(struct user_desc)); } +static inline int modify_ldt(int func, void* ldt) { + return ::syscall(SYS_modify_ldt, func, ldt, sizeof(struct user_desc)); +} class x86HostRunner final : public Xbyak::CodeGenerator { public: using AsmDispatch = void (*)(uintptr_t InitialRip, uintptr_t InitialStack); AsmDispatch DispatchPtr; - x86HostRunner() : CodeGenerator(4096) { + x86HostRunner() + : CodeGenerator(4096) { Setup32BitCodeSegment(); DispatchPtr = getCurr(); @@ -89,10 +92,10 @@ class x86HostRunner final : public Xbyak::CodeGenerator { GetCodeSegmentEntryLocation = getCurr(); hlt(); - Label Gate{}; + Label Gate {}; // Patch gate entry point // mov(dword[rip + Gate], edi) - jmp(qword 
[rip + Gate], LabelType::T_FAR); + jmp(qword[rip + Gate], LabelType::T_FAR); L(Gate); dd(0x1'0000); // This is a 32-bit offset from the start of the gate. We start at 0x1'0000 + 0 @@ -114,14 +117,14 @@ class x86HostRunner final : public Xbyak::CodeGenerator { ready(); } - bool HandleSIGSEGV(FEXCore::Core::CPUState *OutState, int Signal, void *info, void *ucontext) { - ucontext_t *_context = (ucontext_t *)ucontext; - mcontext_t *_mcontext = &_context->uc_mcontext; + bool HandleSIGSEGV(FEXCore::Core::CPUState* OutState, int Signal, void* info, void* ucontext) { + ucontext_t* _context = (ucontext_t*)ucontext; + mcontext_t* _mcontext = &_context->uc_mcontext; // Check our current instruction that we just executed to ensure it was an HLT - uint8_t *Inst{}; + uint8_t* Inst {}; - Inst = reinterpret_cast(_mcontext->gregs[REG_RIP]); + Inst = reinterpret_cast(_mcontext->gregs[REG_RIP]); if (!Is64BitMode()) { if (_mcontext->gregs[REG_RIP] == GetCodeSegmentEntryLocation) { // Backup the CSGSFS register @@ -162,8 +165,7 @@ class x86HostRunner final : public Xbyak::CodeGenerator { const auto* reserved = &xstate->fpstate.sw_reserved; if (reserved->HasExtendedContext() && reserved->HasYMMH()) { for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) { - memcpy(&OutState->xmm.avx.data[i][2], &xstate->ymmh.ymmh_space[i], - sizeof(xstate->ymmh.ymmh_space[0])); + memcpy(&OutState->xmm.avx.data[i][2], &xstate->ymmh.ymmh_space[i], sizeof(xstate->ymmh.ymmh_space[0])); } } @@ -190,8 +192,8 @@ class x86HostRunner final : public Xbyak::CodeGenerator { private: FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE); - int CodeSegmentEntry{}; - int GlobalCodeSegmentEntry{}; + int CodeSegmentEntry {}; + int GlobalCodeSegmentEntry {}; uint64_t GetCodeSegmentEntryLocation; uint64_t ReturningStackLocation; @@ -266,22 +268,21 @@ class x86HostRunner final : public Xbyak::CodeGenerator { } }; -void RunAsHost(fextl::unique_ptr &SignalDelegation, uintptr_t InitialRip, uintptr_t StackPointer, - 
FEXCore::Core::CPUState *OutputState) { +void RunAsHost(fextl::unique_ptr& SignalDelegation, uintptr_t InitialRip, uintptr_t StackPointer, + FEXCore::Core::CPUState* OutputState) { x86HostRunner runner; SignalDelegation->RegisterHostSignalHandler( SIGSEGV, - [&runner, OutputState](FEXCore::Core::InternalThreadState *Thread, int Signal, void *info, void *ucontext) -> bool { - return runner.HandleSIGSEGV(OutputState, Signal, info, ucontext); - }, - true - ); + [&runner, OutputState](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) -> bool { + return runner.HandleSIGSEGV(OutputState, Signal, info, ucontext); + }, + true); runner.DispatchPtr(InitialRip, StackPointer); } #else -void RunAsHost(fextl::unique_ptr &SignalDelegation, uintptr_t InitialRip, uintptr_t StackPointer, - FEXCore::Core::CPUState *OutputState) { +void RunAsHost(fextl::unique_ptr& SignalDelegation, uintptr_t InitialRip, uintptr_t StackPointer, + FEXCore::Core::CPUState* OutputState) { LOGMAN_MSG_A_FMT("RunAsHost doesn't exist for this host"); } #endif diff --git a/Source/Tools/pidof/pidof.cpp b/Source/Tools/pidof/pidof.cpp index 568c84c91c..62226329c3 100644 --- a/Source/Tools/pidof/pidof.cpp +++ b/Source/Tools/pidof/pidof.cpp @@ -11,38 +11,28 @@ namespace Config { -bool SingleShot{}; -bool SkipZombie{true}; -bool DoNotDisplay{}; +bool SingleShot {}; +bool SkipZombie {true}; +bool DoNotDisplay {}; std::string Separator {" "}; std::unordered_set OmitPids; std::unordered_set Programs; -void LoadOptions(int argc, char **argv) { - optparse::OptionParser Parser{}; +void LoadOptions(int argc, char** argv) { + optparse::OptionParser Parser {}; - Parser.add_option("-s") - .help("Single shot - Only returns one pid") - .action("store_true") - .set_default(SingleShot); + Parser.add_option("-s").help("Single shot - Only returns one pid").action("store_true").set_default(SingleShot); Parser.add_option("-q") .help("Do not display matched PIDs to stdout. 
Simply exit with status of true or false if a PID was found") .action("store_true") .set_default(DoNotDisplay); - Parser.add_option("-z") - .help("Try to detect zombie processes - Usually zombie processes are skipped") - .action("store_false") - .set_default(SkipZombie); + Parser.add_option("-z").help("Try to detect zombie processes - Usually zombie processes are skipped").action("store_false").set_default(SkipZombie); - Parser.add_option("-d") - .help("Use a different separator if more than one pid is show - Default is space") - .set_default(Separator); + Parser.add_option("-d").help("Use a different separator if more than one pid is show - Default is space").set_default(Separator); - Parser.add_option("-o") - .help("Ignore processes with matched pids") - .action("append"); + Parser.add_option("-o").help("Ignore processes with matched pids").action("append"); optparse::Values Options = Parser.parse_args(argc, argv); @@ -51,25 +41,27 @@ void LoadOptions(int argc, char **argv) { SkipZombie = Options.get("z"); Separator = Options["d"]; - for (const auto &Omit: Options.all("o")) { - std::istringstream ss{Omit}; + for (const auto& Omit : Options.all("o")) { + std::istringstream ss {Omit}; std::string sub; while (std::getline(ss, sub, ',')) { int64_t pid; auto ConvResult = std::from_chars(sub.data(), sub.data() + sub.size(), pid, 10); // Invalid pid, skip. - if (ConvResult.ec == std::errc::invalid_argument) continue; + if (ConvResult.ec == std::errc::invalid_argument) { + continue; + } OmitPids.emplace(pid); } } - for (const auto &Program : Parser.args()) { + for (const auto& Program : Parser.args()) { Programs.emplace(Program); } } -} +} // namespace Config struct PIDInfo { int64_t pid; @@ -80,20 +72,24 @@ struct PIDInfo { std::vector PIDs; -int main(int argc, char **argv) { +int main(int argc, char** argv) { Config::LoadOptions(argc, argv); // Iterate over all pids, storing the data for investigating afterwards. 
- for (const auto &Entry : std::filesystem::directory_iterator("/proc/")) { + for (const auto& Entry : std::filesystem::directory_iterator("/proc/")) { // If not a directory then skip. - if (!Entry.is_directory()) continue; + if (!Entry.is_directory()) { + continue; + } auto CMDLinePath = Entry.path() / "cmdline"; auto StatusPath = Entry.path() / "status"; auto ExePath = Entry.path() / "exe"; // If cmdline doesn't exist then skip. - if (!std::filesystem::exists(CMDLinePath)) continue; + if (!std::filesystem::exists(CMDLinePath)) { + continue; + } auto Filename = Entry.path().filename().string(); int64_t pid; @@ -101,25 +97,33 @@ int main(int argc, char **argv) { // If the filename couldn't be converted to a PID then skip. // Happens with folders like `self` and a few other folders in this directory. - if (ConvResult.ec == std::errc::invalid_argument) continue; + if (ConvResult.ec == std::errc::invalid_argument) { + continue; + } std::ostringstream CMDLineData; { std::ifstream fs(CMDLinePath, std::ios_base::in | std::ios_base::binary); - if (!fs.is_open()) continue; + if (!fs.is_open()) { + continue; + } CMDLineData << fs.rdbuf(); // If cmdline was empty then skip. - if (CMDLineData.str().empty()) continue; + if (CMDLineData.str().empty()) { + continue; + } } std::error_code ec; std::string exe_link = std::filesystem::read_symlink(ExePath, ec); // Couldn't read exe path? skip. 
- if (ec) continue; + if (ec) { + continue; + } // Read state char State; @@ -127,12 +131,16 @@ int main(int argc, char **argv) { { std::ifstream fs(StatusPath, std::ios_base::in | std::ios_base::binary); - if (!fs.is_open()) continue; + if (!fs.is_open()) { + continue; + } std::string Line; while (std::getline(fs, Line)) { - if (fs.eof()) break; + if (fs.eof()) { + break; + } if (Line.find("State") == Line.npos) { continue; @@ -145,44 +153,52 @@ int main(int argc, char **argv) { } PIDs.emplace_back(PIDInfo { - .pid = pid, - .cmdline = CMDLineData.str(), - .exe_link = exe_link, - .State = State, + .pid = pid, + .cmdline = CMDLineData.str(), + .exe_link = exe_link, + .State = State, }); } std::unordered_set MatchedPIDs; - for (const auto &pid : PIDs) { - if (pid.State == 'Z' && Config::SkipZombie) continue; - if (Config::OmitPids.contains(pid.pid)) continue; + for (const auto& pid : PIDs) { + if (pid.State == 'Z' && Config::SkipZombie) { + continue; + } + if (Config::OmitPids.contains(pid.pid)) { + continue; + } std::vector Args; - const char *arg = pid.cmdline.data(); + const char* arg = pid.cmdline.data(); while (arg[0]) { Args.emplace_back(arg); arg += strlen(arg) + 1; } - auto IsFEX = [](auto &Path) { - if (Path.ends_with("FEXInterpreter")) return true; - if (Path.ends_with("FEXLoader")) return true; + auto IsFEX = [](auto& Path) { + if (Path.ends_with("FEXInterpreter")) { + return true; + } + if (Path.ends_with("FEXLoader")) { + return true; + } return false; }; bool IsFEXBin = IsFEX(pid.exe_link) || IsFEX(Args[0]); - if (!IsFEXBin) continue; + if (!IsFEXBin) { + continue; + } auto Arg1 = Args[1]; auto Arg1Program = std::filesystem::path(Arg1).filename(); bool Matched = false; - for (const auto &CompareProgram : Config::Programs) { + for (const auto& CompareProgram : Config::Programs) { auto CompareProgramFilename = std::filesystem::path(CompareProgram).filename(); - if (CompareProgram == Arg1Program || - CompareProgram == Arg1 || - CompareProgramFilename == 
Arg1Program) { + if (CompareProgram == Arg1Program || CompareProgram == Arg1 || CompareProgramFilename == Arg1Program) { MatchedPIDs.emplace(pid.pid); Matched = true; break; @@ -196,12 +212,11 @@ int main(int argc, char **argv) { if (!MatchedPIDs.empty() && !Config::DoNotDisplay) { bool first = true; - for (const auto &MatchedPID : MatchedPIDs) { + for (const auto& MatchedPID : MatchedPIDs) { if (first) { fmt::print("{}", MatchedPID); first = false; - } - else { + } else { fmt::print("{}{}", Config::Separator, MatchedPID); } } diff --git a/Source/Windows/Common/CPUFeatures.cpp b/Source/Windows/Common/CPUFeatures.cpp index a83db1961a..ec1d13b08e 100644 --- a/Source/Windows/Common/CPUFeatures.cpp +++ b/Source/Windows/Common/CPUFeatures.cpp @@ -4,16 +4,14 @@ #include "CPUFeatures.h" namespace FEX::Windows { -CPUFeatures::CPUFeatures(FEXCore::Context::Context &CTX) { +CPUFeatures::CPUFeatures(FEXCore::Context::Context& CTX) { CpuInfo.ProcessorArchitecture = PROCESSOR_ARCHITECTURE_INTEL; // Baseline FEX feature-set - CpuInfo.ProcessorFeatureBits = CPU_FEATURE_VME | CPU_FEATURE_TSC | CPU_FEATURE_CMOV | CPU_FEATURE_PGE | - CPU_FEATURE_PSE | CPU_FEATURE_MTRR | CPU_FEATURE_CX8 | CPU_FEATURE_MMX | - CPU_FEATURE_X86 | CPU_FEATURE_PAT | CPU_FEATURE_FXSR | CPU_FEATURE_SEP | - CPU_FEATURE_SSE | CPU_FEATURE_3DNOW | CPU_FEATURE_SSE2 | CPU_FEATURE_SSE3 | - CPU_FEATURE_CX128 | CPU_FEATURE_NX | CPU_FEATURE_SSSE3 | CPU_FEATURE_SSE41 | - CPU_FEATURE_PAE | CPU_FEATURE_DAZ; + CpuInfo.ProcessorFeatureBits = CPU_FEATURE_VME | CPU_FEATURE_TSC | CPU_FEATURE_CMOV | CPU_FEATURE_PGE | CPU_FEATURE_PSE | CPU_FEATURE_MTRR | + CPU_FEATURE_CX8 | CPU_FEATURE_MMX | CPU_FEATURE_X86 | CPU_FEATURE_PAT | CPU_FEATURE_FXSR | CPU_FEATURE_SEP | + CPU_FEATURE_SSE | CPU_FEATURE_3DNOW | CPU_FEATURE_SSE2 | CPU_FEATURE_SSE3 | CPU_FEATURE_CX128 | + CPU_FEATURE_NX | CPU_FEATURE_SSSE3 | CPU_FEATURE_SSE41 | CPU_FEATURE_PAE | CPU_FEATURE_DAZ; // Features that require specific host CPU support const auto CPUIDResult01 = 
CTX.RunCPUIDFunction(0x01, 0); @@ -34,69 +32,44 @@ CPUFeatures::CPUFeatures(FEXCore::Context::Context &CTX) { const auto FamilyIdentifier = CPUIDResult01.eax; CpuInfo.ProcessorLevel = ((FamilyIdentifier >> 8) & 0xf) + ((FamilyIdentifier >> 20) & 0xff); // Family - CpuInfo.ProcessorRevision = (FamilyIdentifier & 0xf0000) >> 4; // Extended Model - CpuInfo.ProcessorRevision |= (FamilyIdentifier & 0xf0) << 4; // Model - CpuInfo.ProcessorRevision |= FamilyIdentifier & 0xf; // Stepping + CpuInfo.ProcessorRevision = (FamilyIdentifier & 0xf0000) >> 4; // Extended Model + CpuInfo.ProcessorRevision |= (FamilyIdentifier & 0xf0) << 4; // Model + CpuInfo.ProcessorRevision |= FamilyIdentifier & 0xf; // Stepping } bool CPUFeatures::IsFeaturePresent(uint32_t Feature) { switch (Feature) { - case PF_FLOATING_POINT_PRECISION_ERRATA: - return FALSE; - case PF_FLOATING_POINT_EMULATED: - return FALSE; - case PF_COMPARE_EXCHANGE_DOUBLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_CX8); - case PF_MMX_INSTRUCTIONS_AVAILABLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_MMX); - case PF_XMMI_INSTRUCTIONS_AVAILABLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE); - case PF_3DNOW_INSTRUCTIONS_AVAILABLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_3DNOW); - case PF_RDTSC_INSTRUCTION_AVAILABLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_TSC); - case PF_PAE_ENABLED: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_PAE); - case PF_XMMI64_INSTRUCTIONS_AVAILABLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE2); - case PF_SSE3_INSTRUCTIONS_AVAILABLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE3); - case PF_SSSE3_INSTRUCTIONS_AVAILABLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSSE3); - case PF_XSAVE_ENABLED: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_XSAVE); - case PF_COMPARE_EXCHANGE128: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_CX128); - case PF_SSE_DAZ_MODE_AVAILABLE: - 
return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_DAZ); - case PF_NX_ENABLED: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_NX); - case PF_SECOND_LEVEL_ADDRESS_TRANSLATION: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_2NDLEV); - case PF_VIRT_FIRMWARE_ENABLED: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_VIRT); - case PF_RDWRFSGSBASE_AVAILABLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_RDFS); - case PF_FASTFAIL_AVAILABLE: - return TRUE; - case PF_SSE4_1_INSTRUCTIONS_AVAILABLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE41); - case PF_SSE4_2_INSTRUCTIONS_AVAILABLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE42); - case PF_AVX_INSTRUCTIONS_AVAILABLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_AVX); - case PF_AVX2_INSTRUCTIONS_AVAILABLE: - return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_AVX2); - default: - LogMan::Msg::DFmt("Unknown CPU feature: {:X}", Feature); - return false; + case PF_FLOATING_POINT_PRECISION_ERRATA: return FALSE; + case PF_FLOATING_POINT_EMULATED: return FALSE; + case PF_COMPARE_EXCHANGE_DOUBLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_CX8); + case PF_MMX_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_MMX); + case PF_XMMI_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE); + case PF_3DNOW_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_3DNOW); + case PF_RDTSC_INSTRUCTION_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_TSC); + case PF_PAE_ENABLED: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_PAE); + case PF_XMMI64_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE2); + case PF_SSE3_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE3); + case PF_SSSE3_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSSE3); + case PF_XSAVE_ENABLED: return 
!!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_XSAVE); + case PF_COMPARE_EXCHANGE128: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_CX128); + case PF_SSE_DAZ_MODE_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_DAZ); + case PF_NX_ENABLED: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_NX); + case PF_SECOND_LEVEL_ADDRESS_TRANSLATION: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_2NDLEV); + case PF_VIRT_FIRMWARE_ENABLED: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_VIRT); + case PF_RDWRFSGSBASE_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_RDFS); + case PF_FASTFAIL_AVAILABLE: return TRUE; + case PF_SSE4_1_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE41); + case PF_SSE4_2_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE42); + case PF_AVX_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_AVX); + case PF_AVX2_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_AVX2); + default: LogMan::Msg::DFmt("Unknown CPU feature: {:X}", Feature); return false; } } -void CPUFeatures::UpdateInformation(SYSTEM_CPU_INFORMATION *Info) { +void CPUFeatures::UpdateInformation(SYSTEM_CPU_INFORMATION* Info) { Info->ProcessorArchitecture = CpuInfo.ProcessorArchitecture; Info->ProcessorLevel = CpuInfo.ProcessorLevel; Info->ProcessorRevision = CpuInfo.ProcessorRevision; Info->ProcessorFeatureBits = CpuInfo.ProcessorFeatureBits; } -} +} // namespace FEX::Windows diff --git a/Source/Windows/Common/InvalidationTracker.cpp b/Source/Windows/Common/InvalidationTracker.cpp index 429352c2d4..dfaff64c9d 100644 --- a/Source/Windows/Common/InvalidationTracker.cpp +++ b/Source/Windows/Common/InvalidationTracker.cpp @@ -8,7 +8,8 @@ #include namespace FEX::Windows { -void InvalidationTracker::HandleMemoryProtectionNotification(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, uint64_t Size, ULONG Prot) { +void 
InvalidationTracker::HandleMemoryProtectionNotification(FEXCore::Core::InternalThreadState* Thread, uint64_t Address, uint64_t Size, + ULONG Prot) { const auto AlignedBase = Address & FHU::FEX_PAGE_MASK; const auto AlignedSize = (Address - AlignedBase + Size + FHU::FEX_PAGE_SIZE - 1) & FHU::FEX_PAGE_MASK; @@ -26,14 +27,14 @@ void InvalidationTracker::HandleMemoryProtectionNotification(FEXCore::Core::Inte } } -void InvalidationTracker::InvalidateContainingSection(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, bool Free) { +void InvalidationTracker::InvalidateContainingSection(FEXCore::Core::InternalThreadState* Thread, uint64_t Address, bool Free) { MEMORY_BASIC_INFORMATION Info; - if (NtQueryVirtualMemory(NtCurrentProcess(), reinterpret_cast(Address), MemoryBasicInformation, &Info, sizeof(Info), nullptr)) + if (NtQueryVirtualMemory(NtCurrentProcess(), reinterpret_cast(Address), MemoryBasicInformation, &Info, sizeof(Info), nullptr)) { return; + } const auto SectionBase = reinterpret_cast(Info.AllocationBase); - const auto SectionSize = reinterpret_cast(Info.BaseAddress) + Info.RegionSize - - reinterpret_cast(Info.AllocationBase); + const auto SectionSize = reinterpret_cast(Info.BaseAddress) + Info.RegionSize - reinterpret_cast(Info.AllocationBase); Thread->CTX->InvalidateGuestCodeRange(Thread, SectionBase, SectionSize); if (Free) { @@ -42,7 +43,7 @@ void InvalidationTracker::InvalidateContainingSection(FEXCore::Core::InternalThr } } -void InvalidationTracker::InvalidateAlignedInterval(FEXCore::Core::InternalThreadState *Thread, uint64_t Address, uint64_t Size, bool Free) { +void InvalidationTracker::InvalidateAlignedInterval(FEXCore::Core::InternalThreadState* Thread, uint64_t Address, uint64_t Size, bool Free) { const auto AlignedBase = Address & FHU::FEX_PAGE_MASK; const auto AlignedSize = (Address - AlignedBase + Size + FHU::FEX_PAGE_SIZE - 1) & FHU::FEX_PAGE_MASK; Thread->CTX->InvalidateGuestCodeRange(Thread, AlignedBase, AlignedSize); @@ -60,7 
+61,7 @@ void InvalidationTracker::ReprotectRWXIntervals(uint64_t Address, uint64_t Size) do { const auto Query = RWXIntervals.Query(Address); if (Query.Enclosed) { - void *TmpAddress = reinterpret_cast(Address); + void* TmpAddress = reinterpret_cast(Address); SIZE_T TmpSize = static_cast(std::min(End, Address + Query.Size) - Address); ULONG TmpProt; NtProtectVirtualMemory(NtCurrentProcess(), &TmpAddress, &TmpSize, PAGE_EXECUTE_READ, &TmpProt); @@ -73,16 +74,17 @@ void InvalidationTracker::ReprotectRWXIntervals(uint64_t Address, uint64_t Size) } while (Address < End); } -bool InvalidationTracker::HandleRWXAccessViolation(FEXCore::Core::InternalThreadState *Thread, uint64_t FaultAddress) { +bool InvalidationTracker::HandleRWXAccessViolation(FEXCore::Core::InternalThreadState* Thread, uint64_t FaultAddress) { const bool NeedsInvalidate = [&](uint64_t Address) { std::unique_lock Lock(RWXIntervalsLock); const bool Enclosed = RWXIntervals.Query(Address).Enclosed; // Invalidate just the single faulting page - if (!Enclosed) + if (!Enclosed) { return false; + } ULONG TmpProt; - void *TmpAddress = reinterpret_cast(Address); + void* TmpAddress = reinterpret_cast(Address); SIZE_T TmpSize = 1; NtProtectVirtualMemory(NtCurrentProcess(), &TmpAddress, &TmpSize, PAGE_EXECUTE_READWRITE, &TmpProt); return true; @@ -95,4 +97,4 @@ bool InvalidationTracker::HandleRWXAccessViolation(FEXCore::Core::InternalThread } return false; } -} +} // namespace FEX::Windows diff --git a/Source/Windows/WOW64/Module.cpp b/Source/Windows/WOW64/Module.cpp index f0a7a02470..70e84945e8 100644 --- a/Source/Windows/WOW64/Module.cpp +++ b/Source/Windows/WOW64/Module.cpp @@ -44,16 +44,16 @@ desc: Implements the WOW64 BT module API using FEXCore #include namespace ControlBits { - // When this is unset, a thread can be safely interrupted and have its context recovered - // IMPORTANT: This can only safely be written by the owning thread - static constexpr uint32_t IN_JIT{1U << 0}; +// When this is unset, a 
thread can be safely interrupted and have its context recovered +// IMPORTANT: This can only safely be written by the owning thread +static constexpr uint32_t IN_JIT {1U << 0}; - // JIT entry polls this bit until it is unset, at which point CONTROL_IN_JIT will be set - static constexpr uint32_t PAUSED{1U << 1}; +// JIT entry polls this bit until it is unset, at which point CONTROL_IN_JIT will be set +static constexpr uint32_t PAUSED {1U << 1}; - // When this is set, the CPU context stored in the CPU area has not yet been flushed to the FEX TLS - static constexpr uint32_t WOW_CPU_AREA_DIRTY{1U << 2}; -}; +// When this is set, the CPU context stored in the CPU area has not yet been flushed to the FEX TLS +static constexpr uint32_t WOW_CPU_AREA_DIRTY {1U << 2}; +}; // namespace ControlBits struct TLS { enum class Slot : size_t { @@ -62,287 +62,283 @@ struct TLS { THREAD_STATE = WOW64_TLS_MAX_NUMBER - 2, }; - _TEB *TEB; + _TEB* TEB; - explicit TLS(_TEB *TEB) : TEB(TEB) {} + explicit TLS(_TEB* TEB) + : TEB(TEB) {} - std::atomic &ControlWord() const { + std::atomic& ControlWord() const { // TODO: Change this when libc++ gains std::atomic_ref support - return reinterpret_cast &>(TEB->TlsSlots[FEXCore::ToUnderlying(Slot::CONTROL_WORD)]); + return reinterpret_cast&>(TEB->TlsSlots[FEXCore::ToUnderlying(Slot::CONTROL_WORD)]); } - CONTEXT *&EntryContext() const { - return reinterpret_cast(TEB->TlsSlots[FEXCore::ToUnderlying(Slot::ENTRY_CONTEXT)]); + CONTEXT*& EntryContext() const { + return reinterpret_cast(TEB->TlsSlots[FEXCore::ToUnderlying(Slot::ENTRY_CONTEXT)]); } - FEXCore::Core::InternalThreadState *&ThreadState() const { - return reinterpret_cast(TEB->TlsSlots[FEXCore::ToUnderlying(Slot::THREAD_STATE)]); + FEXCore::Core::InternalThreadState*& ThreadState() const { + return reinterpret_cast(TEB->TlsSlots[FEXCore::ToUnderlying(Slot::THREAD_STATE)]); } }; class WowSyscallHandler; namespace { - namespace BridgeInstrs { - // These directly jumped to by the guest to make 
system calls - uint16_t Syscall{0x2ecd}; - uint16_t UnixCall{0x2ecd}; - } +namespace BridgeInstrs { + // These directly jumped to by the guest to make system calls + uint16_t Syscall {0x2ecd}; + uint16_t UnixCall {0x2ecd}; +} // namespace BridgeInstrs - fextl::unique_ptr CTX; - fextl::unique_ptr SignalDelegator; - fextl::unique_ptr SyscallHandler; +fextl::unique_ptr CTX; +fextl::unique_ptr SignalDelegator; +fextl::unique_ptr SyscallHandler; - FEX::Windows::InvalidationTracker InvalidationTracker; - std::optional CPUFeatures; +FEX::Windows::InvalidationTracker InvalidationTracker; +std::optional CPUFeatures; - std::mutex ThreadSuspendLock; - std::unordered_set InitializedWOWThreads; // Set of TIDs, `ThreadSuspendLock` must be locked when accessing +std::mutex ThreadSuspendLock; +std::unordered_set InitializedWOWThreads; // Set of TIDs, `ThreadSuspendLock` must be locked when accessing - std::pair GetThreadTLS(HANDLE Thread) { - THREAD_BASIC_INFORMATION Info; - const NTSTATUS Err = NtQueryInformationThread(Thread, ThreadBasicInformation, &Info, sizeof(Info), nullptr); - return {Err, TLS{reinterpret_cast<_TEB *>(Info.TebBaseAddress)}}; - } +std::pair GetThreadTLS(HANDLE Thread) { + THREAD_BASIC_INFORMATION Info; + const NTSTATUS Err = NtQueryInformationThread(Thread, ThreadBasicInformation, &Info, sizeof(Info), nullptr); + return {Err, TLS {reinterpret_cast<_TEB*>(Info.TebBaseAddress)}}; +} - TLS GetTLS() { - return TLS{NtCurrentTeb()}; - } +TLS GetTLS() { + return TLS {NtCurrentTeb()}; +} - uint64_t GetWowTEB(void *TEB) { - static constexpr size_t WowTEBOffsetMemberOffset{0x180c}; - return static_cast(*reinterpret_cast(reinterpret_cast(TEB) + WowTEBOffsetMemberOffset) - + reinterpret_cast(TEB)); - } +uint64_t GetWowTEB(void* TEB) { + static constexpr size_t WowTEBOffsetMemberOffset {0x180c}; + return static_cast( + *reinterpret_cast(reinterpret_cast(TEB) + WowTEBOffsetMemberOffset) + reinterpret_cast(TEB)); +} - bool IsAddressInJit(uint64_t Address) { - return 
GetTLS().ThreadState()->CPUBackend->IsAddressInCodeBuffer(Address); - } +bool IsAddressInJit(uint64_t Address) { + return GetTLS().ThreadState()->CPUBackend->IsAddressInCodeBuffer(Address); } +} // namespace namespace Context { - void LoadStateFromWowContext(FEXCore::Core::InternalThreadState *Thread, uint64_t WowTEB, WOW64_CONTEXT *Context) { - auto &State = Thread->CurrentFrame->State; - - // General register state - - State.gregs[FEXCore::X86State::REG_RAX] = Context->Eax; - State.gregs[FEXCore::X86State::REG_RBX] = Context->Ebx; - State.gregs[FEXCore::X86State::REG_RCX] = Context->Ecx; - State.gregs[FEXCore::X86State::REG_RDX] = Context->Edx; - State.gregs[FEXCore::X86State::REG_RSI] = Context->Esi; - State.gregs[FEXCore::X86State::REG_RDI] = Context->Edi; - State.gregs[FEXCore::X86State::REG_RBP] = Context->Ebp; - State.gregs[FEXCore::X86State::REG_RSP] = Context->Esp; - - State.rip = Context->Eip; - CTX->SetFlagsFromCompactedEFLAGS(Thread, Context->EFlags); - - State.es_idx = Context->SegEs & 0xffff; - State.cs_idx = Context->SegCs & 0xffff; - State.ss_idx = Context->SegSs & 0xffff; - State.ds_idx = Context->SegDs & 0xffff; - State.fs_idx = Context->SegFs & 0xffff; - State.gs_idx = Context->SegGs & 0xffff; - - // The TEB is the only populated GDT entry by default - State.gdt[(Context->SegFs & 0xffff) >> 3].base = WowTEB; - State.fs_cached = WowTEB; - State.es_cached = 0; - State.cs_cached = 0; - State.ss_cached = 0; - State.ds_cached = 0; - - // Floating-point register state - const auto *XSave = reinterpret_cast(Context->ExtendedRegisters); - - memcpy(State.xmm.sse.data, XSave->XmmRegisters, sizeof(State.xmm.sse.data)); - memcpy(State.mm, XSave->FloatRegisters, sizeof(State.mm)); - - State.FCW = XSave->ControlWord; - State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (XSave->StatusWord >> 8) & 1; - State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (XSave->StatusWord >> 9) & 1; - State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (XSave->StatusWord >> 10) & 1; - 
State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (XSave->StatusWord >> 14) & 1; - State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (XSave->StatusWord >> 11) & 0b111; - State.AbridgedFTW = XSave->TagWord; - } - - void StoreWowContextFromState(FEXCore::Core::InternalThreadState *Thread, WOW64_CONTEXT *Context) { - auto &State = Thread->CurrentFrame->State; - - // General register state - - Context->Eax = State.gregs[FEXCore::X86State::REG_RAX]; - Context->Ebx = State.gregs[FEXCore::X86State::REG_RBX]; - Context->Ecx = State.gregs[FEXCore::X86State::REG_RCX]; - Context->Edx = State.gregs[FEXCore::X86State::REG_RDX]; - Context->Esi = State.gregs[FEXCore::X86State::REG_RSI]; - Context->Edi = State.gregs[FEXCore::X86State::REG_RDI]; - Context->Ebp = State.gregs[FEXCore::X86State::REG_RBP]; - Context->Esp = State.gregs[FEXCore::X86State::REG_RSP]; - - Context->Eip = State.rip; - Context->EFlags = CTX->ReconstructCompactedEFLAGS(Thread, false, nullptr, 0); - - Context->SegEs = State.es_idx; - Context->SegCs = State.cs_idx; - Context->SegSs = State.ss_idx; - Context->SegDs = State.ds_idx; - Context->SegFs = State.fs_idx; - Context->SegGs = State.gs_idx; - - // Floating-point register state - - auto *XSave = reinterpret_cast(Context->ExtendedRegisters); - - memcpy(XSave->XmmRegisters, State.xmm.sse.data, sizeof(State.xmm.sse.data)); - memcpy(XSave->FloatRegisters, State.mm, sizeof(State.mm)); - - XSave->ControlWord = State.FCW; - XSave->StatusWord = - (State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] << 11) | - (State.flags[FEXCore::X86State::X87FLAG_C0_LOC] << 8) | - (State.flags[FEXCore::X86State::X87FLAG_C1_LOC] << 9) | - (State.flags[FEXCore::X86State::X87FLAG_C2_LOC] << 10) | - (State.flags[FEXCore::X86State::X87FLAG_C3_LOC] << 14); - XSave->TagWord = State.AbridgedFTW; - - Context->FloatSave.ControlWord = XSave->ControlWord; - Context->FloatSave.StatusWord = XSave->StatusWord; - Context->FloatSave.TagWord = FEXCore::FPState::ConvertFromAbridgedFTW(XSave->StatusWord, 
State.mm, XSave->TagWord); - Context->FloatSave.ErrorOffset = XSave->ErrorOffset; - Context->FloatSave.ErrorSelector = XSave->ErrorSelector | (XSave->ErrorOpcode << 16); - Context->FloatSave.DataOffset = XSave->DataOffset; - Context->FloatSave.DataSelector = XSave->DataSelector; - Context->FloatSave.Cr0NpxState = XSave->StatusWord | 0xffff0000; - } - - NTSTATUS FlushThreadStateContext(HANDLE Thread) { - const auto [Err, TLS] = GetThreadTLS(Thread); - if (Err) { - return Err; - } +void LoadStateFromWowContext(FEXCore::Core::InternalThreadState* Thread, uint64_t WowTEB, WOW64_CONTEXT* Context) { + auto& State = Thread->CurrentFrame->State; + + // General register state + + State.gregs[FEXCore::X86State::REG_RAX] = Context->Eax; + State.gregs[FEXCore::X86State::REG_RBX] = Context->Ebx; + State.gregs[FEXCore::X86State::REG_RCX] = Context->Ecx; + State.gregs[FEXCore::X86State::REG_RDX] = Context->Edx; + State.gregs[FEXCore::X86State::REG_RSI] = Context->Esi; + State.gregs[FEXCore::X86State::REG_RDI] = Context->Edi; + State.gregs[FEXCore::X86State::REG_RBP] = Context->Ebp; + State.gregs[FEXCore::X86State::REG_RSP] = Context->Esp; + + State.rip = Context->Eip; + CTX->SetFlagsFromCompactedEFLAGS(Thread, Context->EFlags); + + State.es_idx = Context->SegEs & 0xffff; + State.cs_idx = Context->SegCs & 0xffff; + State.ss_idx = Context->SegSs & 0xffff; + State.ds_idx = Context->SegDs & 0xffff; + State.fs_idx = Context->SegFs & 0xffff; + State.gs_idx = Context->SegGs & 0xffff; + + // The TEB is the only populated GDT entry by default + State.gdt[(Context->SegFs & 0xffff) >> 3].base = WowTEB; + State.fs_cached = WowTEB; + State.es_cached = 0; + State.cs_cached = 0; + State.ss_cached = 0; + State.ds_cached = 0; + + // Floating-point register state + const auto* XSave = reinterpret_cast(Context->ExtendedRegisters); + + memcpy(State.xmm.sse.data, XSave->XmmRegisters, sizeof(State.xmm.sse.data)); + memcpy(State.mm, XSave->FloatRegisters, sizeof(State.mm)); + + State.FCW = 
XSave->ControlWord; + State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (XSave->StatusWord >> 8) & 1; + State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (XSave->StatusWord >> 9) & 1; + State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (XSave->StatusWord >> 10) & 1; + State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (XSave->StatusWord >> 14) & 1; + State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (XSave->StatusWord >> 11) & 0b111; + State.AbridgedFTW = XSave->TagWord; +} - WOW64_CONTEXT TmpWowContext{ - .ContextFlags = WOW64_CONTEXT_FULL | WOW64_CONTEXT_EXTENDED_REGISTERS - }; +void StoreWowContextFromState(FEXCore::Core::InternalThreadState* Thread, WOW64_CONTEXT* Context) { + auto& State = Thread->CurrentFrame->State; + + // General register state + + Context->Eax = State.gregs[FEXCore::X86State::REG_RAX]; + Context->Ebx = State.gregs[FEXCore::X86State::REG_RBX]; + Context->Ecx = State.gregs[FEXCore::X86State::REG_RCX]; + Context->Edx = State.gregs[FEXCore::X86State::REG_RDX]; + Context->Esi = State.gregs[FEXCore::X86State::REG_RSI]; + Context->Edi = State.gregs[FEXCore::X86State::REG_RDI]; + Context->Ebp = State.gregs[FEXCore::X86State::REG_RBP]; + Context->Esp = State.gregs[FEXCore::X86State::REG_RSP]; + + Context->Eip = State.rip; + Context->EFlags = CTX->ReconstructCompactedEFLAGS(Thread, false, nullptr, 0); + + Context->SegEs = State.es_idx; + Context->SegCs = State.cs_idx; + Context->SegSs = State.ss_idx; + Context->SegDs = State.ds_idx; + Context->SegFs = State.fs_idx; + Context->SegGs = State.gs_idx; + + // Floating-point register state + + auto* XSave = reinterpret_cast(Context->ExtendedRegisters); + + memcpy(XSave->XmmRegisters, State.xmm.sse.data, sizeof(State.xmm.sse.data)); + memcpy(XSave->FloatRegisters, State.mm, sizeof(State.mm)); + + XSave->ControlWord = State.FCW; + XSave->StatusWord = (State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] << 11) | (State.flags[FEXCore::X86State::X87FLAG_C0_LOC] << 8) | + (State.flags[FEXCore::X86State::X87FLAG_C1_LOC] 
<< 9) | (State.flags[FEXCore::X86State::X87FLAG_C2_LOC] << 10) | + (State.flags[FEXCore::X86State::X87FLAG_C3_LOC] << 14); + XSave->TagWord = State.AbridgedFTW; + + Context->FloatSave.ControlWord = XSave->ControlWord; + Context->FloatSave.StatusWord = XSave->StatusWord; + Context->FloatSave.TagWord = FEXCore::FPState::ConvertFromAbridgedFTW(XSave->StatusWord, State.mm, XSave->TagWord); + Context->FloatSave.ErrorOffset = XSave->ErrorOffset; + Context->FloatSave.ErrorSelector = XSave->ErrorSelector | (XSave->ErrorOpcode << 16); + Context->FloatSave.DataOffset = XSave->DataOffset; + Context->FloatSave.DataSelector = XSave->DataSelector; + Context->FloatSave.Cr0NpxState = XSave->StatusWord | 0xffff0000; +} - Context::StoreWowContextFromState(TLS.ThreadState(), &TmpWowContext); - return RtlWow64SetThreadContext(Thread, &TmpWowContext); +NTSTATUS FlushThreadStateContext(HANDLE Thread) { + const auto [Err, TLS] = GetThreadTLS(Thread); + if (Err) { + return Err; } - void ReconstructThreadState(CONTEXT *Context) { - const auto &Config = SignalDelegator->GetConfig(); - auto *Thread = GetTLS().ThreadState(); - auto &State = Thread->CurrentFrame->State; + WOW64_CONTEXT TmpWowContext {.ContextFlags = WOW64_CONTEXT_FULL | WOW64_CONTEXT_EXTENDED_REGISTERS}; - State.rip = CTX->RestoreRIPFromHostPC(Thread, Context->Pc); + Context::StoreWowContextFromState(TLS.ThreadState(), &TmpWowContext); + return RtlWow64SetThreadContext(Thread, &TmpWowContext); +} - // Spill all SRA GPRs - for (size_t i = 0; i < Config.SRAGPRCount; i++) { - State.gregs[i] = Context->X[Config.SRAGPRMapping[i]]; - } +void ReconstructThreadState(CONTEXT* Context) { + const auto& Config = SignalDelegator->GetConfig(); + auto* Thread = GetTLS().ThreadState(); + auto& State = Thread->CurrentFrame->State; - // Spill all SRA FPRs - for (size_t i = 0; i < Config.SRAFPRCount; i++) { - memcpy(State.xmm.sse.data[i], &Context->V[Config.SRAFPRMapping[i]], sizeof(__uint128_t)); - } + State.rip = 
CTX->RestoreRIPFromHostPC(Thread, Context->Pc); + + // Spill all SRA GPRs + for (size_t i = 0; i < Config.SRAGPRCount; i++) { + State.gregs[i] = Context->X[Config.SRAGPRMapping[i]]; } - WOW64_CONTEXT ReconstructWowContext(CONTEXT *Context) { - ReconstructThreadState(Context); + // Spill all SRA FPRs + for (size_t i = 0; i < Config.SRAFPRCount; i++) { + memcpy(State.xmm.sse.data[i], &Context->V[Config.SRAFPRMapping[i]], sizeof(__uint128_t)); + } +} - WOW64_CONTEXT WowContext{ - .ContextFlags = WOW64_CONTEXT_ALL, - }; +WOW64_CONTEXT ReconstructWowContext(CONTEXT* Context) { + ReconstructThreadState(Context); - auto *XSave = reinterpret_cast(WowContext.ExtendedRegisters); - XSave->ControlWord = 0x27f; - XSave->MxCsr = 0x1f80; + WOW64_CONTEXT WowContext { + .ContextFlags = WOW64_CONTEXT_ALL, + }; - Context::StoreWowContextFromState(GetTLS().ThreadState(), &WowContext); - return WowContext; - } + auto* XSave = reinterpret_cast(WowContext.ExtendedRegisters); + XSave->ControlWord = 0x27f; + XSave->MxCsr = 0x1f80; - bool HandleUnalignedAccess(CONTEXT *Context) { - if (!GetTLS().ThreadState()->CPUBackend->IsAddressInCodeBuffer(Context->Pc)) { - return false; - } + Context::StoreWowContextFromState(GetTLS().ThreadState(), &WowContext); + return WowContext; +} - FEX_CONFIG_OPT(ParanoidTSO, PARANOIDTSO); - const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(GetTLS().ThreadState(), ParanoidTSO(), Context->Pc, &Context->X0); - if (!Result.first) { - return false; - } +bool HandleUnalignedAccess(CONTEXT* Context) { + if (!GetTLS().ThreadState()->CPUBackend->IsAddressInCodeBuffer(Context->Pc)) { + return false; + } - Context->Pc += Result.second; - return true; + FEX_CONFIG_OPT(ParanoidTSO, PARANOIDTSO); + const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(GetTLS().ThreadState(), ParanoidTSO(), Context->Pc, &Context->X0); + if (!Result.first) { + return false; } - void LockJITContext() { - uint32_t Expected = GetTLS().ControlWord().load(), 
New; + Context->Pc += Result.second; + return true; +} - // Spin until PAUSED is unset, setting IN_JIT when that occurs - do { - Expected = Expected & ~ControlBits::PAUSED; - New = (Expected | ControlBits::IN_JIT) & ~ControlBits::WOW_CPU_AREA_DIRTY; - } while (!GetTLS().ControlWord().compare_exchange_weak(Expected, New, std::memory_order::relaxed)); - std::atomic_signal_fence(std::memory_order::seq_cst); +void LockJITContext() { + uint32_t Expected = GetTLS().ControlWord().load(), New; - // If the CPU area is dirty, flush it to the JIT context before reentry - if (Expected & ControlBits::WOW_CPU_AREA_DIRTY) { - WOW64_CONTEXT *WowContext; - RtlWow64GetCurrentCpuArea(nullptr, reinterpret_cast(&WowContext), nullptr); - Context::LoadStateFromWowContext(GetTLS().ThreadState(), GetWowTEB(NtCurrentTeb()), WowContext); - } - } + // Spin until PAUSED is unset, setting IN_JIT when that occurs + do { + Expected = Expected & ~ControlBits::PAUSED; + New = (Expected | ControlBits::IN_JIT) & ~ControlBits::WOW_CPU_AREA_DIRTY; + } while (!GetTLS().ControlWord().compare_exchange_weak(Expected, New, std::memory_order::relaxed)); + std::atomic_signal_fence(std::memory_order::seq_cst); - void UnlockJITContext() { - std::atomic_signal_fence(std::memory_order::seq_cst); - GetTLS().ControlWord().fetch_and(~ControlBits::IN_JIT, std::memory_order::relaxed); + // If the CPU area is dirty, flush it to the JIT context before reentry + if (Expected & ControlBits::WOW_CPU_AREA_DIRTY) { + WOW64_CONTEXT* WowContext; + RtlWow64GetCurrentCpuArea(nullptr, reinterpret_cast(&WowContext), nullptr); + Context::LoadStateFromWowContext(GetTLS().ThreadState(), GetWowTEB(NtCurrentTeb()), WowContext); } +} - bool HandleSuspendInterrupt(CONTEXT *Context, uint64_t FaultAddress) { - if (FaultAddress != reinterpret_cast(&GetTLS().ThreadState()->InterruptFaultPage)) { - return false; - } +void UnlockJITContext() { + std::atomic_signal_fence(std::memory_order::seq_cst); + 
GetTLS().ControlWord().fetch_and(~ControlBits::IN_JIT, std::memory_order::relaxed); +} - void *TmpAddress = reinterpret_cast(FaultAddress); - SIZE_T TmpSize = FHU::FEX_PAGE_SIZE; - ULONG TmpProt; - NtProtectVirtualMemory(NtCurrentProcess(), &TmpAddress, &TmpSize, PAGE_READWRITE, &TmpProt); +bool HandleSuspendInterrupt(CONTEXT* Context, uint64_t FaultAddress) { + if (FaultAddress != reinterpret_cast(&GetTLS().ThreadState()->InterruptFaultPage)) { + return false; + } - // Since interrupts only happen at the start of blocks, the reconstructed state should be entirely accurate - ReconstructThreadState(Context); + void* TmpAddress = reinterpret_cast(FaultAddress); + SIZE_T TmpSize = FHU::FEX_PAGE_SIZE; + ULONG TmpProt; + NtProtectVirtualMemory(NtCurrentProcess(), &TmpAddress, &TmpSize, PAGE_READWRITE, &TmpProt); - // Yield to the suspender - UnlockJITContext(); - LockJITContext(); + // Since interrupts only happen at the start of blocks, the reconstructed state should be entirely accurate + ReconstructThreadState(Context); - // Adjust context to return to the dispatcher, reloading SRA from thread state - const auto &Config = SignalDelegator->GetConfig(); - Context->Pc = Config.AbsoluteLoopTopAddressFillSRA; - return true; - } + // Yield to the suspender + UnlockJITContext(); + LockJITContext(); + + // Adjust context to return to the dispatcher, reloading SRA from thread state + const auto& Config = SignalDelegator->GetConfig(); + Context->Pc = Config.AbsoluteLoopTopAddressFillSRA; + return true; } +} // namespace Context namespace Logging { - void MsgHandler(LogMan::DebugLevels Level, char const *Message) { - const auto Output = fextl::fmt::format("[{}][{:X}] {}\n", LogMan::DebugLevelStr(Level), GetCurrentThreadId(), Message); - __wine_dbg_output(Output.c_str()); - } +void MsgHandler(LogMan::DebugLevels Level, const char* Message) { + const auto Output = fextl::fmt::format("[{}][{:X}] {}\n", LogMan::DebugLevelStr(Level), GetCurrentThreadId(), Message); + 
__wine_dbg_output(Output.c_str()); +} - void AssertHandler(char const *Message) { - const auto Output = fextl::fmt::format("[ASSERT] {}\n", Message); - __wine_dbg_output(Output.c_str()); - } +void AssertHandler(const char* Message) { + const auto Output = fextl::fmt::format("[ASSERT] {}\n", Message); + __wine_dbg_output(Output.c_str()); +} - void Init() { - LogMan::Throw::InstallHandler(AssertHandler); - LogMan::Msg::InstallHandler(MsgHandler); - } +void Init() { + LogMan::Throw::InstallHandler(AssertHandler); + LogMan::Msg::InstallHandler(MsgHandler); } +} // namespace Logging class WowSyscallHandler : public FEXCore::HLE::SyscallHandler, public FEXCore::Allocator::FEXAllocOperators { public: @@ -350,9 +346,9 @@ class WowSyscallHandler : public FEXCore::HLE::SyscallHandler, public FEXCore::A OSABI = FEXCore::HLE::SyscallOSABI::OS_WIN32; } - uint64_t HandleSyscall(FEXCore::Core::CpuStateFrame *Frame, FEXCore::HLE::SyscallArguments *Args) override { - const uint64_t ReturnRIP = *(uint32_t *)(Frame->State.gregs[FEXCore::X86State::REG_RSP]); // Return address from the stack - uint64_t ReturnRSP = Frame->State.gregs[FEXCore::X86State::REG_RSP] + 4; // Stack pointer after popping return address + uint64_t HandleSyscall(FEXCore::Core::CpuStateFrame* Frame, FEXCore::HLE::SyscallArguments* Args) override { + const uint64_t ReturnRIP = *(uint32_t*)(Frame->State.gregs[FEXCore::X86State::REG_RSP]); // Return address from the stack + uint64_t ReturnRSP = Frame->State.gregs[FEXCore::X86State::REG_RSP] + 4; // Stack pointer after popping return address uint64_t ReturnRAX = 0; if (Frame->State.rip == (uint64_t)&BridgeInstrs::UnixCall) { @@ -360,7 +356,7 @@ class WowSyscallHandler : public FEXCore::HLE::SyscallHandler, public FEXCore::A unixlib_handle_t Handle; UINT32 ID; ULONG32 Args; - } *StackArgs = reinterpret_cast(ReturnRSP); + }* StackArgs = reinterpret_cast(ReturnRSP); ReturnRSP += sizeof(StackLayout); @@ -372,14 +368,11 @@ class WowSyscallHandler : public 
FEXCore::HLE::SyscallHandler, public FEXCore::A Context::UnlockJITContext(); Wow64ProcessPendingCrossProcessItems(); - ReturnRAX = static_cast(Wow64SystemServiceEx(static_cast(EntryRAX), - reinterpret_cast(ReturnRSP + 4))); + ReturnRAX = static_cast(Wow64SystemServiceEx(static_cast(EntryRAX), reinterpret_cast(ReturnRSP + 4))); Context::LockJITContext(); - } // If a new context has been set, use it directly and don't return to the syscall caller - if (Frame->State.rip == (uint64_t)&BridgeInstrs::Syscall || - Frame->State.rip == (uint64_t)&BridgeInstrs::UnixCall) { + if (Frame->State.rip == (uint64_t)&BridgeInstrs::Syscall || Frame->State.rip == (uint64_t)&BridgeInstrs::UnixCall) { Frame->State.gregs[FEXCore::X86State::REG_RAX] = ReturnRAX; Frame->State.gregs[FEXCore::X86State::REG_RSP] = ReturnRSP; Frame->State.rip = ReturnRIP; @@ -390,14 +383,14 @@ class WowSyscallHandler : public FEXCore::HLE::SyscallHandler, public FEXCore::A } FEXCore::HLE::SyscallABI GetSyscallABI(uint64_t Syscall) override { - return { .NumArgs = 0, .HasReturn = false, .HostSyscallNumber = -1 }; + return {.NumArgs = 0, .HasReturn = false, .HostSyscallNumber = -1}; } - FEXCore::HLE::AOTIRCacheEntryLookupResult LookupAOTIRCacheEntry(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestAddr) override { + FEXCore::HLE::AOTIRCacheEntryLookupResult LookupAOTIRCacheEntry(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestAddr) override { return {0, 0}; } - void MarkGuestExecutableRange(FEXCore::Core::InternalThreadState *Thread, uint64_t Start, uint64_t Length) override { + void MarkGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) override { InvalidationTracker.ReprotectRWXIntervals(Start, Length); } }; @@ -459,15 +452,15 @@ NTSTATUS BTCpuThreadTerm(HANDLE Thread) { return STATUS_SUCCESS; } -void *BTCpuGetBopCode() { +void* BTCpuGetBopCode() { return &BridgeInstrs::Syscall; } -void *__wine_get_unix_opcode() { +void* __wine_get_unix_opcode() 
{ return &BridgeInstrs::UnixCall; } -NTSTATUS BTCpuGetContext(HANDLE Thread, HANDLE Process, void *Unknown, WOW64_CONTEXT *Context) { +NTSTATUS BTCpuGetContext(HANDLE Thread, HANDLE Process, void* Unknown, WOW64_CONTEXT* Context) { auto [Err, TLS] = GetThreadTLS(Thread); if (Err) { return Err; @@ -482,7 +475,7 @@ NTSTATUS BTCpuGetContext(HANDLE Thread, HANDLE Process, void *Unknown, WOW64_CON return RtlWow64GetThreadContext(Thread, Context); } -NTSTATUS BTCpuSetContext(HANDLE Thread, HANDLE Process, void *Unknown, WOW64_CONTEXT *Context) { +NTSTATUS BTCpuSetContext(HANDLE Thread, HANDLE Process, void* Unknown, WOW64_CONTEXT* Context) { auto [Err, TLS] = GetThreadTLS(Thread); if (Err) { return Err; @@ -519,7 +512,7 @@ void BTCpuSimulate() { // APC handling calls BTCpuSimulate from syscalls and then use NtContinue to return to the previous context, // to avoid the saved context being clobbered in this case only save the entry context highest in the stack - if (!GetTLS().EntryContext() || GetTLS().EntryContext()->Sp <= entry_context.Sp) { + if (!GetTLS().EntryContext() || GetTLS().EntryContext()->Sp <= entry_context.Sp) { GetTLS().EntryContext() = &entry_context; } @@ -528,7 +521,7 @@ void BTCpuSimulate() { Context::UnlockJITContext(); } -NTSTATUS BTCpuSuspendLocalThread(HANDLE Thread, ULONG *Count) { +NTSTATUS BTCpuSuspendLocalThread(HANDLE Thread, ULONG* Count) { THREAD_BASIC_INFORMATION Info; if (NTSTATUS Err = NtQueryInformationThread(Thread, ThreadBasicInformation, &Info, sizeof(Info), nullptr); Err) { return Err; @@ -539,8 +532,7 @@ NTSTATUS BTCpuSuspendLocalThread(HANDLE Thread, ULONG *Count) { LogMan::Msg::DFmt("Suspending self"); // Mark the CPU area as dirty, to force the JIT context to be restored from it on entry as it may be changed using // SetThreadContext (which doesn't use the BTCpu API) - if (!(GetTLS().ControlWord().fetch_or(ControlBits::WOW_CPU_AREA_DIRTY, std::memory_order::relaxed) & - ControlBits::WOW_CPU_AREA_DIRTY)) { + if 
(!(GetTLS().ControlWord().fetch_or(ControlBits::WOW_CPU_AREA_DIRTY, std::memory_order::relaxed) & ControlBits::WOW_CPU_AREA_DIRTY)) { if (NTSTATUS Err = Context::FlushThreadStateContext(Thread); Err) { return Err; } @@ -559,8 +551,9 @@ NTSTATUS BTCpuSuspendLocalThread(HANDLE Thread, ULONG *Count) { std::scoped_lock Lock(ThreadSuspendLock); // If the thread hasn't yet been initialized, suspend it without special handling as it wont yet have entered the JIT - if (!InitializedWOWThreads.contains(ThreadTID)) + if (!InitializedWOWThreads.contains(ThreadTID)) { return NtSuspendThread(Thread, Count); + } // If CONTROL_IN_JIT is unset at this point, then it can never be set (and thus the JIT cannot be reentered) as // CONTROL_PAUSED has been set, as such, while this may redundantly request interrupts in rare cases it will never @@ -569,13 +562,14 @@ NTSTATUS BTCpuSuspendLocalThread(HANDLE Thread, ULONG *Count) { LogMan::Msg::DFmt("Thread {:X} is in JIT, polling for interrupt", ThreadTID); ULONG TmpProt; - void *TmpAddress = &TLS.ThreadState()->InterruptFaultPage; + void* TmpAddress = &TLS.ThreadState()->InterruptFaultPage; SIZE_T TmpSize = FHU::FEX_PAGE_SIZE; NtProtectVirtualMemory(NtCurrentProcess(), &TmpAddress, &TmpSize, PAGE_READONLY, &TmpProt); } // Spin until the JIT is interrupted - while (TLS.ControlWord().load() & ControlBits::IN_JIT); + while (TLS.ControlWord().load() & ControlBits::IN_JIT) + ; // The JIT has now been interrupted and the context stored in the thread's CPU area is up-to-date if (Err = NtSuspendThread(Thread, Count); Err) { @@ -583,7 +577,7 @@ NTSTATUS BTCpuSuspendLocalThread(HANDLE Thread, ULONG *Count) { return Err; } - CONTEXT TmpContext{ + CONTEXT TmpContext { .ContextFlags = CONTEXT_INTEGER, }; @@ -608,9 +602,9 @@ NTSTATUS BTCpuSuspendLocalThread(HANDLE Thread, ULONG *Count) { return Err; } -NTSTATUS BTCpuResetToConsistentState(EXCEPTION_POINTERS *Ptrs) { - auto *Context = Ptrs->ContextRecord; - const auto *Exception = Ptrs->ExceptionRecord; 
+NTSTATUS BTCpuResetToConsistentState(EXCEPTION_POINTERS* Ptrs) { + auto* Context = Ptrs->ContextRecord; + const auto* Exception = Ptrs->ExceptionRecord; if (Exception->ExceptionCode == EXCEPTION_DATATYPE_MISALIGNMENT && Context::HandleUnalignedAccess(Context)) { LogMan::Msg::DFmt("Handled unaligned atomic: new pc: {:X}", Context->Pc); NtContinue(Context, FALSE); @@ -648,21 +642,21 @@ NTSTATUS BTCpuResetToConsistentState(EXCEPTION_POINTERS *Ptrs) { return STATUS_SUCCESS; } -void BTCpuFlushInstructionCache2(const void *Address, SIZE_T Size) { +void BTCpuFlushInstructionCache2(const void* Address, SIZE_T Size) { InvalidationTracker.InvalidateAlignedInterval(GetTLS().ThreadState(), reinterpret_cast(Address), static_cast(Size), false); } -void BTCpuNotifyMemoryAlloc(void *Address, SIZE_T Size, ULONG Type, ULONG Prot) { - InvalidationTracker.HandleMemoryProtectionNotification(GetTLS().ThreadState(), reinterpret_cast(Address), static_cast(Size), - Prot); +void BTCpuNotifyMemoryAlloc(void* Address, SIZE_T Size, ULONG Type, ULONG Prot) { + InvalidationTracker.HandleMemoryProtectionNotification(GetTLS().ThreadState(), reinterpret_cast(Address), + static_cast(Size), Prot); } -void BTCpuNotifyMemoryProtect(void *Address, SIZE_T Size, ULONG NewProt) { - InvalidationTracker.HandleMemoryProtectionNotification(GetTLS().ThreadState(), reinterpret_cast(Address), static_cast(Size), - NewProt); +void BTCpuNotifyMemoryProtect(void* Address, SIZE_T Size, ULONG NewProt) { + InvalidationTracker.HandleMemoryProtectionNotification(GetTLS().ThreadState(), reinterpret_cast(Address), + static_cast(Size), NewProt); } -void BTCpuNotifyMemoryFree(void *Address, SIZE_T Size, ULONG FreeType) { +void BTCpuNotifyMemoryFree(void* Address, SIZE_T Size, ULONG FreeType) { if (!Size) { InvalidationTracker.InvalidateContainingSection(GetTLS().ThreadState(), reinterpret_cast(Address), true); } else if (FreeType & MEM_DECOMMIT) { @@ -670,7 +664,7 @@ void BTCpuNotifyMemoryFree(void *Address, SIZE_T Size, 
ULONG FreeType) { } } -void BTCpuNotifyUnmapViewOfSection(void *Address, ULONG Flags) { +void BTCpuNotifyUnmapViewOfSection(void* Address, ULONG Flags) { InvalidationTracker.InvalidateContainingSection(GetTLS().ThreadState(), reinterpret_cast(Address), true); } @@ -678,7 +672,7 @@ BOOLEAN WINAPI BTCpuIsProcessorFeaturePresent(UINT Feature) { return CPUFeatures->IsFeaturePresent(Feature) ? TRUE : FALSE; } -BOOLEAN BTCpuUpdateProcessorInformation(SYSTEM_CPU_INFORMATION *Info) { +BOOLEAN BTCpuUpdateProcessorInformation(SYSTEM_CPU_INFORMATION* Info) { CPUFeatures->UpdateInformation(Info); return TRUE; } diff --git a/ThunkLibs/Generator/analysis.cpp b/ThunkLibs/Generator/analysis.cpp index fa730a7652..ee610a69c2 100644 --- a/ThunkLibs/Generator/analysis.cpp +++ b/ThunkLibs/Generator/analysis.cpp @@ -7,619 +7,613 @@ #include struct NamespaceAnnotations { - std::optional version; - std::optional load_host_endpoint_via; - bool generate_guest_symtable = false; - bool indirect_guest_calls = false; + std::optional version; + std::optional load_host_endpoint_via; + bool generate_guest_symtable = false; + bool indirect_guest_calls = false; }; static NamespaceAnnotations GetNamespaceAnnotations(clang::ASTContext& context, clang::CXXRecordDecl* decl) { - if (!decl->hasDefinition()) { - return {}; - } - - ErrorReporter report_error { context }; - NamespaceAnnotations ret; - - for (const clang::CXXBaseSpecifier& base : decl->bases()) { - auto annotation = base.getType().getAsString(); - if (annotation == "fexgen::generate_guest_symtable") { - ret.generate_guest_symtable = true; - } else if (annotation == "fexgen::indirect_guest_calls") { - ret.indirect_guest_calls = true; - } else { - throw report_error(base.getSourceRange().getBegin(), "Unknown namespace annotation"); - } + if (!decl->hasDefinition()) { + return {}; + } + + ErrorReporter report_error {context}; + NamespaceAnnotations ret; + + for (const clang::CXXBaseSpecifier& base : decl->bases()) { + auto annotation = 
base.getType().getAsString(); + if (annotation == "fexgen::generate_guest_symtable") { + ret.generate_guest_symtable = true; + } else if (annotation == "fexgen::indirect_guest_calls") { + ret.indirect_guest_calls = true; + } else { + throw report_error(base.getSourceRange().getBegin(), "Unknown namespace annotation"); } - - for (const clang::FieldDecl* field : decl->fields()) { - auto name = field->getNameAsString(); - if (name == "load_host_endpoint_via") { - auto loader_function_expr = field->getInClassInitializer()->IgnoreCasts(); - auto loader_function_str = llvm::dyn_cast_or_null(loader_function_expr); - if (loader_function_expr && !loader_function_str) { - throw report_error(loader_function_expr->getBeginLoc(), - "Must initialize load_host_endpoint_via with a string"); - } - if (loader_function_str) { - ret.load_host_endpoint_via = loader_function_str->getString(); - } - } else if (name == "version") { - auto initializer = field->getInClassInitializer()->IgnoreCasts(); - auto version_literal = llvm::dyn_cast_or_null(initializer); - if (!initializer || !version_literal) { - throw report_error(field->getBeginLoc(), "No version given (expected integral typed member, e.g. 
\"int version = 5;\")"); - } - ret.version = version_literal->getValue().getZExtValue(); - } else { - throw report_error(field->getBeginLoc(), "Unknown namespace annotation"); - } + } + + for (const clang::FieldDecl* field : decl->fields()) { + auto name = field->getNameAsString(); + if (name == "load_host_endpoint_via") { + auto loader_function_expr = field->getInClassInitializer()->IgnoreCasts(); + auto loader_function_str = llvm::dyn_cast_or_null(loader_function_expr); + if (loader_function_expr && !loader_function_str) { + throw report_error(loader_function_expr->getBeginLoc(), "Must initialize load_host_endpoint_via with a string"); + } + if (loader_function_str) { + ret.load_host_endpoint_via = loader_function_str->getString(); + } + } else if (name == "version") { + auto initializer = field->getInClassInitializer()->IgnoreCasts(); + auto version_literal = llvm::dyn_cast_or_null(initializer); + if (!initializer || !version_literal) { + throw report_error(field->getBeginLoc(), "No version given (expected integral typed member, e.g. 
\"int version = 5;\")"); + } + ret.version = version_literal->getValue().getZExtValue(); + } else { + throw report_error(field->getBeginLoc(), "Unknown namespace annotation"); } + } - return ret; + return ret; } enum class CallbackStrategy { - Default, - Stub, + Default, + Stub, }; struct Annotations { - bool custom_host_impl = false; - bool custom_guest_entrypoint = false; + bool custom_host_impl = false; + bool custom_guest_entrypoint = false; - bool returns_guest_pointer = false; + bool returns_guest_pointer = false; - std::optional uniform_va_type; + std::optional uniform_va_type; - CallbackStrategy callback_strategy = CallbackStrategy::Default; + CallbackStrategy callback_strategy = CallbackStrategy::Default; }; static Annotations GetAnnotations(clang::ASTContext& context, clang::CXXRecordDecl* decl) { - ErrorReporter report_error { context }; - Annotations ret; - - for (const auto& base : decl->bases()) { - auto annotation = base.getType().getAsString(); - if (annotation == "fexgen::returns_guest_pointer") { - ret.returns_guest_pointer = true; - } else if (annotation == "fexgen::custom_host_impl") { - ret.custom_host_impl = true; - } else if (annotation == "fexgen::callback_stub") { - ret.callback_strategy = CallbackStrategy::Stub; - } else if (annotation == "fexgen::custom_guest_entrypoint") { - ret.custom_guest_entrypoint = true; - } else { - throw report_error(base.getSourceRange().getBegin(), "Unknown annotation"); - } + ErrorReporter report_error {context}; + Annotations ret; + + for (const auto& base : decl->bases()) { + auto annotation = base.getType().getAsString(); + if (annotation == "fexgen::returns_guest_pointer") { + ret.returns_guest_pointer = true; + } else if (annotation == "fexgen::custom_host_impl") { + ret.custom_host_impl = true; + } else if (annotation == "fexgen::callback_stub") { + ret.callback_strategy = CallbackStrategy::Stub; + } else if (annotation == "fexgen::custom_guest_entrypoint") { + ret.custom_guest_entrypoint = true; + } 
else { + throw report_error(base.getSourceRange().getBegin(), "Unknown annotation"); } - - for (const auto& child_decl : decl->getPrimaryContext()->decls()) { - if (auto field = llvm::dyn_cast_or_null(child_decl)) { - throw report_error(field->getBeginLoc(), "Unknown field annotation"); - } else if (auto type_alias = llvm::dyn_cast_or_null(child_decl)) { - auto name = type_alias->getNameAsString(); - if (name == "uniform_va_type") { - ret.uniform_va_type = type_alias->getUnderlyingType(); - } else { - throw report_error(type_alias->getBeginLoc(), "Unknown type alias annotation"); - } - } + } + + for (const auto& child_decl : decl->getPrimaryContext()->decls()) { + if (auto field = llvm::dyn_cast_or_null(child_decl)) { + throw report_error(field->getBeginLoc(), "Unknown field annotation"); + } else if (auto type_alias = llvm::dyn_cast_or_null(child_decl)) { + auto name = type_alias->getNameAsString(); + if (name == "uniform_va_type") { + ret.uniform_va_type = type_alias->getUnderlyingType(); + } else { + throw report_error(type_alias->getBeginLoc(), "Unknown type alias annotation"); + } } + } - return ret; + return ret; } void AnalysisAction::ExecuteAction() { - clang::ASTFrontendAction::ExecuteAction(); - - // Post-processing happens here rather than in an overridden EndSourceFileAction implementation. - // We can't move the logic to the latter since this code might still raise errors, but - // clang's diagnostics engine is already shut down by the time EndSourceFileAction is called. 
- - auto& context = getCompilerInstance().getASTContext(); - if (context.getDiagnostics().hasErrorOccurred()) { - return; - } - decl_contexts.front() = context.getTranslationUnitDecl(); - - try { - ParseInterface(context); - CoverReferencedTypes(context); - OnAnalysisComplete(context); - } catch (ClangDiagnosticAsException& exception) { - exception.Report(context.getDiagnostics()); - } + clang::ASTFrontendAction::ExecuteAction(); + + // Post-processing happens here rather than in an overridden EndSourceFileAction implementation. + // We can't move the logic to the latter since this code might still raise errors, but + // clang's diagnostics engine is already shut down by the time EndSourceFileAction is called. + + auto& context = getCompilerInstance().getASTContext(); + if (context.getDiagnostics().hasErrorOccurred()) { + return; + } + decl_contexts.front() = context.getTranslationUnitDecl(); + + try { + ParseInterface(context); + CoverReferencedTypes(context); + OnAnalysisComplete(context); + } catch (ClangDiagnosticAsException& exception) { + exception.Report(context.getDiagnostics()); + } } -static clang::ClassTemplateDecl* -FindClassTemplateDeclByName(clang::DeclContext& decl_context, std::string_view symbol_name) { - auto& ast_context = decl_context.getParentASTContext(); - auto* ident = &ast_context.Idents.get(symbol_name); - auto declname = ast_context.DeclarationNames.getIdentifier(ident); - auto result = decl_context.noload_lookup(declname); - if (result.empty()) { - return nullptr; - } else if (std::next(result.begin()) == result.end()) { - return llvm::dyn_cast(*result.begin()); - } else { - throw std::runtime_error("Found multiple matches to symbol " + std::string { symbol_name }); - } +static clang::ClassTemplateDecl* FindClassTemplateDeclByName(clang::DeclContext& decl_context, std::string_view symbol_name) { + auto& ast_context = decl_context.getParentASTContext(); + auto* ident = &ast_context.Idents.get(symbol_name); + auto declname = 
ast_context.DeclarationNames.getIdentifier(ident); + auto result = decl_context.noload_lookup(declname); + if (result.empty()) { + return nullptr; + } else if (std::next(result.begin()) == result.end()) { + return llvm::dyn_cast(*result.begin()); + } else { + throw std::runtime_error("Found multiple matches to symbol " + std::string {symbol_name}); + } } struct TypeAnnotations { - bool is_opaque = false; - bool assumed_compatible = false; - bool emit_layout_wrappers = false; + bool is_opaque = false; + bool assumed_compatible = false; + bool emit_layout_wrappers = false; }; static TypeAnnotations GetTypeAnnotations(clang::ASTContext& context, clang::CXXRecordDecl* decl) { - if (!decl->hasDefinition()) { - return {}; - } - - ErrorReporter report_error { context }; - TypeAnnotations ret; - - for (const clang::CXXBaseSpecifier& base : decl->bases()) { - auto annotation = base.getType().getAsString(); - if (annotation == "fexgen::opaque_type") { - ret.is_opaque = true; - } else if (annotation == "fexgen::assume_compatible_data_layout") { - ret.assumed_compatible = true; - } else if (annotation == "fexgen::emit_layout_wrappers") { - ret.emit_layout_wrappers = true; - } else { - throw report_error(base.getSourceRange().getBegin(), "Unknown type annotation"); - } + if (!decl->hasDefinition()) { + return {}; + } + + ErrorReporter report_error {context}; + TypeAnnotations ret; + + for (const clang::CXXBaseSpecifier& base : decl->bases()) { + auto annotation = base.getType().getAsString(); + if (annotation == "fexgen::opaque_type") { + ret.is_opaque = true; + } else if (annotation == "fexgen::assume_compatible_data_layout") { + ret.assumed_compatible = true; + } else if (annotation == "fexgen::emit_layout_wrappers") { + ret.emit_layout_wrappers = true; + } else { + throw report_error(base.getSourceRange().getBegin(), "Unknown type annotation"); } + } - return ret; + return ret; } static ParameterAnnotations GetParameterAnnotations(clang::ASTContext& context, 
clang::CXXRecordDecl* decl) { - if (!decl->hasDefinition()) { - return {}; + if (!decl->hasDefinition()) { + return {}; + } + + ErrorReporter report_error {context}; + ParameterAnnotations ret; + + for (const clang::CXXBaseSpecifier& base : decl->bases()) { + auto annotation = base.getType().getAsString(); + if (annotation == "fexgen::ptr_passthrough") { + ret.is_passthrough = true; + } else if (annotation == "fexgen::assume_compatible_data_layout") { + ret.assume_compatible = true; + } else { + throw report_error(base.getSourceRange().getBegin(), "Unknown parameter annotation"); } + } - ErrorReporter report_error { context }; - ParameterAnnotations ret; + return ret; +} - for (const clang::CXXBaseSpecifier& base : decl->bases()) { - auto annotation = base.getType().getAsString(); - if (annotation == "fexgen::ptr_passthrough") { - ret.is_passthrough = true; - } else if (annotation == "fexgen::assume_compatible_data_layout") { - ret.assume_compatible = true; - } else { - throw report_error(base.getSourceRange().getBegin(), "Unknown parameter annotation"); +void AnalysisAction::ParseInterface(clang::ASTContext& context) { + ErrorReporter report_error {context}; + + const std::unordered_map no_param_annotations {}; + + // TODO: Assert fex_gen_type is not declared at non-global namespaces + if (auto template_decl = FindClassTemplateDeclByName(*context.getTranslationUnitDecl(), "fex_gen_type")) { + for (auto* decl : template_decl->specializations()) { + const auto& template_args = decl->getTemplateArgs(); + assert(template_args.size() == 1); + + // NOTE: Function types that are equivalent but use differently + // named types (e.g. GLuint/GLenum) are represented by + // different Type instances. The canonical type they refer + // to is unique, however. 
+ clang::QualType type = context.getCanonicalType(template_args[0].getAsType()); + type = type->getLocallyUnqualifiedSingleStepDesugaredType(); + + const auto annotations = GetTypeAnnotations(context, decl); + if (type->isFunctionPointerType() || type->isFunctionType()) { + if (decl->getNumBases()) { + throw report_error(decl->getBeginLoc(), "Function pointer types cannot be annotated"); } + thunked_funcptrs[type.getAsString()] = std::pair {type.getTypePtr(), no_param_annotations}; + } else { + RepackedType repack_info = {.assumed_compatible = annotations.is_opaque || annotations.assumed_compatible, + .pointers_only = annotations.is_opaque && !annotations.assumed_compatible, + .emit_layout_wrappers = annotations.emit_layout_wrappers}; + [[maybe_unused]] auto [it, inserted] = types.emplace(context.getCanonicalType(type.getTypePtr()), repack_info); + assert(inserted); + } } + } + + // Process function parameter annotations + std::unordered_map> param_annotations; + for (auto& decl_context : decl_contexts) { + if (auto template_decl = FindClassTemplateDeclByName(*decl_context, "fex_gen_param")) { + for (auto* decl : template_decl->specializations()) { + const auto& template_args = decl->getTemplateArgs(); + assert(template_args.size() == 3); + + auto function = llvm::dyn_cast(template_args[0].getAsDecl()); + auto param_idx = template_args[1].getAsIntegral().getZExtValue(); + clang::QualType type = context.getCanonicalType(template_args[2].getAsType()); + type = type->getLocallyUnqualifiedSingleStepDesugaredType(); + + if (param_idx >= function->getNumParams()) { + throw report_error(decl->getTypeAsWritten()->getTypeLoc().getAs().getArgLoc(1).getLocation(), + "Out-of-bounds parameter index passed to fex_gen_param"); + } - return ret; -} - -void AnalysisAction::ParseInterface(clang::ASTContext& context) { - ErrorReporter report_error { context }; - - const std::unordered_map no_param_annotations {}; - - // TODO: Assert fex_gen_type is not declared at non-global 
namespaces - if (auto template_decl = FindClassTemplateDeclByName(*context.getTranslationUnitDecl(), "fex_gen_type")) { - for (auto* decl : template_decl->specializations()) { - const auto& template_args = decl->getTemplateArgs(); - assert(template_args.size() == 1); - - // NOTE: Function types that are equivalent but use differently - // named types (e.g. GLuint/GLenum) are represented by - // different Type instances. The canonical type they refer - // to is unique, however. - clang::QualType type = context.getCanonicalType(template_args[0].getAsType()); - type = type->getLocallyUnqualifiedSingleStepDesugaredType(); - - const auto annotations = GetTypeAnnotations(context, decl); - if (type->isFunctionPointerType() || type->isFunctionType()) { - if (decl->getNumBases()) { - throw report_error(decl->getBeginLoc(), "Function pointer types cannot be annotated"); - } - thunked_funcptrs[type.getAsString()] = std::pair { type.getTypePtr(), no_param_annotations }; - } else { - RepackedType repack_info = { - .assumed_compatible = annotations.is_opaque || annotations.assumed_compatible, - .pointers_only = annotations.is_opaque && !annotations.assumed_compatible, - .emit_layout_wrappers = annotations.emit_layout_wrappers - }; - [[maybe_unused]] auto [it, inserted] = types.emplace(context.getCanonicalType(type.getTypePtr()), repack_info); - assert(inserted); - } + if (!type->isVoidType() && !context.hasSameType(type, function->getParamDecl(param_idx)->getType())) { + throw report_error(decl->getTypeAsWritten()->getTypeLoc().getAs().getArgLoc(2).getLocation(), + "Type passed to fex_gen_param doesn't match the function signature") + .addNote(report_error(function->getParamDecl(param_idx)->getTypeSourceInfo()->getTypeLoc().getBeginLoc(), "Expected this type " + "instead")); } + + param_annotations[function][param_idx] = GetParameterAnnotations(context, decl); + } } + } + + // Process declarations and specializations of fex_gen_config, + // i.e. 
the function descriptions of the thunked API + for (auto& decl_context : decl_contexts) { + if (const auto template_decl = FindClassTemplateDeclByName(*decl_context, "fex_gen_config")) { + // Gather general information about symbols in this namespace + const auto annotations = GetNamespaceAnnotations(context, template_decl->getTemplatedDecl()); + + auto namespace_decl = llvm::dyn_cast(decl_context); + namespaces.push_back( + {namespace_decl, namespace_decl ? namespace_decl->getNameAsString() : "", annotations.load_host_endpoint_via.value_or(""), + annotations.generate_guest_symtable, annotations.indirect_guest_calls}); + const auto namespace_idx = namespaces.size() - 1; + const NamespaceInfo& namespace_info = namespaces.back(); + + if (annotations.version) { + if (namespace_decl) { + throw report_error(template_decl->getBeginLoc(), "Library version must be defined in the global namespace"); + } + lib_version = annotations.version; + } + + // Process specializations of template fex_gen_config + // First, perform some validation and process member annotations + // In a second iteration, process the actual function API + for (auto* decl : template_decl->specializations()) { + if (decl->getSpecializationKind() == clang::TSK_ExplicitInstantiationDefinition) { + throw report_error(decl->getBeginLoc(), "fex_gen_config may not be partially specialized\n"); + } - // Process function parameter annotations - std::unordered_map> param_annotations; - for (auto& decl_context : decl_contexts) { - if (auto template_decl = FindClassTemplateDeclByName(*decl_context, "fex_gen_param")) { - for (auto* decl : template_decl->specializations()) { - const auto& template_args = decl->getTemplateArgs(); - assert(template_args.size() == 3); - - auto function = llvm::dyn_cast(template_args[0].getAsDecl()); - auto param_idx = template_args[1].getAsIntegral().getZExtValue(); - clang::QualType type = context.getCanonicalType(template_args[2].getAsType()); - type = 
type->getLocallyUnqualifiedSingleStepDesugaredType(); - - if (param_idx >= function->getNumParams()) { - throw report_error(decl->getTypeAsWritten()->getTypeLoc().getAs().getArgLoc(1).getLocation(), "Out-of-bounds parameter index passed to fex_gen_param"); - } + const auto& template_args = decl->getTemplateArgs(); + assert(template_args.size() == 1); - if (!type->isVoidType() && !context.hasSameType(type, function->getParamDecl(param_idx)->getType())) { - throw report_error(decl->getTypeAsWritten()->getTypeLoc().getAs().getArgLoc(2).getLocation(), "Type passed to fex_gen_param doesn't match the function signature") - .addNote(report_error(function->getParamDecl(param_idx)->getTypeSourceInfo()->getTypeLoc().getBeginLoc(), "Expected this type instead")); - } + const auto template_arg_loc = + decl->getTypeAsWritten()->getTypeLoc().castAs().getArgLoc(0).getLocation(); - param_annotations[function][param_idx] = GetParameterAnnotations(context, decl); + if (auto emitted_function = llvm::dyn_cast(template_args[0].getAsDecl())) { + // Process later + } else if (auto annotated_member = llvm::dyn_cast(template_args[0].getAsDecl())) { + { + if (decl->getNumBases() != 1 || decl->bases_begin()->getType().getAsString() != "fexgen::custom_repack") { + throw report_error(template_arg_loc, "Unsupported member annotation(s)"); } - } - } - // Process declarations and specializations of fex_gen_config, - // i.e. the function descriptions of the thunked API - for (auto& decl_context : decl_contexts) { - if (const auto template_decl = FindClassTemplateDeclByName(*decl_context, "fex_gen_config")) { - // Gather general information about symbols in this namespace - const auto annotations = GetNamespaceAnnotations(context, template_decl->getTemplatedDecl()); - - auto namespace_decl = llvm::dyn_cast(decl_context); - namespaces.push_back({ namespace_decl, - namespace_decl ? 
namespace_decl->getNameAsString() : "", - annotations.load_host_endpoint_via.value_or(""), - annotations.generate_guest_symtable, - annotations.indirect_guest_calls }); - const auto namespace_idx = namespaces.size() - 1; - const NamespaceInfo& namespace_info = namespaces.back(); - - if (annotations.version) { - if (namespace_decl) { - throw report_error(template_decl->getBeginLoc(), "Library version must be defined in the global namespace"); - } - lib_version = annotations.version; + if (!annotated_member->getType()->isPointerType() && !annotated_member->getType()->isArrayType()) { + throw report_error(template_arg_loc, "custom_repack annotation requires pointer member"); } + } + + // Get or add parent type to list of structure types + auto repack_info_it = types.emplace(context.getCanonicalType(annotated_member->getParent()->getTypeForDecl()), RepackedType {}).first; + if (repack_info_it->second.assumed_compatible) { + throw report_error(template_arg_loc, "May not annotate members of opaque types"); + } + // Add member to its list of members + repack_info_it->second.custom_repacked_members.insert(annotated_member->getNameAsString()); + } else { + throw report_error(template_arg_loc, "Cannot annotate this kind of symbol"); + } + } - // Process specializations of template fex_gen_config - // First, perform some validation and process member annotations - // In a second iteration, process the actual function API - for (auto* decl : template_decl->specializations()) { - if (decl->getSpecializationKind() == clang::TSK_ExplicitInstantiationDefinition) { - throw report_error(decl->getBeginLoc(), "fex_gen_config may not be partially specialized\n"); - } + // Process API functions + for (auto* decl : template_decl->specializations()) { + if (decl->getSpecializationKind() == clang::TSK_ExplicitInstantiationDefinition) { + throw report_error(decl->getBeginLoc(), "fex_gen_config may not be partially specialized\n"); + } - const auto& template_args = decl->getTemplateArgs(); - 
assert(template_args.size() == 1); - - const auto template_arg_loc = decl->getTypeAsWritten()->getTypeLoc().castAs().getArgLoc(0).getLocation(); - - if (auto emitted_function = llvm::dyn_cast(template_args[0].getAsDecl())) { - // Process later - } else if (auto annotated_member = llvm::dyn_cast(template_args[0].getAsDecl())) { - { - if (decl->getNumBases() != 1 || decl->bases_begin()->getType().getAsString() != "fexgen::custom_repack") { - throw report_error(template_arg_loc, "Unsupported member annotation(s)"); - } - - if (!annotated_member->getType()->isPointerType() && !annotated_member->getType()->isArrayType()) { - throw report_error(template_arg_loc, "custom_repack annotation requires pointer member"); - } - } - - // Get or add parent type to list of structure types - auto repack_info_it = types.emplace(context.getCanonicalType(annotated_member->getParent()->getTypeForDecl()), RepackedType {}).first; - if (repack_info_it->second.assumed_compatible) { - throw report_error(template_arg_loc, "May not annotate members of opaque types"); - } - // Add member to its list of members - repack_info_it->second.custom_repacked_members.insert(annotated_member->getNameAsString()); - } else { - throw report_error(template_arg_loc, "Cannot annotate this kind of symbol"); - } - } + const auto& template_args = decl->getTemplateArgs(); + assert(template_args.size() == 1); - // Process API functions - for (auto* decl : template_decl->specializations()) { - if (decl->getSpecializationKind() == clang::TSK_ExplicitInstantiationDefinition) { - throw report_error(decl->getBeginLoc(), "fex_gen_config may not be partially specialized\n"); - } + const auto template_arg_loc = + decl->getTypeAsWritten()->getTypeLoc().castAs().getArgLoc(0).getLocation(); - const auto& template_args = decl->getTemplateArgs(); - assert(template_args.size() == 1); - - const auto template_arg_loc = decl->getTypeAsWritten()->getTypeLoc().castAs().getArgLoc(0).getLocation(); - - if (auto emitted_function = 
llvm::dyn_cast(template_args[0].getAsDecl())) { - auto return_type = emitted_function->getReturnType(); - - const auto annotations = GetAnnotations(context, decl); - if (return_type->isFunctionPointerType() && !annotations.returns_guest_pointer) { - throw report_error( template_arg_loc, - "Function pointer return types require explicit annotation\n"); - } - - // TODO: Use the types as written in the signature instead? - ThunkedFunction data; - data.function_name = emitted_function->getName().str(); - data.return_type = return_type; - data.is_variadic = emitted_function->isVariadic(); - - data.decl = emitted_function; - - data.custom_host_impl = annotations.custom_host_impl; - - data.param_annotations = param_annotations[emitted_function]; - - const int retval_index = -1; - for (int param_idx = retval_index; param_idx < (int)emitted_function->param_size(); ++param_idx) { - auto param_type = param_idx == retval_index ? emitted_function->getReturnType() : emitted_function->getParamDecl(param_idx)->getType(); - auto param_loc = param_idx == retval_index ? emitted_function->getReturnTypeSourceRange().getBegin() : emitted_function->getParamDecl(param_idx)->getBeginLoc(); - - if (param_idx != retval_index) { - data.param_types.push_back(param_type); - } else if (param_type->isVoidType()) { - continue; - } - - // Skip pointers-to-structs passed through to the host in guest_layout. - // This avoids pulling in member types that can't be processed. 
- if (data.param_annotations[param_idx].is_passthrough && - param_type->isPointerType() && param_type->getPointeeType()->isStructureType()) { - continue; - } - - auto check_struct_type = [&](const clang::Type* type) { - if (type->isIncompleteType()) { - throw report_error(type->getAsTagDecl()->getBeginLoc(), "Unannotated pointer with incomplete struct type; consider using an opaque_type annotation") - .addNote(report_error(emitted_function->getNameInfo().getLoc(), "in function", clang::DiagnosticsEngine::Note)) - .addNote(report_error(template_arg_loc, "used in annotation here", clang::DiagnosticsEngine::Note)); - } - - for (auto* member : type->getAsStructureType()->getDecl()->fields()) { - auto annotated_type = types.find(type->getCanonicalTypeUnqualified().getTypePtr()); - if (annotated_type == types.end() || !annotated_type->second.UsesCustomRepackFor(member)) { - /*if (!member->getType()->isPointerType())*/ { - // TODO: Perform more elaborate validation for non-pointers to ensure ABI compatibility - continue; - } - - throw report_error(member->getBeginLoc(), "Unannotated pointer member") - .addNote(report_error(param_loc, "in struct type", clang::DiagnosticsEngine::Note)) - .addNote(report_error(template_arg_loc, "used in annotation here", clang::DiagnosticsEngine::Note)); - } - } - }; - - if (param_type->isFunctionPointerType()) { - if (param_idx == retval_index) { - // TODO: We already rely on this in a few places... 
-// throw report_error(template_arg_loc, "Support for returning function pointers is not implemented"); - continue; - } - auto funcptr = emitted_function->getParamDecl(param_idx)->getFunctionType()->getAs(); - ThunkedCallback callback; - callback.return_type = funcptr->getReturnType(); - for (auto& cb_param : funcptr->getParamTypes()) { - callback.param_types.push_back(cb_param); - } - callback.is_stub = annotations.callback_strategy == CallbackStrategy::Stub; - callback.is_variadic = funcptr->isVariadic(); - - data.callbacks.emplace(param_idx, callback); - if (!callback.is_stub && !data.custom_host_impl) { - thunked_funcptrs[emitted_function->getNameAsString() + "_cb" + std::to_string(param_idx)] = std::pair { context.getCanonicalType(funcptr), no_param_annotations }; - } - - if (data.callbacks.size() != 1) { - throw report_error(template_arg_loc, "Support for more than one callback is untested"); - } - if (funcptr->isVariadic() && !callback.is_stub) { - throw report_error(template_arg_loc, "Variadic callbacks are not supported"); - } - - // Force treatment as passthrough-pointer - data.param_annotations[param_idx].is_passthrough = true; - } else if (param_type->isBuiltinType()) { - // NOTE: Intentionally not using getCanonicalType here since that would turn e.g. size_t into platform-specific types - // TODO: Still, we may want to de-duplicate some of these... 
- types.emplace(param_type.getTypePtr(), RepackedType { }); - } else if (param_type->isEnumeralType()) { - types.emplace(context.getCanonicalType(param_type.getTypePtr()), RepackedType { }); - } else if ( param_type->isStructureType() && - !(types.contains(context.getCanonicalType(param_type.getTypePtr())) && - LookupType(context, param_type.getTypePtr()).assumed_compatible)) { - check_struct_type(param_type.getTypePtr()); - types.emplace(context.getCanonicalType(param_type.getTypePtr()), RepackedType { }); - } else if (param_type->isPointerType()) { - auto pointee_type = param_type->getPointeeType(); - - if (pointee_type->isIntegerType()) { - // Add builtin pointee type to type list - if (!pointee_type->isEnumeralType()) { - types.emplace(pointee_type.getTypePtr(), RepackedType { }); - } else { - types.emplace(context.getCanonicalType(pointee_type.getTypePtr()), RepackedType { }); - } - } - - if (data.param_annotations[param_idx].assume_compatible) { - // Nothing to do - } else if (types.contains(context.getCanonicalType(pointee_type.getTypePtr())) && LookupType(context, pointee_type.getTypePtr()).assumed_compatible) { - // Parameter points to a type that is assumed compatible - data.param_annotations[param_idx].assume_compatible = true; - } else if (pointee_type->isStructureType()) { - // Unannotated pointer to unannotated structure. - // Append the structure type to the type list for checking data layout compatibility. 
- check_struct_type(pointee_type.getTypePtr()); - types.emplace(context.getCanonicalType(pointee_type.getTypePtr()), RepackedType { }); - } else if (data.param_annotations[param_idx].is_passthrough) { - if (!data.custom_host_impl) { - throw report_error(param_loc, "Passthrough annotation requires custom host implementation"); - } - - // Nothing to do - } else if (false /* TODO: Can't check if this is unsupported until data layout analysis is complete */) { - throw report_error(param_loc, "Unsupported parameter type") - .addNote(report_error(emitted_function->getNameInfo().getLoc(), "in function", clang::DiagnosticsEngine::Note)) - .addNote(report_error(template_arg_loc, "used in definition here", clang::DiagnosticsEngine::Note)); - } - } else { - // TODO: For non-pointer parameters, perform more elaborate validation to ensure ABI compatibility - } - } - - thunked_api.push_back(ThunkedAPIFunction { (const FunctionParams&)data, data.function_name, data.return_type, - namespace_info.host_loader.empty() ? 
"dlsym_default" : namespace_info.host_loader, - data.is_variadic || annotations.custom_guest_entrypoint, - data.is_variadic, - std::nullopt }); - if (namespace_info.generate_guest_symtable) { - thunked_api.back().symtable_namespace = namespace_idx; - } - - if (data.is_variadic) { - if (!annotations.uniform_va_type) { - throw report_error(decl->getBeginLoc(), "Variadic functions must be annotated with parameter type using uniform_va_type"); - } - - // Convert variadic argument list into a count + pointer pair - data.param_types.push_back(context.getSizeType()); - data.param_types.push_back(context.getPointerType(*annotations.uniform_va_type)); - types.emplace(context.getSizeType()->getTypePtr(), RepackedType { }); - if (!annotations.uniform_va_type.value()->isVoidPointerType()) { - types.emplace(annotations.uniform_va_type->getTypePtr(), RepackedType { }); - } - } - - if (data.is_variadic) { - // This function is thunked through an "_internal" symbol since its signature - // is different from the one in the native host/guest libraries. 
- data.function_name = data.function_name + "_internal"; - if (data.custom_host_impl) { - throw report_error(decl->getBeginLoc(), "Custom host impl requested but this is implied by the function signature already"); - } - data.custom_host_impl = true; - } - - // For indirect calls, register the function signature as a function pointer type - if (namespace_info.indirect_guest_calls) { - thunked_funcptrs[emitted_function->getNameAsString()] = std::pair { context.getCanonicalType(emitted_function->getFunctionType()), data.param_annotations }; - } - - thunks.push_back(std::move(data)); - } - } - } - } -} + if (auto emitted_function = llvm::dyn_cast(template_args[0].getAsDecl())) { + auto return_type = emitted_function->getReturnType(); -void AnalysisAction::CoverReferencedTypes(clang::ASTContext& context) { - // Add common fixed-size integer types explicitly - for (unsigned size : { 8, 32, 64 }) { - types.emplace(context.getIntTypeForBitwidth(size, false).getTypePtr(), RepackedType {}); - types.emplace(context.getIntTypeForBitwidth(size, true).getTypePtr(), RepackedType {}); - } + const auto annotations = GetAnnotations(context, decl); + if (return_type->isFunctionPointerType() && !annotations.returns_guest_pointer) { + throw report_error(template_arg_loc, "Function pointer return types require explicit annotation\n"); + } - // Repeat until no more children are appended - for (bool changed = true; std::exchange(changed, false);) { - for ( auto next_type_it = types.begin(), type_it = next_type_it; - type_it != types.end(); - type_it = next_type_it) { - ++next_type_it; - const auto& [type, type_repack_info] = *type_it; - if (!type->isStructureType()) { - continue; + // TODO: Use the types as written in the signature instead? 
+ ThunkedFunction data; + data.function_name = emitted_function->getName().str(); + data.return_type = return_type; + data.is_variadic = emitted_function->isVariadic(); + + data.decl = emitted_function; + + data.custom_host_impl = annotations.custom_host_impl; + + data.param_annotations = param_annotations[emitted_function]; + + const int retval_index = -1; + for (int param_idx = retval_index; param_idx < (int)emitted_function->param_size(); ++param_idx) { + auto param_type = + param_idx == retval_index ? emitted_function->getReturnType() : emitted_function->getParamDecl(param_idx)->getType(); + auto param_loc = param_idx == retval_index ? emitted_function->getReturnTypeSourceRange().getBegin() : + emitted_function->getParamDecl(param_idx)->getBeginLoc(); + + if (param_idx != retval_index) { + data.param_types.push_back(param_type); + } else if (param_type->isVoidType()) { + continue; } - if (type_repack_info.assumed_compatible) { - // If assumed compatible, we don't need the member definitions - continue; + // Skip pointers-to-structs passed through to the host in guest_layout. + // This avoids pulling in member types that can't be processed. 
+ if (data.param_annotations[param_idx].is_passthrough && param_type->isPointerType() && + param_type->getPointeeType()->isStructureType()) { + continue; } - for (auto* member : type->getAsStructureType()->getDecl()->fields()) { - auto member_type = member->getType().getTypePtr(); - while (member_type->isArrayType()) { - member_type = member_type->getArrayElementTypeNoTypeQual(); - } - while (member_type->isPointerType()) { - member_type = member_type->getPointeeType().getTypePtr(); - } + auto check_struct_type = [&](const clang::Type* type) { + if (type->isIncompleteType()) { + throw report_error(type->getAsTagDecl()->getBeginLoc(), "Unannotated pointer with incomplete struct type; consider using " + "an opaque_type annotation") + .addNote(report_error(emitted_function->getNameInfo().getLoc(), "in function", clang::DiagnosticsEngine::Note)) + .addNote(report_error(template_arg_loc, "used in annotation here", clang::DiagnosticsEngine::Note)); + } + + for (auto* member : type->getAsStructureType()->getDecl()->fields()) { + auto annotated_type = types.find(type->getCanonicalTypeUnqualified().getTypePtr()); + if (annotated_type == types.end() || !annotated_type->second.UsesCustomRepackFor(member)) { + /*if (!member->getType()->isPointerType())*/ { + // TODO: Perform more elaborate validation for non-pointers to ensure ABI compatibility + continue; + } - if (!member_type->isBuiltinType()) { - member_type = context.getCanonicalType(member_type); + throw report_error(member->getBeginLoc(), "Unannotated pointer member") + .addNote(report_error(param_loc, "in struct type", clang::DiagnosticsEngine::Note)) + .addNote(report_error(template_arg_loc, "used in annotation here", clang::DiagnosticsEngine::Note)); } - if (types.contains(member_type) && types.at(member_type).pointers_only) { - if (member_type == context.getCanonicalType(member->getType().getTypePtr())) { - throw std::runtime_error(fmt::format("\"{}\" references opaque type \"{}\" via non-pointer member \"{}\"", - 
clang::QualType { type, 0 }.getAsString(), - clang::QualType { member_type, 0 }.getAsString(), - member->getNameAsString())); - } - continue; + } + }; + + if (param_type->isFunctionPointerType()) { + if (param_idx == retval_index) { + // TODO: We already rely on this in a few places... + // throw report_error(template_arg_loc, "Support for returning function pointers is not implemented"); + continue; + } + auto funcptr = emitted_function->getParamDecl(param_idx)->getFunctionType()->getAs(); + ThunkedCallback callback; + callback.return_type = funcptr->getReturnType(); + for (auto& cb_param : funcptr->getParamTypes()) { + callback.param_types.push_back(cb_param); + } + callback.is_stub = annotations.callback_strategy == CallbackStrategy::Stub; + callback.is_variadic = funcptr->isVariadic(); + + data.callbacks.emplace(param_idx, callback); + if (!callback.is_stub && !data.custom_host_impl) { + thunked_funcptrs[emitted_function->getNameAsString() + "_cb" + std::to_string(param_idx)] = + std::pair {context.getCanonicalType(funcptr), no_param_annotations}; + } + + if (data.callbacks.size() != 1) { + throw report_error(template_arg_loc, "Support for more than one callback is untested"); + } + if (funcptr->isVariadic() && !callback.is_stub) { + throw report_error(template_arg_loc, "Variadic callbacks are not supported"); + } + + // Force treatment as passthrough-pointer + data.param_annotations[param_idx].is_passthrough = true; + } else if (param_type->isBuiltinType()) { + // NOTE: Intentionally not using getCanonicalType here since that would turn e.g. size_t into platform-specific types + // TODO: Still, we may want to de-duplicate some of these... 
+ types.emplace(param_type.getTypePtr(), RepackedType {}); + } else if (param_type->isEnumeralType()) { + types.emplace(context.getCanonicalType(param_type.getTypePtr()), RepackedType {}); + } else if (param_type->isStructureType() && !(types.contains(context.getCanonicalType(param_type.getTypePtr())) && + LookupType(context, param_type.getTypePtr()).assumed_compatible)) { + check_struct_type(param_type.getTypePtr()); + types.emplace(context.getCanonicalType(param_type.getTypePtr()), RepackedType {}); + } else if (param_type->isPointerType()) { + auto pointee_type = param_type->getPointeeType(); + + if (pointee_type->isIntegerType()) { + // Add builtin pointee type to type list + if (!pointee_type->isEnumeralType()) { + types.emplace(pointee_type.getTypePtr(), RepackedType {}); + } else { + types.emplace(context.getCanonicalType(pointee_type.getTypePtr()), RepackedType {}); } - if (member_type->isUnionType() && !types.contains(member_type) && !type_repack_info.UsesCustomRepackFor(member)) { - throw std::runtime_error(fmt::format("\"{}\" has unannotated member \"{}\" of union type \"{}\"", - clang::QualType { type, 0 }.getAsString(), - member->getNameAsString(), - clang::QualType { member_type, 0 }.getAsString())); + } + + if (data.param_annotations[param_idx].assume_compatible) { + // Nothing to do + } else if (types.contains(context.getCanonicalType(pointee_type.getTypePtr())) && + LookupType(context, pointee_type.getTypePtr()).assumed_compatible) { + // Parameter points to a type that is assumed compatible + data.param_annotations[param_idx].assume_compatible = true; + } else if (pointee_type->isStructureType()) { + // Unannotated pointer to unannotated structure. + // Append the structure type to the type list for checking data layout compatibility. 
+ check_struct_type(pointee_type.getTypePtr()); + types.emplace(context.getCanonicalType(pointee_type.getTypePtr()), RepackedType {}); + } else if (data.param_annotations[param_idx].is_passthrough) { + if (!data.custom_host_impl) { + throw report_error(param_loc, "Passthrough annotation requires custom host implementation"); } - if (!member_type->isStructureType() && !(member_type->isBuiltinType() && !member_type->isVoidType()) && !member_type->isEnumeralType()) { - continue; - } + // Nothing to do + } else if (false /* TODO: Can't check if this is unsupported until data layout analysis is complete */) { + throw report_error(param_loc, "Unsupported parameter type") + .addNote(report_error(emitted_function->getNameInfo().getLoc(), "in function", clang::DiagnosticsEngine::Note)) + .addNote(report_error(template_arg_loc, "used in definition here", clang::DiagnosticsEngine::Note)); + } + } else { + // TODO: For non-pointer parameters, perform more elaborate validation to ensure ABI compatibility + } + } + + thunked_api.push_back(ThunkedAPIFunction {(const FunctionParams&)data, data.function_name, data.return_type, + namespace_info.host_loader.empty() ? 
"dlsym_default" : namespace_info.host_loader, + data.is_variadic || annotations.custom_guest_entrypoint, data.is_variadic, std::nullopt}); + if (namespace_info.generate_guest_symtable) { + thunked_api.back().symtable_namespace = namespace_idx; + } + + if (data.is_variadic) { + if (!annotations.uniform_va_type) { + throw report_error(decl->getBeginLoc(), "Variadic functions must be annotated with parameter type using uniform_va_type"); + } - auto [new_type_it, inserted] = types.emplace(member_type, RepackedType { }); - if (inserted) { - changed = true; - next_type_it = new_type_it; - } + // Convert variadic argument list into a count + pointer pair + data.param_types.push_back(context.getSizeType()); + data.param_types.push_back(context.getPointerType(*annotations.uniform_va_type)); + types.emplace(context.getSizeType()->getTypePtr(), RepackedType {}); + if (!annotations.uniform_va_type.value()->isVoidPointerType()) { + types.emplace(annotations.uniform_va_type->getTypePtr(), RepackedType {}); } + } + + if (data.is_variadic) { + // This function is thunked through an "_internal" symbol since its signature + // is different from the one in the native host/guest libraries. 
+ data.function_name = data.function_name + "_internal"; + if (data.custom_host_impl) { + throw report_error(decl->getBeginLoc(), "Custom host impl requested but this is implied by the function signature already"); + } + data.custom_host_impl = true; + } + + // For indirect calls, register the function signature as a function pointer type + if (namespace_info.indirect_guest_calls) { + thunked_funcptrs[emitted_function->getNameAsString()] = + std::pair {context.getCanonicalType(emitted_function->getFunctionType()), data.param_annotations}; + } + + thunks.push_back(std::move(data)); } + } } + } } -class ASTVisitor : public clang::RecursiveASTVisitor { - std::vector& decl_contexts; +void AnalysisAction::CoverReferencedTypes(clang::ASTContext& context) { + // Add common fixed-size integer types explicitly + for (unsigned size : {8, 32, 64}) { + types.emplace(context.getIntTypeForBitwidth(size, false).getTypePtr(), RepackedType {}); + types.emplace(context.getIntTypeForBitwidth(size, true).getTypePtr(), RepackedType {}); + } + + // Repeat until no more children are appended + for (bool changed = true; std::exchange(changed, false);) { + for (auto next_type_it = types.begin(), type_it = next_type_it; type_it != types.end(); type_it = next_type_it) { + ++next_type_it; + const auto& [type, type_repack_info] = *type_it; + if (!type->isStructureType()) { + continue; + } + + if (type_repack_info.assumed_compatible) { + // If assumed compatible, we don't need the member definitions + continue; + } + + for (auto* member : type->getAsStructureType()->getDecl()->fields()) { + auto member_type = member->getType().getTypePtr(); + while (member_type->isArrayType()) { + member_type = member_type->getArrayElementTypeNoTypeQual(); + } + while (member_type->isPointerType()) { + member_type = member_type->getPointeeType().getTypePtr(); + } -public: - ASTVisitor(std::vector& decl_contexts_) - : decl_contexts(decl_contexts_) { - } + if (!member_type->isBuiltinType()) { + member_type = 
context.getCanonicalType(member_type); + } + if (types.contains(member_type) && types.at(member_type).pointers_only) { + if (member_type == context.getCanonicalType(member->getType().getTypePtr())) { + throw std::runtime_error( + fmt::format("\"{}\" references opaque type \"{}\" via non-pointer member \"{}\"", clang::QualType {type, 0}.getAsString(), + clang::QualType {member_type, 0}.getAsString(), member->getNameAsString())); + } + continue; + } + if (member_type->isUnionType() && !types.contains(member_type) && !type_repack_info.UsesCustomRepackFor(member)) { + throw std::runtime_error(fmt::format("\"{}\" has unannotated member \"{}\" of union type \"{}\"", clang::QualType {type, 0}.getAsString(), + member->getNameAsString(), clang::QualType {member_type, 0}.getAsString())); + } - /** - * Matches "template struct fex_gen_config { ... }" - */ - bool VisitClassTemplateDecl(clang::ClassTemplateDecl* decl) { - if (decl->getName() != "fex_gen_config") { - return true; + if (!member_type->isStructureType() && !(member_type->isBuiltinType() && !member_type->isVoidType()) && !member_type->isEnumeralType()) { + continue; } - if (llvm::dyn_cast(decl->getDeclContext())) { - decl_contexts.push_back(decl->getDeclContext()); + auto [new_type_it, inserted] = types.emplace(member_type, RepackedType {}); + if (inserted) { + changed = true; + next_type_it = new_type_it; } + } + } + } +} + +class ASTVisitor : public clang::RecursiveASTVisitor { + std::vector& decl_contexts; - return true; +public: + ASTVisitor(std::vector& decl_contexts_) + : decl_contexts(decl_contexts_) {} + + /** + * Matches "template struct fex_gen_config { ... 
}" + */ + bool VisitClassTemplateDecl(clang::ClassTemplateDecl* decl) { + if (decl->getName() != "fex_gen_config") { + return true; + } + + if (llvm::dyn_cast(decl->getDeclContext())) { + decl_contexts.push_back(decl->getDeclContext()); } + + return true; + } }; class ASTConsumer : public clang::ASTConsumer { - std::vector& decl_contexts; + std::vector& decl_contexts; public: - ASTConsumer(std::vector& decl_contexts_) - : decl_contexts(decl_contexts_) { - } + ASTConsumer(std::vector& decl_contexts_) + : decl_contexts(decl_contexts_) {} - void HandleTranslationUnit(clang::ASTContext& context) override { - ASTVisitor { decl_contexts }.TraverseDecl(context.getTranslationUnitDecl()); - } + void HandleTranslationUnit(clang::ASTContext& context) override { + ASTVisitor {decl_contexts}.TraverseDecl(context.getTranslationUnitDecl()); + } }; std::unique_ptr AnalysisAction::CreateASTConsumer(clang::CompilerInstance&, clang::StringRef) { - return std::make_unique(decl_contexts); + return std::make_unique(decl_contexts); } diff --git a/ThunkLibs/Generator/data_layout.cpp b/ThunkLibs/Generator/data_layout.cpp index 0076a69d74..315550a061 100644 --- a/ThunkLibs/Generator/data_layout.cpp +++ b/ThunkLibs/Generator/data_layout.cpp @@ -10,399 +10,405 @@ constexpr bool enable_debug_output = false; // Visitor for gathering data layout information that can be passed across libclang invocations class AnalyzeDataLayoutAction : public AnalysisAction { - ABI& type_abi; + ABI& type_abi; - void OnAnalysisComplete(clang::ASTContext&) override; + void OnAnalysisComplete(clang::ASTContext&) override; public: - AnalyzeDataLayoutAction(ABI&); + AnalyzeDataLayoutAction(ABI&); }; -AnalyzeDataLayoutAction::AnalyzeDataLayoutAction(ABI& abi_) : type_abi(abi_) { -} - -std::unordered_map ComputeDataLayout(const clang::ASTContext& context, const std::unordered_map& types) { - std::unordered_map layout; - - // First, add all types directly used in function signatures of the library API to the meta set - 
for (const auto& [type, type_repack_info] : types) { - if (type_repack_info.assumed_compatible) { - auto [_, inserted] = layout.insert(std::pair { context.getCanonicalType(type), TypeInfo {} }); - if (!inserted) { - throw std::runtime_error("Failed to gather type metadata: Opaque type \"" + clang::QualType { type, 0 }.getAsString() + "\" already registered"); - } - continue; - } +AnalyzeDataLayoutAction::AnalyzeDataLayoutAction(ABI& abi_) + : type_abi(abi_) {} + +std::unordered_map +ComputeDataLayout(const clang::ASTContext& context, const std::unordered_map& types) { + std::unordered_map layout; + + // First, add all types directly used in function signatures of the library API to the meta set + for (const auto& [type, type_repack_info] : types) { + if (type_repack_info.assumed_compatible) { + auto [_, inserted] = layout.insert(std::pair {context.getCanonicalType(type), TypeInfo {}}); + if (!inserted) { + throw std::runtime_error( + "Failed to gather type metadata: Opaque type \"" + clang::QualType {type, 0}.getAsString() + "\" already registered"); + } + continue; + } - if (type->isIncompleteType()) { - throw std::runtime_error("Cannot compute data layout of incomplete type \"" + clang::QualType { type, 0 }.getAsString() + "\". Did you forget any annotations?"); - } + if (type->isIncompleteType()) { + throw std::runtime_error( + "Cannot compute data layout of incomplete type \"" + clang::QualType {type, 0}.getAsString() + "\". 
Did you forget any annotations?"); + } - if (type->isStructureType()) { - StructInfo info; - info.size_bits = context.getTypeSize(type); - info.alignment_bits = context.getTypeAlign(type); + if (type->isStructureType()) { + StructInfo info; + info.size_bits = context.getTypeSize(type); + info.alignment_bits = context.getTypeAlign(type); + + auto [_, inserted] = layout.insert(std::pair {context.getCanonicalType(type), info}); + if (!inserted) { + throw std::runtime_error("Failed to gather type metadata: Type \"" + clang::QualType {type, 0}.getAsString() + "\" already registered"); + } + } else if (type->isBuiltinType() || type->isEnumeralType()) { + SimpleTypeInfo info; + info.size_bits = context.getTypeSize(type); + info.alignment_bits = context.getTypeAlign(type); + + // NOTE: Non-enum types are intentionally not canonicalized since that would turn e.g. size_t into platform-specific types + auto [_, inserted] = layout.insert(std::pair {type->isEnumeralType() ? context.getCanonicalType(type) : type, info}); + if (!inserted) { + throw std::runtime_error("Failed to gather type metadata: Type \"" + clang::QualType {type, 0}.getAsString() + "\" already registered"); + } + } + } - auto [_, inserted] = layout.insert(std::pair { context.getCanonicalType(type), info }); - if (!inserted) { - throw std::runtime_error("Failed to gather type metadata: Type \"" + clang::QualType { type, 0 }.getAsString() + "\" already registered"); - } - } else if (type->isBuiltinType() || type->isEnumeralType()) { - SimpleTypeInfo info; - info.size_bits = context.getTypeSize(type); - info.alignment_bits = context.getTypeAlign(type); - - // NOTE: Non-enum types are intentionally not canonicalized since that would turn e.g. size_t into platform-specific types - auto [_, inserted] = layout.insert(std::pair { type->isEnumeralType() ? 
context.getCanonicalType(type) : type, info }); - if (!inserted) { - throw std::runtime_error("Failed to gather type metadata: Type \"" + clang::QualType { type, 0 }.getAsString() + "\" already registered"); - } - } + // Then, add information about members + for (const auto& [type, type_repack_info] : types) { + if (!type->isStructureType() || type_repack_info.assumed_compatible) { + continue; } - // Then, add information about members - for (const auto& [type, type_repack_info] : types) { - if (!type->isStructureType() || type_repack_info.assumed_compatible) { - continue; + auto& info = *layout.at(context.getCanonicalType(type)).get_if_struct(); + + for (auto* field : type->getAsStructureType()->getDecl()->fields()) { + auto field_type = field->getType().getTypePtr(); + std::optional array_size; + if (auto array_type = llvm::dyn_cast(field->getType())) { + array_size = array_type->getSize().getZExtValue(); + field_type = array_type->getElementType().getTypePtr(); + if (llvm::isa(field_type)) { + throw std::runtime_error("Unsupported multi-dimensional array member \"" + field->getNameAsString() + "\" in type \"" + + clang::QualType {type, 0}.getAsString() + "\""); } - - auto& info = *layout.at(context.getCanonicalType(type)).get_if_struct(); - - for (auto* field : type->getAsStructureType()->getDecl()->fields()) { - auto field_type = field->getType().getTypePtr(); - std::optional array_size; - if (auto array_type = llvm::dyn_cast(field->getType())) { - array_size = array_type->getSize().getZExtValue(); - field_type = array_type->getElementType().getTypePtr(); - if (llvm::isa(field_type)) { - throw std::runtime_error("Unsupported multi-dimensional array member \"" + field->getNameAsString() + "\" in type \"" + clang::QualType { type, 0 }.getAsString() + "\""); - } - } - - StructInfo::MemberInfo member_info { - .size_bits = context.getTypeSize(field->getType()), // Total size even for arrays - .offset_bits = context.getFieldOffset(field), - .type_name = 
get_type_name(context, field_type), - .member_name = field->getNameAsString(), - .array_size = array_size, - .is_function_pointer = field_type->isFunctionPointerType(), - .is_integral = field->getType()->isIntegerType(), - .is_signed_integer = field->getType()->isSignedIntegerType(), - }; - - // TODO: Process types in dependency-order. Currently we skip this - // check if we haven't processed the member type already, - // which is only safe since this is a consistency check - if (field_type->isStructureType() && layout.contains(context.getCanonicalType(field_type))) { - // Assert for self-consistency - auto field_meta = layout.at(context.getCanonicalType(field_type)); - (void)types.at(context.getCanonicalType(field_type)); - if (auto field_info = field_meta.get_if_simple_or_struct()) { - if (field_info->size_bits != member_info.size_bits / member_info.array_size.value_or(1)) { - throw std::runtime_error("Inconsistent type size detected"); - } - } - } - - // Add built-in types, even if referenced through a pointer - for (auto* inner_field_type = field_type; inner_field_type; inner_field_type = inner_field_type->getPointeeType().getTypePtrOrNull()) { - if (inner_field_type->isBuiltinType() || inner_field_type->isEnumeralType()) { - // The analysis pass doesn't explicitly register built-in types, so add them manually here - SimpleTypeInfo info { - .size_bits = context.getTypeSize(inner_field_type), - .alignment_bits = context.getTypeAlign(inner_field_type), - }; - if (!inner_field_type->isBuiltinType()) { - inner_field_type = context.getCanonicalType(inner_field_type); - } - [[maybe_unused]] auto [prev, inserted] = layout.insert(std::pair { inner_field_type, info }); -// if (!inserted && prev->second != TypeInfo { info }) { -// // TODO: Throw error since consistency check failed -// } - } - } - - info.members.push_back(member_info); + } + + StructInfo::MemberInfo member_info { + .size_bits = context.getTypeSize(field->getType()), // Total size even for arrays + 
.offset_bits = context.getFieldOffset(field), + .type_name = get_type_name(context, field_type), + .member_name = field->getNameAsString(), + .array_size = array_size, + .is_function_pointer = field_type->isFunctionPointerType(), + .is_integral = field->getType()->isIntegerType(), + .is_signed_integer = field->getType()->isSignedIntegerType(), + }; + + // TODO: Process types in dependency-order. Currently we skip this + // check if we haven't processed the member type already, + // which is only safe since this is a consistency check + if (field_type->isStructureType() && layout.contains(context.getCanonicalType(field_type))) { + // Assert for self-consistency + auto field_meta = layout.at(context.getCanonicalType(field_type)); + (void)types.at(context.getCanonicalType(field_type)); + if (auto field_info = field_meta.get_if_simple_or_struct()) { + if (field_info->size_bits != member_info.size_bits / member_info.array_size.value_or(1)) { + throw std::runtime_error("Inconsistent type size detected"); + } } - } - - if (enable_debug_output) { - for (const auto& [type, info] : layout) { - auto basic_info = info.get_if_simple_or_struct(); - if (!basic_info) { - continue; - } - - fprintf(stderr, " Host entry %s: %lu (%lu)\n", clang::QualType { type, 0 }.getAsString().c_str(), basic_info->size_bits / 8, basic_info->alignment_bits / 8); + } + + // Add built-in types, even if referenced through a pointer + for (auto* inner_field_type = field_type; inner_field_type; inner_field_type = inner_field_type->getPointeeType().getTypePtrOrNull()) { + if (inner_field_type->isBuiltinType() || inner_field_type->isEnumeralType()) { + // The analysis pass doesn't explicitly register built-in types, so add them manually here + SimpleTypeInfo info { + .size_bits = context.getTypeSize(inner_field_type), + .alignment_bits = context.getTypeAlign(inner_field_type), + }; + if (!inner_field_type->isBuiltinType()) { + inner_field_type = context.getCanonicalType(inner_field_type); + } + 
[[maybe_unused]] auto [prev, inserted] = layout.insert(std::pair {inner_field_type, info}); + // if (!inserted && prev->second != TypeInfo { info }) { + // // TODO: Throw error since consistency check failed + // } + } + } - if (auto struct_info = info.get_if_struct()) { - for (const auto& member : struct_info->members) { - fprintf(stderr, " Offset %lu-%lu: %s %s%s\n", member.offset_bits / 8, (member.offset_bits + member.size_bits - 1) / 8, member.type_name.c_str(), member.member_name.c_str(), member.array_size ? fmt::format("[{}]", member.array_size.value()).c_str() : ""); - } - } + info.members.push_back(member_info); + } + } + + if (enable_debug_output) { + for (const auto& [type, info] : layout) { + auto basic_info = info.get_if_simple_or_struct(); + if (!basic_info) { + continue; + } + + fprintf(stderr, " Host entry %s: %lu (%lu)\n", clang::QualType {type, 0}.getAsString().c_str(), basic_info->size_bits / 8, + basic_info->alignment_bits / 8); + + if (auto struct_info = info.get_if_struct()) { + for (const auto& member : struct_info->members) { + fprintf(stderr, " Offset %lu-%lu: %s %s%s\n", member.offset_bits / 8, (member.offset_bits + member.size_bits - 1) / 8, + member.type_name.c_str(), member.member_name.c_str(), + member.array_size ? 
fmt::format("[{}]", member.array_size.value()).c_str() : ""); } + } } + } - return layout; + return layout; } ABI GetStableLayout(const clang::ASTContext& context, const std::unordered_map& data_layout) { - ABI stable_layout; - - for (auto [type, type_info] : data_layout) { - auto type_name = get_type_name(context, type); - if (auto struct_info = type_info.get_if_struct()) { - for (auto& member : struct_info->members) { - if (member.is_integral) { - // Map member types to fixed-size integers - auto alt_type_name = get_fixed_size_int_name(member.is_signed_integer, member.size_bits); - auto alt_type_info = SimpleTypeInfo { - .size_bits = member.size_bits, - .alignment_bits = context.getTypeAlign(context.getIntTypeForBitwidth(member.size_bits, member.is_signed_integer)), - }; - stable_layout.insert(std::pair { alt_type_name, alt_type_info }); - member.type_name = std::move(alt_type_name); - } - } - } + ABI stable_layout; - auto [it, inserted] = stable_layout.insert(std::pair { type_name, std::move(type_info) }); - if (type->isIntegerType()) { - auto alt_type_name = get_fixed_size_int_name(type, context); - stable_layout.insert(std::pair { std::move(alt_type_name), type_info }); + for (auto [type, type_info] : data_layout) { + auto type_name = get_type_name(context, type); + if (auto struct_info = type_info.get_if_struct()) { + for (auto& member : struct_info->members) { + if (member.is_integral) { + // Map member types to fixed-size integers + auto alt_type_name = get_fixed_size_int_name(member.is_signed_integer, member.size_bits); + auto alt_type_info = SimpleTypeInfo { + .size_bits = member.size_bits, + .alignment_bits = context.getTypeAlign(context.getIntTypeForBitwidth(member.size_bits, member.is_signed_integer)), + }; + stable_layout.insert(std::pair {alt_type_name, alt_type_info}); + member.type_name = std::move(alt_type_name); } + } + } - if (!inserted && it->second != type_info && !type->isIntegerType()) { - throw std::runtime_error("Duplicate type 
information: Tried to re-register type \"" + type_name + "\""); - } + auto [it, inserted] = stable_layout.insert(std::pair {type_name, std::move(type_info)}); + if (type->isIntegerType()) { + auto alt_type_name = get_fixed_size_int_name(type, context); + stable_layout.insert(std::pair {std::move(alt_type_name), type_info}); } - stable_layout.pointer_size = context.getTypeSize(context.getUIntPtrType()) / 8; + if (!inserted && it->second != type_info && !type->isIntegerType()) { + throw std::runtime_error("Duplicate type information: Tried to re-register type \"" + type_name + "\""); + } + } - return stable_layout; + stable_layout.pointer_size = context.getTypeSize(context.getUIntPtrType()) / 8; + + return stable_layout; } static std::array GetSha256(const std::string& function_name) { - std::array sha256; - SHA256(reinterpret_cast(function_name.data()), - function_name.size(), - sha256.data()); - return sha256; + std::array sha256; + SHA256(reinterpret_cast(function_name.data()), function_name.size(), sha256.data()); + return sha256; }; std::string GetTypeNameWithFixedSizeIntegers(clang::ASTContext& context, clang::QualType type) { - if (type->isBuiltinType()) { - auto size = context.getTypeSize(type); - return fmt::format("uint{}_t", size); - } else if (type->isPointerType() && type->getPointeeType()->isBuiltinType() && context.getTypeSize(type->getPointeeType()) > 8) { - // TODO: Also apply this path to char-like types - auto size = context.getTypeSize(type->getPointeeType()); - return fmt::format("uint{}_t*", size); - } else { - return type.getAsString(); - } + if (type->isBuiltinType()) { + auto size = context.getTypeSize(type); + return fmt::format("uint{}_t", size); + } else if (type->isPointerType() && type->getPointeeType()->isBuiltinType() && context.getTypeSize(type->getPointeeType()) > 8) { + // TODO: Also apply this path to char-like types + auto size = context.getTypeSize(type->getPointeeType()); + return fmt::format("uint{}_t*", size); + } else { + 
return type.getAsString(); + } } void AnalyzeDataLayoutAction::OnAnalysisComplete(clang::ASTContext& context) { - type_abi = GetStableLayout(context, ComputeDataLayout(context, types)); - - // Register functions that must be guest-callable through host function pointers - for (auto funcptr_type_it = thunked_funcptrs.begin(); funcptr_type_it != thunked_funcptrs.end(); ++funcptr_type_it) { - auto& funcptr_id = funcptr_type_it->first; - auto& [type, param_annotations] = funcptr_type_it->second; - auto func_type = type->getAs(); - std::string mangled_name = clang::QualType { type, 0 }.getAsString(); - auto cb_sha256 = GetSha256("fexcallback_" + mangled_name); - FuncPtrInfo info = { cb_sha256 }; - - // TODO: Also apply GetTypeNameWithFixedSizeIntegers here - info.result = func_type->getReturnType().getAsString(); - - for (auto arg : func_type->getParamTypes()) { - info.args.push_back(GetTypeNameWithFixedSizeIntegers(context, arg)); - } - type_abi.thunked_funcptrs[funcptr_id] = std::move(info); + type_abi = GetStableLayout(context, ComputeDataLayout(context, types)); + + // Register functions that must be guest-callable through host function pointers + for (auto funcptr_type_it = thunked_funcptrs.begin(); funcptr_type_it != thunked_funcptrs.end(); ++funcptr_type_it) { + auto& funcptr_id = funcptr_type_it->first; + auto& [type, param_annotations] = funcptr_type_it->second; + auto func_type = type->getAs(); + std::string mangled_name = clang::QualType {type, 0}.getAsString(); + auto cb_sha256 = GetSha256("fexcallback_" + mangled_name); + FuncPtrInfo info = {cb_sha256}; + + // TODO: Also apply GetTypeNameWithFixedSizeIntegers here + info.result = func_type->getReturnType().getAsString(); + + for (auto arg : func_type->getParamTypes()) { + info.args.push_back(GetTypeNameWithFixedSizeIntegers(context, arg)); } + type_abi.thunked_funcptrs[funcptr_id] = std::move(info); + } } -TypeCompatibility DataLayoutCompareAction::GetTypeCompatibility( - const clang::ASTContext& context, - 
const clang::Type* type, - const std::unordered_map host_abi, - std::unordered_map& type_compat) { - assert(type->isCanonicalUnqualified() || type->isBuiltinType() || type->isEnumeralType()); - - { - // Reserve a slot to be filled later. The placeholder value is used - // to detect infinite recursions. - constexpr auto placeholder_compat = TypeCompatibility { 100 }; - auto [existing_compat_it, is_new_type] = type_compat.emplace(type, placeholder_compat); - if (!is_new_type) { - if (existing_compat_it->second == placeholder_compat) { - throw std::runtime_error("Found recursive reference to type \"" + clang::QualType { type, 0 }.getAsString() + "\""); - } - - return existing_compat_it->second; - } - } - - if (types.contains(type) && types.at(type).assumed_compatible) { - if (types.at(type).pointers_only && !type->isPointerType()) { - throw std::runtime_error("Tried to dereference opaque type \"" + clang::QualType { type, 0 }.getAsString() + "\" when querying data layout compatibility"); - } - type_compat.at(type) = TypeCompatibility::Full; - return TypeCompatibility::Full; +TypeCompatibility DataLayoutCompareAction::GetTypeCompatibility(const clang::ASTContext& context, const clang::Type* type, + const std::unordered_map host_abi, + std::unordered_map& type_compat) { + assert(type->isCanonicalUnqualified() || type->isBuiltinType() || type->isEnumeralType()); + + { + // Reserve a slot to be filled later. The placeholder value is used + // to detect infinite recursions. 
+ constexpr auto placeholder_compat = TypeCompatibility {100}; + auto [existing_compat_it, is_new_type] = type_compat.emplace(type, placeholder_compat); + if (!is_new_type) { + if (existing_compat_it->second == placeholder_compat) { + throw std::runtime_error("Found recursive reference to type \"" + clang::QualType {type, 0}.getAsString() + "\""); + } + + return existing_compat_it->second; } + } - auto type_name = get_type_name(context, type); - // Look up the same type name in the guest map, - // unless it's an integer (which is mapped to fixed-size uintX_t types) - auto guest_info = guest_abi.at(!type->isIntegerType() ? type_name : get_fixed_size_int_name(type, context)); - auto& host_info = host_abi.at(type->isBuiltinType() ? type : context.getCanonicalType(type)); - - const bool is_32bit = (guest_abi.pointer_size == 4); - - // Assume full compatibility, then downgrade as needed - auto compat = TypeCompatibility::Full; - - if (guest_info != host_info) { - // Non-matching data layout... downgrade to Repackable - // TODO: Even for non-structs, this only works if the types are reasonably similar (e.g. 
uint32_t -> uint64_t) - compat = TypeCompatibility::Repackable; + if (types.contains(type) && types.at(type).assumed_compatible) { + if (types.at(type).pointers_only && !type->isPointerType()) { + throw std::runtime_error( + "Tried to dereference opaque type \"" + clang::QualType {type, 0}.getAsString() + "\" when querying data layout compatibility"); } - - auto guest_struct_info = guest_info.get_if_struct(); - if (guest_struct_info && guest_struct_info->members.size() != host_info.get_if_struct()->members.size()) { - // Members are missing from either the guest or host layout - // NOTE: If the members are merely named differently, this will be caught in the else-if below - compat = TypeCompatibility::None; - } else if (guest_struct_info) { - std::vector member_compat; - for (std::size_t member_idx = 0; member_idx < guest_struct_info->members.size(); ++member_idx) { - // Look up the corresponding member in the host struct definition. - // The members may be listed in a different order, so we can't - // directly use member_idx for this - auto* host_member_field = [&]() -> clang::FieldDecl* { - auto struct_decl = type->getAsStructureType()->getDecl(); - auto it = std::find_if(struct_decl->field_begin(), struct_decl->field_end(), [&](auto* field) { - return field->getName() == guest_struct_info->members.at(member_idx).member_name; - }); - if (it == struct_decl->field_end()) { - return nullptr; - } - return *it; - }(); - if (!host_member_field) { - // No corresponding host struct member - // TODO: Also detect host members that are missing from the guest struct - member_compat.push_back(TypeCompatibility::None); - break; - } - - auto host_member_type = context.getCanonicalType(host_member_field->getType().getTypePtr()); - if (auto array_type = llvm::dyn_cast(host_member_type)) { - // Compare array element type only. The array size is already considered by the layout information of the containing struct. 
- host_member_type = context.getCanonicalType(array_type->getElementType().getTypePtr()); - } - - if (host_member_type->isPointerType()) { - // Automatic repacking of pointers to non-compatible types is only possible if: - // * Pointee is fully compatible, or - // * Pointer member is annotated - // TODO: Don't restrict this to structure types. it applies to pointers to builtin types too! - auto host_member_pointee_type = context.getCanonicalType(host_member_type->getPointeeType().getTypePtr()); - if (types.at(type).UsesCustomRepackFor(host_member_field)) { - member_compat.push_back(TypeCompatibility::Repackable); - } else if (types.contains(host_member_pointee_type) && types.at(host_member_pointee_type).assumed_compatible) { - // Pointee doesn't need repacking, but pointer needs extending on 32-bit - member_compat.push_back(is_32bit ? TypeCompatibility::Repackable : TypeCompatibility::Full); - } else if (host_member_pointee_type->isPointerType()) { - // This is a nested pointer, e.g. void** - - if (is_32bit) { - // Nested pointers can't be repacked on 32-bit - member_compat.push_back(TypeCompatibility::None); - } else if (types.contains(host_member_pointee_type->getPointeeType().getTypePtr()) && types.at(host_member_pointee_type->getPointeeType().getTypePtr()).assumed_compatible) { - // Pointers to opaque types are fine - member_compat.push_back(TypeCompatibility::Full); - } else { - // Check the innermost type's compatibility on 64-bit - auto pointee_pointee_type = host_member_pointee_type->getPointeeType().getTypePtr(); - // TODO: Not sure how to handle void here. Probably should require an annotation instead of "just working" - auto pointee_pointee_compat = pointee_pointee_type->isVoidType() ? 
TypeCompatibility::Full : GetTypeCompatibility(context, pointee_pointee_type, host_abi, type_compat); - if (pointee_pointee_compat == TypeCompatibility::Full) { - member_compat.push_back(TypeCompatibility::Full); - } else { - member_compat.push_back(TypeCompatibility::None); - } - } - } else if (!host_member_pointee_type->isVoidType() && (host_member_pointee_type->isBuiltinType() || host_member_pointee_type->isEnumeralType())) { - // TODO: What are good heuristics for this? - // size_t should yield TypeCompatibility::Repackable - // inconsistent types should probably default to TypeCompatibility::None - // For now, just always assume compatible... (will degrade to Repackable below) - member_compat.push_back(TypeCompatibility::Full); - } else if (!host_member_pointee_type->isVoidType() && (host_member_pointee_type->isStructureType() || types.contains(host_member_pointee_type))) { - auto pointee_compat = GetTypeCompatibility(context, host_member_pointee_type, host_abi, type_compat); - if (pointee_compat == TypeCompatibility::Full) { - // Pointee is fully compatible, so automatic repacking only requires converting the pointers themselves - member_compat.push_back(is_32bit ? TypeCompatibility::Repackable : TypeCompatibility::Full); - } else { - // If the pointee is incompatible (even if repackable), automatic repacking isn't possible - member_compat.push_back(TypeCompatibility::None); - } - } else if (!is_32bit && host_member_pointee_type->isVoidType()) { - // TODO: Not sure how to handle void here. 
Probably should require an annotation instead of "just working" - member_compat.push_back(TypeCompatibility::Full); - } else { - member_compat.push_back(TypeCompatibility::None); - } - continue; - } - - if (guest_abi.at(guest_struct_info->members[member_idx].type_name).get_if_struct()) { - auto host_type_info = host_abi.at(host_member_type); - member_compat.push_back(GetTypeCompatibility(context, host_member_type, host_abi, type_compat)); + type_compat.at(type) = TypeCompatibility::Full; + return TypeCompatibility::Full; + } + + auto type_name = get_type_name(context, type); + // Look up the same type name in the guest map, + // unless it's an integer (which is mapped to fixed-size uintX_t types) + auto guest_info = guest_abi.at(!type->isIntegerType() ? type_name : get_fixed_size_int_name(type, context)); + auto& host_info = host_abi.at(type->isBuiltinType() ? type : context.getCanonicalType(type)); + + const bool is_32bit = (guest_abi.pointer_size == 4); + + // Assume full compatibility, then downgrade as needed + auto compat = TypeCompatibility::Full; + + if (guest_info != host_info) { + // Non-matching data layout... downgrade to Repackable + // TODO: Even for non-structs, this only works if the types are reasonably similar (e.g. uint32_t -> uint64_t) + compat = TypeCompatibility::Repackable; + } + + auto guest_struct_info = guest_info.get_if_struct(); + if (guest_struct_info && guest_struct_info->members.size() != host_info.get_if_struct()->members.size()) { + // Members are missing from either the guest or host layout + // NOTE: If the members are merely named differently, this will be caught in the else-if below + compat = TypeCompatibility::None; + } else if (guest_struct_info) { + std::vector member_compat; + for (std::size_t member_idx = 0; member_idx < guest_struct_info->members.size(); ++member_idx) { + // Look up the corresponding member in the host struct definition. 
+ // The members may be listed in a different order, so we can't + // directly use member_idx for this + auto* host_member_field = [&]() -> clang::FieldDecl* { + auto struct_decl = type->getAsStructureType()->getDecl(); + auto it = std::find_if(struct_decl->field_begin(), struct_decl->field_end(), + [&](auto* field) { return field->getName() == guest_struct_info->members.at(member_idx).member_name; }); + if (it == struct_decl->field_end()) { + return nullptr; + } + return *it; + }(); + if (!host_member_field) { + // No corresponding host struct member + // TODO: Also detect host members that are missing from the guest struct + member_compat.push_back(TypeCompatibility::None); + break; + } + + auto host_member_type = context.getCanonicalType(host_member_field->getType().getTypePtr()); + if (auto array_type = llvm::dyn_cast(host_member_type)) { + // Compare array element type only. The array size is already considered by the layout information of the containing struct. + host_member_type = context.getCanonicalType(array_type->getElementType().getTypePtr()); + } + + if (host_member_type->isPointerType()) { + // Automatic repacking of pointers to non-compatible types is only possible if: + // * Pointee is fully compatible, or + // * Pointer member is annotated + // TODO: Don't restrict this to structure types. it applies to pointers to builtin types too! + auto host_member_pointee_type = context.getCanonicalType(host_member_type->getPointeeType().getTypePtr()); + if (types.at(type).UsesCustomRepackFor(host_member_field)) { + member_compat.push_back(TypeCompatibility::Repackable); + } else if (types.contains(host_member_pointee_type) && types.at(host_member_pointee_type).assumed_compatible) { + // Pointee doesn't need repacking, but pointer needs extending on 32-bit + member_compat.push_back(is_32bit ? TypeCompatibility::Repackable : TypeCompatibility::Full); + } else if (host_member_pointee_type->isPointerType()) { + // This is a nested pointer, e.g. 
void** + + if (is_32bit) { + // Nested pointers can't be repacked on 32-bit + member_compat.push_back(TypeCompatibility::None); + } else if (types.contains(host_member_pointee_type->getPointeeType().getTypePtr()) && + types.at(host_member_pointee_type->getPointeeType().getTypePtr()).assumed_compatible) { + // Pointers to opaque types are fine + member_compat.push_back(TypeCompatibility::Full); + } else { + // Check the innermost type's compatibility on 64-bit + auto pointee_pointee_type = host_member_pointee_type->getPointeeType().getTypePtr(); + // TODO: Not sure how to handle void here. Probably should require an annotation instead of "just working" + auto pointee_pointee_compat = pointee_pointee_type->isVoidType() ? + TypeCompatibility::Full : + GetTypeCompatibility(context, pointee_pointee_type, host_abi, type_compat); + if (pointee_pointee_compat == TypeCompatibility::Full) { + member_compat.push_back(TypeCompatibility::Full); } else { - // Member was checked for size/alignment above already + member_compat.push_back(TypeCompatibility::None); } - } - - if (std::all_of(member_compat.begin(), member_compat.end(), [](auto compat) { return compat == TypeCompatibility::Full; })) { - // TypeCompatibility::Full or ::Repackable - } else if (std::none_of(member_compat.begin(), member_compat.end(), [](auto compat) { return compat == TypeCompatibility::None; })) { - // Downgrade to Repackable - compat = TypeCompatibility::Repackable; + } + } else if (!host_member_pointee_type->isVoidType() && + (host_member_pointee_type->isBuiltinType() || host_member_pointee_type->isEnumeralType())) { + // TODO: What are good heuristics for this? + // size_t should yield TypeCompatibility::Repackable + // inconsistent types should probably default to TypeCompatibility::None + // For now, just always assume compatible... 
(will degrade to Repackable below) + member_compat.push_back(TypeCompatibility::Full); + } else if (!host_member_pointee_type->isVoidType() && + (host_member_pointee_type->isStructureType() || types.contains(host_member_pointee_type))) { + auto pointee_compat = GetTypeCompatibility(context, host_member_pointee_type, host_abi, type_compat); + if (pointee_compat == TypeCompatibility::Full) { + // Pointee is fully compatible, so automatic repacking only requires converting the pointers themselves + member_compat.push_back(is_32bit ? TypeCompatibility::Repackable : TypeCompatibility::Full); + } else { + // If the pointee is incompatible (even if repackable), automatic repacking isn't possible + member_compat.push_back(TypeCompatibility::None); + } + } else if (!is_32bit && host_member_pointee_type->isVoidType()) { + // TODO: Not sure how to handle void here. Probably should require an annotation instead of "just working" + member_compat.push_back(TypeCompatibility::Full); } else { - // Downgrade to None - compat = TypeCompatibility::None; + member_compat.push_back(TypeCompatibility::None); } + continue; + } + + if (guest_abi.at(guest_struct_info->members[member_idx].type_name).get_if_struct()) { + auto host_type_info = host_abi.at(host_member_type); + member_compat.push_back(GetTypeCompatibility(context, host_member_type, host_abi, type_compat)); + } else { + // Member was checked for size/alignment above already + } } - type_compat.at(type) = compat; - return compat; + if (std::all_of(member_compat.begin(), member_compat.end(), [](auto compat) { return compat == TypeCompatibility::Full; })) { + // TypeCompatibility::Full or ::Repackable + } else if (std::none_of(member_compat.begin(), member_compat.end(), [](auto compat) { return compat == TypeCompatibility::None; })) { + // Downgrade to Repackable + compat = TypeCompatibility::Repackable; + } else { + // Downgrade to None + compat = TypeCompatibility::None; + } + } + + type_compat.at(type) = compat; + return compat; 
} FuncPtrInfo DataLayoutCompareAction::LookupGuestFuncPtrInfo(const char* funcptr_id) { - return guest_abi.thunked_funcptrs.at(funcptr_id); + return guest_abi.thunked_funcptrs.at(funcptr_id); } -DataLayoutCompareActionFactory::DataLayoutCompareActionFactory(const ABI& abi) : abi(abi) { - -} +DataLayoutCompareActionFactory::DataLayoutCompareActionFactory(const ABI& abi) + : abi(abi) {} DataLayoutCompareActionFactory::~DataLayoutCompareActionFactory() = default; std::unique_ptr DataLayoutCompareActionFactory::create() { - return std::make_unique(abi); + return std::make_unique(abi); } -AnalyzeDataLayoutActionFactory::AnalyzeDataLayoutActionFactory() : abi(std::make_unique()) { - -} +AnalyzeDataLayoutActionFactory::AnalyzeDataLayoutActionFactory() + : abi(std::make_unique()) {} AnalyzeDataLayoutActionFactory::~AnalyzeDataLayoutActionFactory() = default; std::unique_ptr AnalyzeDataLayoutActionFactory::create() { - return std::make_unique(*abi); + return std::make_unique(*abi); } diff --git a/ThunkLibs/Generator/gen.cpp b/ThunkLibs/Generator/gen.cpp index d15cfc03f8..22a9b75e7e 100644 --- a/ThunkLibs/Generator/gen.cpp +++ b/ThunkLibs/Generator/gen.cpp @@ -18,38 +18,41 @@ class GenerateThunkLibsAction : public DataLayoutCompareAction { public: - GenerateThunkLibsAction(const std::string& libname, const OutputFilenames&, const ABI& abi); + GenerateThunkLibsAction(const std::string& libname, const OutputFilenames&, const ABI& abi); private: - // Generate helper code for thunk libraries and write them to the output file - void OnAnalysisComplete(clang::ASTContext&) override; + // Generate helper code for thunk libraries and write them to the output file + void OnAnalysisComplete(clang::ASTContext&) override; - // Emit guest_layout/host_layout wrappers for types passed across architecture boundaries - void EmitLayoutWrappers(clang::ASTContext&, std::ofstream&, std::unordered_map& type_compat); + // Emit guest_layout/host_layout wrappers for types passed across architecture 
boundaries + void EmitLayoutWrappers(clang::ASTContext&, std::ofstream&, std::unordered_map& type_compat); - const std::string& libfilename; - std::string libname; // sanitized filename, usable as part of emitted function names - const OutputFilenames& output_filenames; + const std::string& libfilename; + std::string libname; // sanitized filename, usable as part of emitted function names + const OutputFilenames& output_filenames; }; GenerateThunkLibsAction::GenerateThunkLibsAction(const std::string& libname_, const OutputFilenames& output_filenames_, const ABI& abi) - : DataLayoutCompareAction(abi), libfilename(libname_), libname(libname_), output_filenames(output_filenames_) { - for (auto& c : libname) { - if (c == '-') { - c = '_'; - } + : DataLayoutCompareAction(abi) + , libfilename(libname_) + , libname(libname_) + , output_filenames(output_filenames_) { + for (auto& c : libname) { + if (c == '-') { + c = '_'; } + } } template static std::string format_function_args(const FunctionParams& params, Fn&& format_arg) { - std::string ret; - for (std::size_t idx = 0; idx < params.param_types.size(); ++idx) { - ret += std::forward(format_arg)(idx) + ", "; - } - // drop trailing ", " - ret.resize(ret.size() > 2 ? ret.size() - 2 : 0); - return ret; + std::string ret; + for (std::size_t idx = 0; idx < params.param_types.size(); ++idx) { + ret += std::forward(format_arg)(idx) + ", "; + } + // drop trailing ", " + ret.resize(ret.size() > 2 ? ret.size() - 2 : 0); + return ret; }; // Custom sort algorithm that works with partial orders. @@ -62,737 +65,739 @@ static std::string format_function_args(const FunctionParams& params, Fn&& forma // order of A and B undetermined. In effect when iterating over the sorted // range, each dependency is visited before any of its dependees. 
template -void BubbleSort(It begin, It end, - std::relation, std::iter_value_t> auto compare) { - bool fixpoint; - do { - fixpoint = true; - for (auto it = begin; it != end; ++it) { - for (auto it2 = std::next(it); it2 != end; ++it2) { - if (compare(*it2, *it)) { - std::swap(*it, *it2); - fixpoint = false; - it2 = it; - } - } +void BubbleSort(It begin, It end, std::relation, std::iter_value_t> auto compare) { + bool fixpoint; + do { + fixpoint = true; + for (auto it = begin; it != end; ++it) { + for (auto it2 = std::next(it); it2 != end; ++it2) { + if (compare(*it2, *it)) { + std::swap(*it, *it2); + fixpoint = false; + it2 = it; } - } while (!fixpoint); + } + } + } while (!fixpoint); } // Compares such that A < B if B contains A as a member and requires A to be completely defined (i.e. non-pointer/non-reference). // This applies recursively to structs contained by B. struct compare_by_struct_dependency { - clang::ASTContext& context; - - bool operator()(const std::pair& a, - const std::pair& b) const { - return (*this)(a.first, b.first); - } + clang::ASTContext& context; - bool operator()(const clang::Type* a, const clang::Type* b) const { - if (llvm::isa(b)) { - throw std::runtime_error("Cannot have \"b\" be an array"); - } + bool operator()(const std::pair& a, + const std::pair& b) const { + return (*this)(a.first, b.first); + } - auto* b_as_struct = b->getAsStructureType(); - if (!b_as_struct) { - // Not a struct => no dependency - return false; - } + bool operator()(const clang::Type* a, const clang::Type* b) const { + if (llvm::isa(b)) { + throw std::runtime_error("Cannot have \"b\" be an array"); + } - if (a->isArrayType()) { - throw std::runtime_error("Cannot have \"a\" be an array"); - } + auto* b_as_struct = b->getAsStructureType(); + if (!b_as_struct) { + // Not a struct => no dependency + return false; + } - for (auto* child : b_as_struct->getDecl()->fields()) { - auto child_type = child->getType().getTypePtr(); + if (a->isArrayType()) { + throw 
std::runtime_error("Cannot have \"a\" be an array"); + } - if (child_type->isPointerType()) { - // Pointers don't need the definition to be available - continue; - } + for (auto* child : b_as_struct->getDecl()->fields()) { + auto child_type = child->getType().getTypePtr(); - // Peel off any array type layers from the member - while (auto child_as_array = llvm::dyn_cast(child_type)) { - child_type = child_as_array->getArrayElementTypeNoTypeQual(); - } + if (child_type->isPointerType()) { + // Pointers don't need the definition to be available + continue; + } - if (context.hasSameType(a, child_type)) { - return true; - } + // Peel off any array type layers from the member + while (auto child_as_array = llvm::dyn_cast(child_type)) { + child_type = child_as_array->getArrayElementTypeNoTypeQual(); + } - if ((*this)(a, child_type)) { - // Child depends on A => transitive dependency - return true; - } - } + if (context.hasSameType(a, child_type)) { + return true; + } - // No dependency found - return false; + if ((*this)(a, child_type)) { + // Child depends on A => transitive dependency + return true; + } } -}; -void GenerateThunkLibsAction::EmitLayoutWrappers( - clang::ASTContext& context, std::ofstream& file, - std::unordered_map& type_compat) { - // Sort struct types by dependency so that repacking code is emitted in an order that compiles fine - std::vector> types { this->types.begin(), this->types.end() }; - BubbleSort(types.begin(), types.end(), compare_by_struct_dependency { context }); + // No dependency found + return false; + } +}; - for (const auto& [type, type_repack_info] : types) { - auto struct_name = get_type_name(context, type); +void GenerateThunkLibsAction::EmitLayoutWrappers(clang::ASTContext& context, std::ofstream& file, + std::unordered_map& type_compat) { + // Sort struct types by dependency so that repacking code is emitted in an order that compiles fine + std::vector> types {this->types.begin(), this->types.end()}; + BubbleSort(types.begin(), 
types.end(), compare_by_struct_dependency {context}); + + for (const auto& [type, type_repack_info] : types) { + auto struct_name = get_type_name(context, type); + + // Opaque types don't need layout definitions + if (type_repack_info.assumed_compatible && type_repack_info.pointers_only) { + if (guest_abi.pointer_size != 4) { + fmt::print(file, "template<> inline constexpr bool has_compatible_data_layout<{}*> = true;\n", struct_name); + } + continue; + } else if (type_repack_info.assumed_compatible) { + // TODO: Handle more cleanly + type_compat[type] = TypeCompatibility::Full; + } - // Opaque types don't need layout definitions - if (type_repack_info.assumed_compatible && type_repack_info.pointers_only) { - if (guest_abi.pointer_size != 4) { - fmt::print(file, "template<> inline constexpr bool has_compatible_data_layout<{}*> = true;\n", struct_name); - } - continue; - } else if (type_repack_info.assumed_compatible) { - // TODO: Handle more cleanly - type_compat[type] = TypeCompatibility::Full; - } + // These must be handled later since they are not canonicalized and hence must be de-duplicated first + if (type->isBuiltinType()) { + continue; + } - // These must be handled later since they are not canonicalized and hence must be de-duplicated first - if (type->isBuiltinType()) { - continue; - } + // TODO: Instead, map these names back to *some* type that's named? + if (struct_name.starts_with("unnamed_")) { + continue; + } - // TODO: Instead, map these names back to *some* type that's named? - if (struct_name.starts_with("unnamed_")) { - continue; - } + if (type->isEnumeralType()) { + fmt::print(file, "template<>\nstruct __attribute__((packed)) guest_layout<{}> {{\n", struct_name); + fmt::print(file, " using type = {}int{}_t;\n", type->isUnsignedIntegerOrEnumerationType() ? 
"u" : "", + guest_abi.at(struct_name).get_if_simple_or_struct()->size_bits); + fmt::print(file, " type data;\n"); + fmt::print(file, "}};\n"); + continue; + } - if (type->isEnumeralType()) { - fmt::print(file, "template<>\nstruct __attribute__((packed)) guest_layout<{}> {{\n", struct_name); - fmt::print(file, " using type = {}int{}_t;\n", - type->isUnsignedIntegerOrEnumerationType() ? "u" : "", - guest_abi.at(struct_name).get_if_simple_or_struct()->size_bits); - fmt::print(file, " type data;\n"); - fmt::print(file, "}};\n"); - continue; - } + if (type_compat.at(type) == TypeCompatibility::None && !type_repack_info.emit_layout_wrappers) { + // Disallow use of layout wrappers for this type by specializing without a definition + fmt::print(file, "template<>\nstruct guest_layout<{}>;\n", struct_name); + fmt::print(file, "template<>\nstruct host_layout<{}>;\n", struct_name); + fmt::print(file, "guest_layout<{}>& to_guest(const host_layout<{}>&) = delete;\n", struct_name, struct_name); + continue; + } - if (type_compat.at(type) == TypeCompatibility::None && !type_repack_info.emit_layout_wrappers) { - // Disallow use of layout wrappers for this type by specializing without a definition - fmt::print(file, "template<>\nstruct guest_layout<{}>;\n", struct_name); - fmt::print(file, "template<>\nstruct host_layout<{}>;\n", struct_name); - fmt::print(file, "guest_layout<{}>& to_guest(const host_layout<{}>&) = delete;\n", struct_name, struct_name); - continue; + // Guest layout definition + // NOTE: uint64_t has lower alignment requirements on 32-bit than on 64-bit, so we require tightly packed structs + // TODO: Now we must emit padding bytes explicitly, though! 
+ fmt::print(file, "template<>\nstruct __attribute__((packed)) guest_layout<{}> {{\n", struct_name); + if (type_compat.at(type) == TypeCompatibility::Full) { + fmt::print(file, " using type = {};\n", struct_name); + } else { + fmt::print(file, " struct type {{\n"); + for (auto& member : guest_abi.at(struct_name).get_if_struct()->members) { + fmt::print(file, " guest_layout<{}{}> {};\n", member.type_name, + member.array_size ? fmt::format("[{}]", member.array_size.value()) : "", member.member_name); + } + fmt::print(file, " }};\n"); + } + fmt::print(file, " type data;\n"); + fmt::print(file, "}};\n"); + + fmt::print(file, "template<>\nstruct guest_layout : guest_layout<{}> {{\n", struct_name, struct_name); + fmt::print(file, " guest_layout& operator=(const guest_layout<{}>& other) {{ memcpy(this, &other, sizeof(other)); return *this; }}\n", + struct_name); + fmt::print(file, "}};\n"); + + // Host layout definition + fmt::print(file, "template<>\n"); + fmt::print(file, "struct host_layout<{}> {{\n", struct_name); + fmt::print(file, " using type = {};\n", struct_name); + fmt::print(file, " type data;\n"); + fmt::print(file, "\n"); + // Host->guest layout conversion + fmt::print(file, " host_layout(const guest_layout<{}>& from) :\n", struct_name); + if (type_compat.at(type) == TypeCompatibility::Full) { + fmt::print(file, " data {{ from.data }} {{\n"); + } else { + // Conversion needs struct repacking. + // Wrapping each member in `host_layout<>` ensures this is done recursively. 
+ fmt::print(file, " data {{\n"); + auto map_field = [&file](clang::FieldDecl* member, bool skip_arrays) { + auto decl_name = member->getNameAsString(); + auto type_name = member->getType().getAsString(); + auto array_type = llvm::dyn_cast(member->getType()); + if (!array_type && skip_arrays) { + if (member->getType()->isFunctionPointerType()) { + // Function pointers must be handled manually, so zero them out by default + fmt::print(file, " .{} {{ }},\n", decl_name); + } else { + fmt::print(file, " .{} = host_layout<{}> {{ from.data.{} }}.data,\n", decl_name, type_name, decl_name); + } + } else if (array_type && !skip_arrays) { + // Copy element-wise below + fmt::print(file, " for (size_t i = 0; i < {}; ++i) {{\n", array_type->getSize().getZExtValue()); + fmt::print(file, " data.{}[i] = host_layout<{}> {{ from.data.{} }}.data[i];\n", decl_name, type_name, decl_name); + fmt::print(file, " }}\n"); } - - // Guest layout definition - // NOTE: uint64_t has lower alignment requirements on 32-bit than on 64-bit, so we require tightly packed structs - // TODO: Now we must emit padding bytes explicitly, though! - fmt::print(file, "template<>\nstruct __attribute__((packed)) guest_layout<{}> {{\n", struct_name); - if (type_compat.at(type) == TypeCompatibility::Full) { - fmt::print(file, " using type = {};\n", struct_name); + }; + // Prefer initialization via the constructor's initializer list if possible (to detect unintended narrowing), otherwise initialize in the body + for (auto* member : type->getAsStructureType()->getDecl()->fields()) { + if (!type_repack_info.UsesCustomRepackFor(member)) { + map_field(member, true); } else { - fmt::print(file, " struct type {{\n"); - for (auto& member : guest_abi.at(struct_name).get_if_struct()->members) { - fmt::print( file, " guest_layout<{}{}> {};\n", - member.type_name, - member.array_size ? 
fmt::format("[{}]", member.array_size.value()) : "", - member.member_name); - } - fmt::print(file, " }};\n"); + // Leave field uninitialized } - fmt::print(file, " type data;\n"); - fmt::print(file, "}};\n"); - - fmt::print(file, "template<>\nstruct guest_layout : guest_layout<{}> {{\n", struct_name, struct_name); - fmt::print(file, " guest_layout& operator=(const guest_layout<{}>& other) {{ memcpy(this, &other, sizeof(other)); return *this; }}\n", struct_name); - fmt::print(file, "}};\n"); - - // Host layout definition - fmt::print(file, "template<>\n"); - fmt::print(file, "struct host_layout<{}> {{\n", struct_name); - fmt::print(file, " using type = {};\n", struct_name); - fmt::print(file, " type data;\n"); - fmt::print(file, "\n"); - // Host->guest layout conversion - fmt::print(file, " host_layout(const guest_layout<{}>& from) :\n", struct_name); - if (type_compat.at(type) == TypeCompatibility::Full) { - fmt::print(file, " data {{ from.data }} {{\n"); + } + fmt::print(file, " }} {{\n"); + for (auto* member : type->getAsStructureType()->getDecl()->fields()) { + if (!type_repack_info.UsesCustomRepackFor(member)) { + map_field(member, false); } else { - // Conversion needs struct repacking. - // Wrapping each member in `host_layout<>` ensures this is done recursively. 
- fmt::print(file, " data {{\n"); - auto map_field = [&file](clang::FieldDecl* member, bool skip_arrays) { - auto decl_name = member->getNameAsString(); - auto type_name = member->getType().getAsString(); - auto array_type = llvm::dyn_cast(member->getType()); - if (!array_type && skip_arrays) { - if (member->getType()->isFunctionPointerType()) { - // Function pointers must be handled manually, so zero them out by default - fmt::print(file, " .{} {{ }},\n", decl_name); - } else { - fmt::print(file, " .{} = host_layout<{}> {{ from.data.{} }}.data,\n", decl_name, type_name, decl_name); - } - } else if (array_type && !skip_arrays) { - // Copy element-wise below - fmt::print(file, " for (size_t i = 0; i < {}; ++i) {{\n", array_type->getSize().getZExtValue()); - fmt::print(file, " data.{}[i] = host_layout<{}> {{ from.data.{} }}.data[i];\n", decl_name, type_name, decl_name); - fmt::print(file, " }}\n"); - } - }; - // Prefer initialization via the constructor's initializer list if possible (to detect unintended narrowing), otherwise initialize in the body - for (auto* member : type->getAsStructureType()->getDecl()->fields()) { - if (!type_repack_info.UsesCustomRepackFor(member)) { - map_field(member, true); - } else { - // Leave field uninitialized - } - } - fmt::print(file, " }} {{\n"); - for (auto* member : type->getAsStructureType()->getDecl()->fields()) { - if (!type_repack_info.UsesCustomRepackFor(member)) { - map_field(member, false); - } else { - // Leave field uninitialized - } - } + // Leave field uninitialized + } + } + } + fmt::print(file, " }}\n"); + fmt::print(file, "}};\n\n"); + + // Guest->host layout conversion + fmt::print(file, "inline guest_layout<{}> to_guest(const host_layout<{}>& from) {{\n", struct_name, struct_name); + if (type_compat.at(type) == TypeCompatibility::Full) { + fmt::print(file, " guest_layout<{}> ret;\n", struct_name); + fmt::print(file, " static_assert(sizeof(from) == sizeof(ret));\n"); + fmt::print(file, " memcpy(&ret, &from, 
sizeof(from));\n"); + } else { + // Conversion needs struct repacking. + // Wrapping each member in `to_guest(to_host_layout(...))` ensures this is done recursively. + fmt::print(file, " guest_layout<{}> ret {{ .data {{\n", struct_name); + auto map_field2 = [&file](const StructInfo::MemberInfo& member, bool skip_arrays) { + auto& decl_name = member.member_name; + auto& array_size = member.array_size; + if (!array_size && skip_arrays) { + if (member.is_function_pointer) { + // Function pointers must be handled manually, so zero them out by default + fmt::print(file, " .{} {{ }},\n", decl_name); + } else { + fmt::print(file, " .{} = to_guest(to_host_layout(from.data.{})),\n", decl_name, decl_name); + } + } else if (array_size && !skip_arrays) { + // Copy element-wise below + fmt::print(file, " for (size_t i = 0; i < {}; ++i) {{\n", array_size.value()); + fmt::print(file, " ret.data.{}.data[i] = to_guest(to_host_layout(from.data.{}[i]));\n", decl_name, decl_name); + fmt::print(file, " }}\n"); } - fmt::print(file, " }}\n"); - fmt::print(file, "}};\n\n"); - - // Guest->host layout conversion - fmt::print(file, "inline guest_layout<{}> to_guest(const host_layout<{}>& from) {{\n", struct_name, struct_name); - if (type_compat.at(type) == TypeCompatibility::Full) { - fmt::print(file, " guest_layout<{}> ret;\n", struct_name); - fmt::print(file, " static_assert(sizeof(from) == sizeof(ret));\n"); - fmt::print(file, " memcpy(&ret, &from, sizeof(from));\n"); + }; + + // Prefer initialization via the constructor's initializer list if possible (to detect unintended narrowing), otherwise initialize in the body + for (auto& member : guest_abi.at(struct_name).get_if_struct()->members) { + if (!type_repack_info.UsesCustomRepackFor(member.member_name)) { + map_field2(member, true); } else { - // Conversion needs struct repacking. - // Wrapping each member in `to_guest(to_host_layout(...))` ensures this is done recursively. 
- fmt::print(file, " guest_layout<{}> ret {{ .data {{\n", struct_name); - auto map_field2 = [&file](const StructInfo::MemberInfo& member, bool skip_arrays) { - auto& decl_name = member.member_name; - auto& array_size = member.array_size; - if (!array_size && skip_arrays) { - if (member.is_function_pointer) { - // Function pointers must be handled manually, so zero them out by default - fmt::print(file, " .{} {{ }},\n", decl_name); - } else { - fmt::print(file, " .{} = to_guest(to_host_layout(from.data.{})),\n", decl_name, decl_name); - } - } else if (array_size && !skip_arrays) { - // Copy element-wise below - fmt::print(file, " for (size_t i = 0; i < {}; ++i) {{\n", array_size.value()); - fmt::print(file, " ret.data.{}.data[i] = to_guest(to_host_layout(from.data.{}[i]));\n", decl_name, decl_name); - fmt::print(file, " }}\n"); - } - }; - - // Prefer initialization via the constructor's initializer list if possible (to detect unintended narrowing), otherwise initialize in the body - for (auto& member : guest_abi.at(struct_name).get_if_struct()->members) { - if (!type_repack_info.UsesCustomRepackFor(member.member_name)) { - map_field2(member, true); - } else { - // Leave field uninitialized - } - } - fmt::print(file, " }} }};\n"); - for (auto& member : guest_abi.at(struct_name).get_if_struct()->members) { - if (!type_repack_info.UsesCustomRepackFor(member.member_name)) { - map_field2(member, false); - } else { - // Leave field uninitialized - } - } + // Leave field uninitialized } - fmt::print(file, " return ret;\n"); - fmt::print(file, "}}\n\n"); - - // Forward-declare user-provided repacking functions - if (type_repack_info.custom_repacked_members.empty()) { - fmt::print(file, "void fex_apply_custom_repacking_entry(host_layout<{}>& source, const guest_layout<{}>& from) {{\n", struct_name, struct_name); - fmt::print(file, "}}\n"); - fmt::print(file, "bool fex_apply_custom_repacking_exit(guest_layout<{}>& into, host_layout<{}>& from) {{\n", struct_name, struct_name); 
- fmt::print(file, " return false;\n"); - fmt::print(file, "}}\n"); + } + fmt::print(file, " }} }};\n"); + for (auto& member : guest_abi.at(struct_name).get_if_struct()->members) { + if (!type_repack_info.UsesCustomRepackFor(member.member_name)) { + map_field2(member, false); } else { - fmt::print(file, "void fex_custom_repack_entry(host_layout<{}>& into, const guest_layout<{}>& from);\n", - struct_name, struct_name); - fmt::print(file, "bool fex_custom_repack_exit(guest_layout<{}>& into, const host_layout<{}>& from);\n\n", - struct_name, struct_name); - - fmt::print(file, "void fex_apply_custom_repacking_entry(host_layout<{}>& source, const guest_layout<{}>& from) {{\n", struct_name, struct_name); - fmt::print(file, " fex_custom_repack_entry(source, from);\n"); - fmt::print(file, "}}\n"); - - fmt::print(file, "bool fex_apply_custom_repacking_exit(guest_layout<{}>& into, host_layout<{}>& from) {{\n", struct_name, struct_name); - fmt::print(file, " return fex_custom_repack_exit(into, from);\n"); - fmt::print(file, "}}\n"); + // Leave field uninitialized } - - fmt::print(file, "template<> inline constexpr bool has_compatible_data_layout<{}> = {};\n", - struct_name, (type_compat.at(type) == TypeCompatibility::Full)); + } + } + fmt::print(file, " return ret;\n"); + fmt::print(file, "}}\n\n"); + + // Forward-declare user-provided repacking functions + if (type_repack_info.custom_repacked_members.empty()) { + fmt::print(file, "void fex_apply_custom_repacking_entry(host_layout<{}>& source, const guest_layout<{}>& from) {{\n", struct_name, struct_name); + fmt::print(file, "}}\n"); + fmt::print(file, "bool fex_apply_custom_repacking_exit(guest_layout<{}>& into, host_layout<{}>& from) {{\n", struct_name, struct_name); + fmt::print(file, " return false;\n"); + fmt::print(file, "}}\n"); + } else { + fmt::print(file, "void fex_custom_repack_entry(host_layout<{}>& into, const guest_layout<{}>& from);\n", struct_name, struct_name); + fmt::print(file, "bool 
fex_custom_repack_exit(guest_layout<{}>& into, const host_layout<{}>& from);\n\n", struct_name, struct_name); + + fmt::print(file, "void fex_apply_custom_repacking_entry(host_layout<{}>& source, const guest_layout<{}>& from) {{\n", struct_name, struct_name); + fmt::print(file, " fex_custom_repack_entry(source, from);\n"); + fmt::print(file, "}}\n"); + + fmt::print(file, "bool fex_apply_custom_repacking_exit(guest_layout<{}>& into, host_layout<{}>& from) {{\n", struct_name, struct_name); + fmt::print(file, " return fex_custom_repack_exit(into, from);\n"); + fmt::print(file, "}}\n"); } + + fmt::print(file, "template<> inline constexpr bool has_compatible_data_layout<{}> = {};\n", struct_name, + (type_compat.at(type) == TypeCompatibility::Full)); + } } void GenerateThunkLibsAction::OnAnalysisComplete(clang::ASTContext& context) { - ErrorReporter report_error { context }; - - // Compute data layout differences between host and guest - auto type_compat = [&]() { - std::unordered_map ret; - const auto host_abi = ComputeDataLayout(context, types); - for (const auto& [type, type_repack_info] : types) { - if (!type_repack_info.pointers_only) { - GetTypeCompatibility(context, type, host_abi, ret); - } - } - return ret; - }(); + ErrorReporter report_error {context}; - static auto format_decl = [](clang::QualType type, const std::string_view& name) { - clang::QualType innermostPointee = type; - while (innermostPointee->isPointerType()) { - innermostPointee = innermostPointee->getPointeeType(); - } - if (innermostPointee->isFunctionType()) { - // Function pointer declarations (e.g. void (**callback)()) require - // the variable name to be prefixed *and* suffixed. - - auto signature = type.getAsString(); - - // Search for strings like (*), (**), or (*****). 
Insert the - // variable name before the closing parenthesis - auto needle = signature.begin(); - for (; needle != signature.end(); ++needle) { - if (signature.end() - needle < 3 || - std::string_view { &*needle, 2 } != "(*") { - continue; - } - while (*++needle == '*') { - } - if (*needle == ')') { - break; - } - } - if (needle == signature.end()) { - // It's *probably* a typedef, so this should be safe after all - return fmt::format("{} {}", signature, name); - } else { - signature.insert(needle, name.begin(), name.end()); - return signature; - } - } else { - return type.getAsString() + " " + std::string(name); - } - }; + // Compute data layout differences between host and guest + auto type_compat = [&]() { + std::unordered_map ret; + const auto host_abi = ComputeDataLayout(context, types); + for (const auto& [type, type_repack_info] : types) { + if (!type_repack_info.pointers_only) { + GetTypeCompatibility(context, type, host_abi, ret); + } + } + return ret; + }(); - auto format_function_params = [](const FunctionParams& params) { - std::string ret; - for (std::size_t idx = 0; idx < params.param_types.size(); ++idx) { - auto& type = params.param_types[idx]; - ret += format_decl(type, fmt::format("a_{}", idx)) + ", "; + static auto format_decl = [](clang::QualType type, const std::string_view& name) { + clang::QualType innermostPointee = type; + while (innermostPointee->isPointerType()) { + innermostPointee = innermostPointee->getPointeeType(); + } + if (innermostPointee->isFunctionType()) { + // Function pointer declarations (e.g. void (**callback)()) require + // the variable name to be prefixed *and* suffixed. + + auto signature = type.getAsString(); + + // Search for strings like (*), (**), or (*****). 
Insert the + // variable name before the closing parenthesis + auto needle = signature.begin(); + for (; needle != signature.end(); ++needle) { + if (signature.end() - needle < 3 || std::string_view {&*needle, 2} != "(*") { + continue; } - // drop trailing ", " - ret.resize(ret.size() > 2 ? ret.size() - 2 : 0); - return ret; - }; - - auto get_sha256 = [this](const std::string& function_name, bool include_libname) { - std::string sha256_message = (include_libname ? libname + ":" : "") + function_name; - std::vector sha256(SHA256_DIGEST_LENGTH); - SHA256(reinterpret_cast(sha256_message.data()), - sha256_message.size(), - sha256.data()); - return sha256; - }; - - auto get_callback_name = [](std::string_view function_name, unsigned param_index) -> std::string { - return fmt::format("{}CBFN{}", function_name, param_index); - }; - - // Files used guest-side - if (!output_filenames.guest.empty()) { - std::ofstream file(output_filenames.guest); - - // Guest->Host transition points for API functions - file << "extern \"C\" {\n"; - for (auto& thunk : thunks) { - const auto& function_name = thunk.function_name; - auto sha256 = get_sha256(function_name, true); - fmt::print( file, "MAKE_THUNK({}, {}, \"{:#02x}\")\n", - libname, function_name, fmt::join(sha256, ", ")); + while (*++needle == '*') {} + if (*needle == ')') { + break; } - file << "}\n"; - - // Guest->Host transition points for invoking runtime host-function pointers based on their signature - std::vector> sha256s; - for (auto type_it = thunked_funcptrs.begin(); type_it != thunked_funcptrs.end(); ++type_it) { - auto* type = type_it->second.first; - std::string funcptr_signature = clang::QualType { type, 0 }.getAsString(); - - auto cb_sha256 = get_sha256("fexcallback_" + funcptr_signature, false); - auto it = std::find(sha256s.begin(), sha256s.end(), cb_sha256); - if (it != sha256s.end()) { - // TODO: Avoid this ugly way of avoiding duplicates - continue; - } else { - sha256s.push_back(cb_sha256); - } + } + if (needle 
== signature.end()) { + // It's *probably* a typedef, so this should be safe after all + return fmt::format("{} {}", signature, name); + } else { + signature.insert(needle, name.begin(), name.end()); + return signature; + } + } else { + return type.getAsString() + " " + std::string(name); + } + }; - // Thunk used for guest-side calls to host function pointers - file << " // " << funcptr_signature << "\n"; - auto funcptr_idx = std::distance(thunked_funcptrs.begin(), type_it); - fmt::print( file, " MAKE_CALLBACK_THUNK(callback_{}, {}, \"{:#02x}\");\n", - funcptr_idx, funcptr_signature, fmt::join(cb_sha256, ", ")); - } + auto format_function_params = [](const FunctionParams& params) { + std::string ret; + for (std::size_t idx = 0; idx < params.param_types.size(); ++idx) { + auto& type = params.param_types[idx]; + ret += format_decl(type, fmt::format("a_{}", idx)) + ", "; + } + // drop trailing ", " + ret.resize(ret.size() > 2 ? ret.size() - 2 : 0); + return ret; + }; + + auto get_sha256 = [this](const std::string& function_name, bool include_libname) { + std::string sha256_message = (include_libname ? 
libname + ":" : "") + function_name; + std::vector sha256(SHA256_DIGEST_LENGTH); + SHA256(reinterpret_cast(sha256_message.data()), sha256_message.size(), sha256.data()); + return sha256; + }; + + auto get_callback_name = [](std::string_view function_name, unsigned param_index) -> std::string { + return fmt::format("{}CBFN{}", function_name, param_index); + }; + + // Files used guest-side + if (!output_filenames.guest.empty()) { + std::ofstream file(output_filenames.guest); + + // Guest->Host transition points for API functions + file << "extern \"C\" {\n"; + for (auto& thunk : thunks) { + const auto& function_name = thunk.function_name; + auto sha256 = get_sha256(function_name, true); + fmt::print(file, "MAKE_THUNK({}, {}, \"{:#02x}\")\n", libname, function_name, fmt::join(sha256, ", ")); + } + file << "}\n"; + + // Guest->Host transition points for invoking runtime host-function pointers based on their signature + std::vector> sha256s; + for (auto type_it = thunked_funcptrs.begin(); type_it != thunked_funcptrs.end(); ++type_it) { + auto* type = type_it->second.first; + std::string funcptr_signature = clang::QualType {type, 0}.getAsString(); + + auto cb_sha256 = get_sha256("fexcallback_" + funcptr_signature, false); + auto it = std::find(sha256s.begin(), sha256s.end(), cb_sha256); + if (it != sha256s.end()) { + // TODO: Avoid this ugly way of avoiding duplicates + continue; + } else { + sha256s.push_back(cb_sha256); + } + + // Thunk used for guest-side calls to host function pointers + file << " // " << funcptr_signature << "\n"; + auto funcptr_idx = std::distance(thunked_funcptrs.begin(), type_it); + fmt::print(file, " MAKE_CALLBACK_THUNK(callback_{}, {}, \"{:#02x}\");\n", funcptr_idx, funcptr_signature, fmt::join(cb_sha256, ", ")); + } - // Thunks-internal packing functions - file << "extern \"C\" {\n"; - for (auto& data : thunks) { - const auto& function_name = data.function_name; - bool is_void = data.return_type->isVoidType(); - file << "FEX_PACKFN_LINKAGE 
auto fexfn_pack_" << function_name << "("; - for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) { - auto& type = data.param_types[idx]; - file << (idx == 0 ? "" : ", ") << format_decl(type, fmt::format("a_{}", idx)); - } - // Using trailing return type as it makes handling function pointer returns much easier - file << ") -> " << data.return_type.getAsString() << " {\n"; - file << " struct __attribute__((packed)) {\n"; - for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) { - auto& type = data.param_types[idx]; - file << " " << format_decl(type.getUnqualifiedType(), fmt::format("a_{}", idx)) << ";\n"; - } - if (!is_void) { - file << " " << format_decl(data.return_type, "rv") << ";\n"; - } else if (data.param_types.size() == 0) { - // Avoid "empty struct has size 0 in C, size 1 in C++" warning - file << " char force_nonempty;\n"; - } - file << " } args;\n"; - - for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) { - auto cb = data.callbacks.find(idx); - - file << " args.a_" << idx << " = "; - if (cb == data.callbacks.end() || cb->second.is_stub) { - file << "a_" << idx << ";\n"; - } else { - // Before passing guest function pointers to the host, wrap them in a host-callable trampoline - fmt::print(file, "AllocateHostTrampolineForGuestFunction(a_{});\n", idx); - } - } - file << " fexthunks_" << libname << "_" << function_name << "(&args);\n"; - if (!is_void) { - file << " return args.rv;\n"; - } - file << "}\n"; + // Thunks-internal packing functions + file << "extern \"C\" {\n"; + for (auto& data : thunks) { + const auto& function_name = data.function_name; + bool is_void = data.return_type->isVoidType(); + file << "FEX_PACKFN_LINKAGE auto fexfn_pack_" << function_name << "("; + for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) { + auto& type = data.param_types[idx]; + file << (idx == 0 ? 
"" : ", ") << format_decl(type, fmt::format("a_{}", idx)); + } + // Using trailing return type as it makes handling function pointer returns much easier + file << ") -> " << data.return_type.getAsString() << " {\n"; + file << " struct __attribute__((packed)) {\n"; + for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) { + auto& type = data.param_types[idx]; + file << " " << format_decl(type.getUnqualifiedType(), fmt::format("a_{}", idx)) << ";\n"; + } + if (!is_void) { + file << " " << format_decl(data.return_type, "rv") << ";\n"; + } else if (data.param_types.size() == 0) { + // Avoid "empty struct has size 0 in C, size 1 in C++" warning + file << " char force_nonempty;\n"; + } + file << " } args;\n"; + + for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) { + auto cb = data.callbacks.find(idx); + + file << " args.a_" << idx << " = "; + if (cb == data.callbacks.end() || cb->second.is_stub) { + file << "a_" << idx << ";\n"; + } else { + // Before passing guest function pointers to the host, wrap them in a host-callable trampoline + fmt::print(file, "AllocateHostTrampolineForGuestFunction(a_{});\n", idx); } - file << "}\n"; + } + file << " fexthunks_" << libname << "_" << function_name << "(&args);\n"; + if (!is_void) { + file << " return args.rv;\n"; + } + file << "}\n"; + } + file << "}\n"; + + // Publicly exports equivalent to symbols exported from the native guest library + file << "extern \"C\" {\n"; + for (auto& data : thunked_api) { + if (data.custom_guest_impl) { + continue; + } + + const auto& function_name = data.function_name; + + file << "__attribute__((alias(\"fexfn_pack_" << function_name << "\"))) auto " << function_name << "("; + for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) { + auto& type = data.param_types[idx]; + file << (idx == 0 ? 
"" : ", ") << format_decl(type, "a_" + std::to_string(idx)); + } + file << ") -> " << data.return_type.getAsString() << ";\n"; + } + file << "}\n"; + + // Symbol enumerators + for (std::size_t namespace_idx = 0; namespace_idx < namespaces.size(); ++namespace_idx) { + const auto& ns = namespaces[namespace_idx]; + file << "#define FOREACH_" << ns.name << (ns.name.empty() ? "" : "_") << "SYMBOL(EXPAND) \\\n"; + for (auto& symbol : thunked_api) { + if (symbol.symtable_namespace.value_or(0) == namespace_idx) { + file << " EXPAND(" << symbol.function_name << ", \"TODO\") \\\n"; + } + } + file << "\n"; + } + } - // Publicly exports equivalent to symbols exported from the native guest library - file << "extern \"C\" {\n"; - for (auto& data : thunked_api) { - if (data.custom_guest_impl) { - continue; - } + // Files used host-side + if (!output_filenames.host.empty()) { + std::ofstream file(output_filenames.host); - const auto& function_name = data.function_name; + EmitLayoutWrappers(context, file, type_compat); - file << "__attribute__((alias(\"fexfn_pack_" << function_name << "\"))) auto " << function_name << "("; - for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) { - auto& type = data.param_types[idx]; - file << (idx == 0 ? "" : ", ") << format_decl(type, "a_" + std::to_string(idx)); - } - file << ") -> " << data.return_type.getAsString() << ";\n"; + // Forward declarations for symbols loaded from the native host library + for (auto& import : thunked_api) { + const auto& function_name = import.function_name; + const char* variadic_ellipsis = import.is_variadic ? ", ..." 
: ""; + file << "using fexldr_type_" << libname << "_" << function_name << " = auto " + << "(" << format_function_params(import) << variadic_ellipsis << ") -> " << import.return_type.getAsString() << ";\n"; + file << "static fexldr_type_" << libname << "_" << function_name << " *fexldr_ptr_" << libname << "_" << function_name << ";\n"; + } + + file << "extern \"C\" {\n"; + for (auto& thunk : thunks) { + const auto& function_name = thunk.function_name; + + // Generate stub callbacks + for (auto& [cb_idx, cb] : thunk.callbacks) { + if (cb.is_stub) { + const char* variadic_ellipsis = cb.is_variadic ? ", ..." : ""; + auto cb_function_name = "fexfn_unpack_" + get_callback_name(function_name, cb_idx) + "_stub"; + file << "[[noreturn]] static " << cb.return_type.getAsString() << " " << cb_function_name << "(" << format_function_params(cb) + << variadic_ellipsis << ") {\n"; + file << " fprintf(stderr, \"FATAL: Attempted to invoke callback stub for " << function_name << "\\n\");\n"; + file << " std::abort();\n"; + file << "}\n"; } - file << "}\n"; - - // Symbol enumerators - for (std::size_t namespace_idx = 0; namespace_idx < namespaces.size(); ++namespace_idx) { - const auto& ns = namespaces[namespace_idx]; - file << "#define FOREACH_" << ns.name << (ns.name.empty() ? 
"" : "_") << "SYMBOL(EXPAND) \\\n"; - for (auto& symbol : thunked_api) { - if (symbol.symtable_namespace.value_or(0) == namespace_idx) { - file << " EXPAND(" << symbol.function_name << ", \"TODO\") \\\n"; - } - } - file << "\n"; + } + + auto get_guest_type_name = [this](clang::QualType type) { + if (type->isBuiltinType() && !type->isFloatingType()) { + auto size = guest_abi.at(type.getUnqualifiedType().getAsString()).get_if_simple_or_struct()->size_bits; + return get_fixed_size_int_name(type.getTypePtr(), size); + } else if (type->isPointerType() && type->getPointeeType()->isIntegerType() && !type->getPointeeType()->isEnumeralType() && + !type->getPointeeType()->isVoidType()) { + auto size = guest_abi.at(type->getPointeeType().getUnqualifiedType().getAsString()).get_if_simple_or_struct()->size_bits; + return fmt::format("{}{}*", type->getPointeeType().isConstQualified() ? "const " : "", + get_fixed_size_int_name(type->getPointeeType().getTypePtr(), size)); + } else { + return type.getUnqualifiedType().getAsString(); } - } + }; - // Files used host-side - if (!output_filenames.host.empty()) { - std::ofstream file(output_filenames.host); + // Forward declarations for user-provided implementations + if (thunk.custom_host_impl) { + file << "static auto fexfn_impl_" << libname << "_" << function_name << "("; + for (std::size_t idx = 0; idx < thunk.param_types.size(); ++idx) { + auto& type = thunk.param_types[idx]; - EmitLayoutWrappers(context, file, type_compat); + file << (idx == 0 ? "" : ", "); - // Forward declarations for symbols loaded from the native host library - for (auto& import : thunked_api) { - const auto& function_name = import.function_name; - const char* variadic_ellipsis = import.is_variadic ? ", ..." 
: ""; - file << "using fexldr_type_" << libname << "_" << function_name << " = auto " << "(" << format_function_params(import) << variadic_ellipsis << ") -> " << import.return_type.getAsString() << ";\n"; - file << "static fexldr_type_" << libname << "_" << function_name << " *fexldr_ptr_" << libname << "_" << function_name << ";\n"; + if (thunk.param_annotations[idx].is_passthrough) { + fmt::print(file, "guest_layout<{}> a_{}", get_guest_type_name(type), idx); + } else { + file << format_decl(type, fmt::format("a_{}", idx)); + } + } + // Using trailing return type as it makes handling function pointer returns much easier + file << ") -> " << thunk.return_type.getAsString() << ";\n"; + } + + // Check data layout compatibility of parameter types + // TODO: Also check non-struct/non-pointer types + // TODO: Also check return type + for (size_t param_idx = 0; param_idx != thunk.param_types.size(); ++param_idx) { + const auto& param_type = thunk.param_types[param_idx]; + if (!param_type->isPointerType() || !param_type->getPointeeType()->isStructureType()) { + continue; } + if (!thunk.param_annotations[param_idx].is_passthrough) { + auto type = param_type->getPointeeType(); + if (!types.at(context.getCanonicalType(type.getTypePtr())).assumed_compatible && + type_compat.at(context.getCanonicalType(type.getTypePtr())) == TypeCompatibility::None) { + // TODO: Factor in "assume_compatible_layout" annotations here + // That annotation should cause the type to be treated as TypeCompatibility::Full + throw report_error(thunk.decl->getLocation(), "Unsupported parameter type %0").AddTaggedVal(param_type); + } + } + } - file << "extern \"C\" {\n"; - for (auto& thunk : thunks) { - const auto& function_name = thunk.function_name; - - // Generate stub callbacks - for (auto& [cb_idx, cb] : thunk.callbacks) { - if (cb.is_stub) { - const char* variadic_ellipsis = cb.is_variadic ? ", ..." 
: ""; - auto cb_function_name = "fexfn_unpack_" + get_callback_name(function_name, cb_idx) + "_stub"; - file << "[[noreturn]] static " << cb.return_type.getAsString() << " " - << cb_function_name << "(" - << format_function_params(cb) << variadic_ellipsis << ") {\n"; - file << " fprintf(stderr, \"FATAL: Attempted to invoke callback stub for " << function_name << "\\n\");\n"; - file << " std::abort();\n"; - file << "}\n"; - } - } + // Packed argument structs used in fexfn_unpack_* + auto GeneratePackedArgs = [&](const auto& function_name, const ThunkedFunction& thunk) -> std::string { + std::string struct_name = "fexfn_packed_args_" + libname + "_" + function_name; + file << "struct __attribute__((packed)) " << struct_name << " {\n"; - auto get_guest_type_name = [this](clang::QualType type) { - if (type->isBuiltinType() && !type->isFloatingType()) { - auto size = guest_abi.at(type.getUnqualifiedType().getAsString()).get_if_simple_or_struct()->size_bits; - return get_fixed_size_int_name(type.getTypePtr(), size); - } else if (type->isPointerType() && type->getPointeeType()->isIntegerType() && !type->getPointeeType()->isEnumeralType() && !type->getPointeeType()->isVoidType()) { - auto size = guest_abi.at(type->getPointeeType().getUnqualifiedType().getAsString()).get_if_simple_or_struct()->size_bits; - return fmt::format("{}{}*", type->getPointeeType().isConstQualified() ? "const " : "", get_fixed_size_int_name(type->getPointeeType().getTypePtr(), size)); - } else { - return type.getUnqualifiedType().getAsString(); - } - }; - - // Forward declarations for user-provided implementations - if (thunk.custom_host_impl) { - file << "static auto fexfn_impl_" << libname << "_" << function_name << "("; - for (std::size_t idx = 0; idx < thunk.param_types.size(); ++idx) { - auto& type = thunk.param_types[idx]; - - file << (idx == 0 ? 
"" : ", "); - - if (thunk.param_annotations[idx].is_passthrough) { - fmt::print(file, "guest_layout<{}> a_{}", get_guest_type_name(type), idx); - } else { - file << format_decl(type, fmt::format("a_{}", idx)); - } - } - // Using trailing return type as it makes handling function pointer returns much easier - file << ") -> " << thunk.return_type.getAsString() << ";\n"; - } + for (std::size_t idx = 0; idx < thunk.param_types.size(); ++idx) { + fmt::print(file, " guest_layout<{}> a_{};\n", get_guest_type_name(thunk.param_types[idx]), idx); + } + if (!thunk.return_type->isVoidType()) { + fmt::print(file, " guest_layout<{}> rv;\n", get_guest_type_name(thunk.return_type)); + } else if (thunk.param_types.size() == 0) { + // Avoid "empty struct has size 0 in C, size 1 in C++" warning + file << " char force_nonempty;\n"; + } + file << "};\n"; + return struct_name; + }; + auto struct_name = GeneratePackedArgs(function_name, thunk); + + // Unpacking functions + auto function_to_call = "fexldr_ptr_" + libname + "_" + function_name; + if (thunk.custom_host_impl) { + function_to_call = "fexfn_impl_" + libname + "_" + function_name; + } + + auto get_type_name_with_nonconst_pointee = [&](clang::QualType type) { + type = type.getLocalUnqualifiedType(); + if (type->isPointerType()) { + // Strip away "const" from pointee type + type = context.getPointerType(type->getPointeeType().getLocalUnqualifiedType()); + } + return get_type_name(context, type.getTypePtr()); + }; - // Check data layout compatibility of parameter types - // TODO: Also check non-struct/non-pointer types - // TODO: Also check return type - for (size_t param_idx = 0; param_idx != thunk.param_types.size(); ++param_idx) { - const auto& param_type = thunk.param_types[param_idx]; - if (!param_type->isPointerType() || !param_type->getPointeeType()->isStructureType()) { - continue; - } - if (!thunk.param_annotations[param_idx].is_passthrough) { - auto type = param_type->getPointeeType(); - if 
(!types.at(context.getCanonicalType(type.getTypePtr())).assumed_compatible && type_compat.at(context.getCanonicalType(type.getTypePtr())) == TypeCompatibility::None) { - // TODO: Factor in "assume_compatible_layout" annotations here - // That annotation should cause the type to be treated as TypeCompatibility::Full - throw report_error(thunk.decl->getLocation(), "Unsupported parameter type %0").AddTaggedVal(param_type); - } - } - } - // Packed argument structs used in fexfn_unpack_* - auto GeneratePackedArgs = [&](const auto &function_name, const ThunkedFunction &thunk) -> std::string { - std::string struct_name = "fexfn_packed_args_" + libname + "_" + function_name; - file << "struct __attribute__((packed)) " << struct_name << " {\n"; - - for (std::size_t idx = 0; idx < thunk.param_types.size(); ++idx) { - fmt::print(file, " guest_layout<{}> a_{};\n", get_guest_type_name(thunk.param_types[idx]), idx); - } - if (!thunk.return_type->isVoidType()) { - fmt::print(file, " guest_layout<{}> rv;\n", get_guest_type_name(thunk.return_type)); - } else if (thunk.param_types.size() == 0) { - // Avoid "empty struct has size 0 in C, size 1 in C++" warning - file << " char force_nonempty;\n"; - } - file << "};\n"; - return struct_name; - }; - auto struct_name = GeneratePackedArgs(function_name, thunk); - - // Unpacking functions - auto function_to_call = "fexldr_ptr_" + libname + "_" + function_name; - if (thunk.custom_host_impl) { - function_to_call = "fexfn_impl_" + libname + "_" + function_name; - } + file << "static void fexfn_unpack_" << libname << "_" << function_name << "(" << struct_name << "* args) {\n"; - auto get_type_name_with_nonconst_pointee = [&](clang::QualType type) { - type = type.getLocalUnqualifiedType(); - if (type->isPointerType()) { - // Strip away "const" from pointee type - type = context.getPointerType(type->getPointeeType().getLocalUnqualifiedType()); - } - return get_type_name(context, type.getTypePtr()); - }; - - - file << "static void fexfn_unpack_" 
<< libname << "_" << function_name << "(" << struct_name << "* args) {\n"; - - for (unsigned param_idx = 0; param_idx != thunk.param_types.size(); ++param_idx) { - if (thunk.callbacks.contains(param_idx) && thunk.callbacks.at(param_idx).is_stub) { - continue; - } - - auto& param_type = thunk.param_types[param_idx]; - const bool is_assumed_compatible = param_type->isPointerType() && - (thunk.param_annotations[param_idx].assume_compatible || ((param_type->getPointeeType()->isStructureType() || (param_type->getPointeeType()->isPointerType() && param_type->getPointeeType()->getPointeeType()->isStructureType())) && - (types.contains(context.getCanonicalType(param_type->getPointeeType()->getLocallyUnqualifiedSingleStepDesugaredType().getTypePtr())) && LookupType(context, context.getCanonicalType(param_type->getPointeeType()->getLocallyUnqualifiedSingleStepDesugaredType().getTypePtr())).assumed_compatible))); - - std::optional pointee_compat; - if (param_type->isPointerType()) { - // Get TypeCompatibility from existing entry, or register TypeCompatibility::None if no entry exists - // TODO: Currently needs TypeCompatibility::Full workaround... - pointee_compat = type_compat.emplace(context.getCanonicalType(param_type->getPointeeType().getTypePtr()), TypeCompatibility::Full).first->second; - } - - if (thunk.param_annotations[param_idx].is_passthrough) { - // args are passed directly to function, no need to use `unpacked` wrappers - continue; - } - - // Layout repacking happens here - if (!param_type->isPointerType() || (is_assumed_compatible || pointee_compat == TypeCompatibility::Full) || - param_type->getPointeeType()->isBuiltinType() /* TODO: handle size_t. 
Actually, properly check for data layout compatibility */) { - // Fully compatible - fmt::print(file, " host_layout<{}> a_{} {{ args->a_{} }};\n", get_type_name(context, param_type.getTypePtr()), param_idx, param_idx); - } else if (pointee_compat == TypeCompatibility::Repackable) { - // TODO: Require opt-in for this to be emitted since it's single-element only; otherwise, pointers-to-arrays arguments will cause stack trampling - fmt::print(file, " auto a_{} = make_repack_wrapper<{}>(args->a_{});\n", param_idx, get_type_name_with_nonconst_pointee(param_type), param_idx); - } else { - throw report_error(thunk.decl->getLocation(), "Cannot generate unpacking function for function %0 with unannotated pointer parameter %1").AddString(function_name).AddTaggedVal(param_type); - } - } + for (unsigned param_idx = 0; param_idx != thunk.param_types.size(); ++param_idx) { + if (thunk.callbacks.contains(param_idx) && thunk.callbacks.at(param_idx).is_stub) { + continue; + } - if (!thunk.return_type->isVoidType()) { - fmt::print(file, " args->rv = "); - if (!thunk.return_type->isFunctionPointerType()) { - fmt::print(file, "to_guest(to_host_layout<{}>(", thunk.return_type.getAsString()); - } - } - fmt::print(file, "{}(", function_to_call); - { - auto format_param = [&](std::size_t idx) { - auto cb = thunk.callbacks.find(idx); - if (cb != thunk.callbacks.end() && cb->second.is_stub) { - return "fexfn_unpack_" + get_callback_name(function_name, cb->first) + "_stub"; - } else if (cb != thunk.callbacks.end()) { - auto arg_name = fmt::format("args->a_{}", idx); // Use parameter directly - // Use comma operator to inject a function call before returning the argument - // TODO: Avoid casting away the guest_layout - if (thunk.custom_host_impl) { - return fmt::format("(FinalizeHostTrampolineForGuestFunction({}), {})", arg_name, arg_name); - } else { - return fmt::format("(FinalizeHostTrampolineForGuestFunction({}), ({})(uint64_t {{ {}.data }}))", arg_name, get_type_name(context, 
thunk.param_types[idx].getTypePtr()), arg_name); - } - } else if (thunk.param_annotations[idx].is_passthrough) { - // Pass raw guest_layout - return fmt::format("args->a_{}", idx); - } else { - // Unwrap host_layout/repack_wrapper layer - return fmt::format("unwrap_host(a_{})", idx); - } - }; - - fmt::print(file, "{}", format_function_args(thunk, format_param)); - } - if (!thunk.return_type->isVoidType() && !thunk.return_type->isFunctionPointerType()) { - fmt::print(file, "))"); - } - fmt::print(file, ");\n"); + auto& param_type = thunk.param_types[param_idx]; + const bool is_assumed_compatible = + param_type->isPointerType() && + (thunk.param_annotations[param_idx].assume_compatible || + ((param_type->getPointeeType()->isStructureType() || + (param_type->getPointeeType()->isPointerType() && param_type->getPointeeType()->getPointeeType()->isStructureType())) && + (types.contains(context.getCanonicalType(param_type->getPointeeType()->getLocallyUnqualifiedSingleStepDesugaredType().getTypePtr())) && + LookupType(context, context.getCanonicalType(param_type->getPointeeType()->getLocallyUnqualifiedSingleStepDesugaredType().getTypePtr())) + .assumed_compatible))); + + std::optional pointee_compat; + if (param_type->isPointerType()) { + // Get TypeCompatibility from existing entry, or register TypeCompatibility::None if no entry exists + // TODO: Currently needs TypeCompatibility::Full workaround... 
+ pointee_compat = + type_compat.emplace(context.getCanonicalType(param_type->getPointeeType().getTypePtr()), TypeCompatibility::Full).first->second; + } - file << "}\n"; + if (thunk.param_annotations[param_idx].is_passthrough) { + // args are passed directly to function, no need to use `unpacked` wrappers + continue; } - file << "}\n"; - - // Endpoints for Guest->Host invocation of API functions - file << "static ExportEntry exports[] = {\n"; - for (auto& thunk : thunks) { - const auto& function_name = thunk.function_name; - auto sha256 = get_sha256(function_name, true); - fmt::print( file, " {{(uint8_t*)\"\\x{:02x}\", (void(*)(void *))&fexfn_unpack_{}_{}}}, // {}:{}\n", - fmt::join(sha256, "\\x"), libname, function_name, libname, function_name); + + // Layout repacking happens here + if (!param_type->isPointerType() || (is_assumed_compatible || pointee_compat == TypeCompatibility::Full) || + param_type->getPointeeType()->isBuiltinType() /* TODO: handle size_t. Actually, properly check for data layout compatibility */) { + // Fully compatible + fmt::print(file, " host_layout<{}> a_{} {{ args->a_{} }};\n", get_type_name(context, param_type.getTypePtr()), param_idx, param_idx); + } else if (pointee_compat == TypeCompatibility::Repackable) { + // TODO: Require opt-in for this to be emitted since it's single-element only; otherwise, pointers-to-arrays arguments will cause stack trampling + fmt::print(file, " auto a_{} = make_repack_wrapper<{}>(args->a_{});\n", param_idx, + get_type_name_with_nonconst_pointee(param_type), param_idx); + } else { + throw report_error(thunk.decl->getLocation(), "Cannot generate unpacking function for function %0 with unannotated pointer " + "parameter %1") + .AddString(function_name) + .AddTaggedVal(param_type); } + } - // Endpoints for Guest->Host invocation of runtime host-function pointers - // NOTE: The function parameters may differ slightly between guest and host, - // e.g. due to differing sizes or due to data layout differences. 
- // Hence, two separate parameter lists are managed here. - for (auto& host_funcptr_entry : thunked_funcptrs) { - auto& [type, param_annotations] = host_funcptr_entry.second; - auto func_type = type->getAs(); - FuncPtrInfo info = { }; - - // TODO: Use GetTypeNameWithFixedSizeIntegers - info.result = func_type->getReturnType().getAsString(); - - // NOTE: In guest contexts, integer types must be mapped to - // fixed-size equivalents. Since this is a host context, this - // isn't strictly necessary here, but it makes matching up - // guest_layout/host_layout constructors easier. - for (auto arg : func_type->getParamTypes()) { - info.args.push_back(GetTypeNameWithFixedSizeIntegers(context, arg)); - } + if (!thunk.return_type->isVoidType()) { + fmt::print(file, " args->rv = "); + if (!thunk.return_type->isFunctionPointerType()) { + fmt::print(file, "to_guest(to_host_layout<{}>(", thunk.return_type.getAsString()); + } + } + fmt::print(file, "{}(", function_to_call); + { + auto format_param = [&](std::size_t idx) { + auto cb = thunk.callbacks.find(idx); + if (cb != thunk.callbacks.end() && cb->second.is_stub) { + return "fexfn_unpack_" + get_callback_name(function_name, cb->first) + "_stub"; + } else if (cb != thunk.callbacks.end()) { + auto arg_name = fmt::format("args->a_{}", idx); // Use parameter directly + // Use comma operator to inject a function call before returning the argument + // TODO: Avoid casting away the guest_layout + if (thunk.custom_host_impl) { + return fmt::format("(FinalizeHostTrampolineForGuestFunction({}), {})", arg_name, arg_name); + } else { + return fmt::format("(FinalizeHostTrampolineForGuestFunction({}), ({})(uint64_t {{ {}.data }}))", arg_name, + get_type_name(context, thunk.param_types[idx].getTypePtr()), arg_name); + } + } else if (thunk.param_annotations[idx].is_passthrough) { + // Pass raw guest_layout + return fmt::format("args->a_{}", idx); + } else { + // Unwrap host_layout/repack_wrapper layer + return 
fmt::format("unwrap_host(a_{})", idx); + } + }; + + fmt::print(file, "{}", format_function_args(thunk, format_param)); + } + if (!thunk.return_type->isVoidType() && !thunk.return_type->isFunctionPointerType()) { + fmt::print(file, "))"); + } + fmt::print(file, ");\n"); + + file << "}\n"; + } + file << "}\n"; + + // Endpoints for Guest->Host invocation of API functions + file << "static ExportEntry exports[] = {\n"; + for (auto& thunk : thunks) { + const auto& function_name = thunk.function_name; + auto sha256 = get_sha256(function_name, true); + fmt::print(file, " {{(uint8_t*)\"\\x{:02x}\", (void(*)(void *))&fexfn_unpack_{}_{}}}, // {}:{}\n", fmt::join(sha256, "\\x"), libname, + function_name, libname, function_name); + } - std::string annotations; - for (int param_idx = 0; param_idx < info.args.size(); ++param_idx) { - if (param_idx != 0) { - annotations += ", "; - } - - annotations += "ParameterAnnotations {"; - if (param_annotations.contains(param_idx) && param_annotations.at(param_idx).is_passthrough) { - annotations += ".is_passthrough=true,"; - } - if (param_annotations.contains(param_idx) && param_annotations.at(param_idx).assume_compatible) { - annotations += ".assume_compatible=true,"; - } - annotations += "}"; - } - auto guest_info = LookupGuestFuncPtrInfo(host_funcptr_entry.first.c_str()); - // TODO: Consider differences in guest/host return types - fmt::print( file, " {{(uint8_t*)\"\\x{:02x}\", (void(*)(void *))&GuestWrapperForHostFunction<{}({}){}{}>::Call<{}>}}, // {}\n", - fmt::join(guest_info.sha256, "\\x"), guest_info.result, fmt::join(info.args, ", "), guest_info.args.empty() ? "" : ", ", fmt::join(guest_info.args, ", "), annotations, host_funcptr_entry.first); + // Endpoints for Guest->Host invocation of runtime host-function pointers + // NOTE: The function parameters may differ slightly between guest and host, + // e.g. due to differing sizes or due to data layout differences. + // Hence, two separate parameter lists are managed here. 
+ for (auto& host_funcptr_entry : thunked_funcptrs) { + auto& [type, param_annotations] = host_funcptr_entry.second; + auto func_type = type->getAs(); + FuncPtrInfo info = {}; + + // TODO: Use GetTypeNameWithFixedSizeIntegers + info.result = func_type->getReturnType().getAsString(); + + // NOTE: In guest contexts, integer types must be mapped to + // fixed-size equivalents. Since this is a host context, this + // isn't strictly necessary here, but it makes matching up + // guest_layout/host_layout constructors easier. + for (auto arg : func_type->getParamTypes()) { + info.args.push_back(GetTypeNameWithFixedSizeIntegers(context, arg)); + } + + std::string annotations; + for (int param_idx = 0; param_idx < info.args.size(); ++param_idx) { + if (param_idx != 0) { + annotations += ", "; } - file << " { nullptr, nullptr }\n"; - file << "};\n"; + annotations += "ParameterAnnotations {"; + if (param_annotations.contains(param_idx) && param_annotations.at(param_idx).is_passthrough) { + annotations += ".is_passthrough=true,"; + } + if (param_annotations.contains(param_idx) && param_annotations.at(param_idx).assume_compatible) { + annotations += ".assume_compatible=true,"; + } + annotations += "}"; + } + auto guest_info = LookupGuestFuncPtrInfo(host_funcptr_entry.first.c_str()); + // TODO: Consider differences in guest/host return types + fmt::print(file, " {{(uint8_t*)\"\\x{:02x}\", (void(*)(void *))&GuestWrapperForHostFunction<{}({}){}{}>::Call<{}>}}, // {}\n", + fmt::join(guest_info.sha256, "\\x"), guest_info.result, fmt::join(info.args, ", "), guest_info.args.empty() ? "" : ", ", + fmt::join(guest_info.args, ", "), annotations, host_funcptr_entry.first); + } - // Symbol lookup from native host library - file << "static void* fexldr_ptr_" << libname << "_so;\n"; - file << "extern \"C\" bool fexldr_init_" << libname << "() {\n"; + file << " { nullptr, nullptr }\n"; + file << "};\n"; - std::string version_suffix; - if (lib_version) { - version_suffix = '.' 
+ std::to_string(*lib_version); - } - const std::string library_filename = libfilename + ".so" + version_suffix; + // Symbol lookup from native host library + file << "static void* fexldr_ptr_" << libname << "_so;\n"; + file << "extern \"C\" bool fexldr_init_" << libname << "() {\n"; - // Load the host library in the global symbol namespace. - // This follows how these libraries get loaded in a non-emulated environment, - // Either by directly linking to the library or a loader (In OpenGL or Vulkan) putting everything in the global namespace. - file << " fexldr_ptr_" << libname << "_so = dlopen(\"" << library_filename << "\", RTLD_GLOBAL | RTLD_LAZY);\n"; + std::string version_suffix; + if (lib_version) { + version_suffix = '.' + std::to_string(*lib_version); + } + const std::string library_filename = libfilename + ".so" + version_suffix; - file << " if (!fexldr_ptr_" << libname << "_so) { return false; }\n\n"; - for (auto& import : thunked_api) { - fmt::print( file, " (void*&)fexldr_ptr_{}_{} = {}(fexldr_ptr_{}_so, \"{}\");\n", - libname, import.function_name, import.host_loader, libname, import.function_name); - } - file << " return true;\n"; - file << "}\n"; + // Load the host library in the global symbol namespace. + // This follows how these libraries get loaded in a non-emulated environment, + // Either by directly linking to the library or a loader (In OpenGL or Vulkan) putting everything in the global namespace. 
+ file << " fexldr_ptr_" << libname << "_so = dlopen(\"" << library_filename << "\", RTLD_GLOBAL | RTLD_LAZY);\n"; + + file << " if (!fexldr_ptr_" << libname << "_so) { return false; }\n\n"; + for (auto& import : thunked_api) { + fmt::print(file, " (void*&)fexldr_ptr_{}_{} = {}(fexldr_ptr_{}_so, \"{}\");\n", libname, import.function_name, import.host_loader, + libname, import.function_name); } + file << " return true;\n"; + file << "}\n"; + } } -bool GenerateThunkLibsActionFactory::runInvocation( - std::shared_ptr Invocation, clang::FileManager *Files, - std::shared_ptr PCHContainerOps, - clang::DiagnosticConsumer *DiagConsumer) { +bool GenerateThunkLibsActionFactory::runInvocation(std::shared_ptr Invocation, clang::FileManager* Files, + std::shared_ptr PCHContainerOps, + clang::DiagnosticConsumer* DiagConsumer) { clang::CompilerInstance Compiler(std::move(PCHContainerOps)); Compiler.setInvocation(std::move(Invocation)); Compiler.setFileManager(Files); @@ -800,8 +805,9 @@ bool GenerateThunkLibsActionFactory::runInvocation( GenerateThunkLibsAction Action(libname, output_filenames, abi); Compiler.createDiagnostics(DiagConsumer, false); - if (!Compiler.hasDiagnostics()) + if (!Compiler.hasDiagnostics()) { return false; + } Compiler.createSourceManager(*Files); diff --git a/ThunkLibs/Generator/main.cpp b/ThunkLibs/Generator/main.cpp index d238fc1ea1..a378cc0902 100644 --- a/ThunkLibs/Generator/main.cpp +++ b/ThunkLibs/Generator/main.cpp @@ -11,81 +11,81 @@ using namespace clang::tooling; void print_usage(const char* program_name) { - std::cerr << "Usage: " << program_name << " -- \n"; + std::cerr << "Usage: " << program_name << " -- \n"; } int main(int argc, char* const argv[]) { - llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); - - if (argc < 5) { - print_usage(argv[0]); - return EXIT_FAILURE; - } - - // Parse compile flags after "--" (this updates argc to the index of the "--" separator) - std::string error; - auto compile_db = 
FixedCompilationDatabase::loadFromCommandLine(argc, argv, error); - if (!compile_db) { - print_usage(argv[0]); - std::cerr << "\nError: " << error << "\n"; - return EXIT_FAILURE; - } - - // Process arguments before the "--" separator - if (argc != 5 && argc != 6) { - print_usage(argv[0]); - return EXIT_FAILURE; - } - - char* const* arg = argv + 1; - const auto filename = *arg++; - const std::string libname = *arg++; - const std::string target_abi = *arg++; - const std::string output_filename = *arg++; - - OutputFilenames output_filenames; - if (target_abi == "-host") { - output_filenames.host = output_filename; - } else if (target_abi == "-guest") { - output_filenames.guest = output_filename; - } else { - std::cerr << "Unrecognized generator target ABI \"" << target_abi << "\"\n"; - return EXIT_FAILURE; - } - - ClangTool Tool(*compile_db, { filename }); - if (CLANG_RESOURCE_DIR[0] != 0) { - auto set_resource_directory = [](const clang::tooling::CommandLineArguments &Args, clang::StringRef) { - clang::tooling::CommandLineArguments AdjustedArgs = Args; - AdjustedArgs.push_back(std::string { "-resource-dir=" } + CLANG_RESOURCE_DIR); - return AdjustedArgs; - }; - Tool.appendArgumentsAdjuster(set_resource_directory); - } - - ClangTool GuestTool = Tool; - - { - const bool is_32bit_guest = (argv[5] == std::string_view { "-for-32bit-guest" }); - auto append_guest_args = [is_32bit_guest](const clang::tooling::CommandLineArguments &Args, clang::StringRef) { - clang::tooling::CommandLineArguments AdjustedArgs = Args; - const char* platform = is_32bit_guest ? 
"i686" : "x86_64"; - if (is_32bit_guest) { - AdjustedArgs.push_back("-m32"); - AdjustedArgs.push_back("-DIS_32BIT_THUNK"); - } - AdjustedArgs.push_back(std::string { "--target=" } + platform + "-linux-unknown"); - AdjustedArgs.push_back("-isystem"); - AdjustedArgs.push_back(std::string { "/usr/" } + platform + "-linux-gnu/include/"); - AdjustedArgs.push_back("-DGUEST_THUNK_LIBRARY"); - return AdjustedArgs; - }; - GuestTool.appendArgumentsAdjuster(append_guest_args); - } - - auto data_layout_analysis_factory = std::make_unique(); - GuestTool.run(data_layout_analysis_factory.get()); - auto& data_layout = data_layout_analysis_factory->GetDataLayout(); - - return Tool.run(std::make_unique(std::move(libname), std::move(output_filenames), data_layout).get()); + llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + + if (argc < 5) { + print_usage(argv[0]); + return EXIT_FAILURE; + } + + // Parse compile flags after "--" (this updates argc to the index of the "--" separator) + std::string error; + auto compile_db = FixedCompilationDatabase::loadFromCommandLine(argc, argv, error); + if (!compile_db) { + print_usage(argv[0]); + std::cerr << "\nError: " << error << "\n"; + return EXIT_FAILURE; + } + + // Process arguments before the "--" separator + if (argc != 5 && argc != 6) { + print_usage(argv[0]); + return EXIT_FAILURE; + } + + char* const* arg = argv + 1; + const auto filename = *arg++; + const std::string libname = *arg++; + const std::string target_abi = *arg++; + const std::string output_filename = *arg++; + + OutputFilenames output_filenames; + if (target_abi == "-host") { + output_filenames.host = output_filename; + } else if (target_abi == "-guest") { + output_filenames.guest = output_filename; + } else { + std::cerr << "Unrecognized generator target ABI \"" << target_abi << "\"\n"; + return EXIT_FAILURE; + } + + ClangTool Tool(*compile_db, {filename}); + if (CLANG_RESOURCE_DIR[0] != 0) { + auto set_resource_directory = [](const clang::tooling::CommandLineArguments& 
Args, clang::StringRef) { + clang::tooling::CommandLineArguments AdjustedArgs = Args; + AdjustedArgs.push_back(std::string {"-resource-dir="} + CLANG_RESOURCE_DIR); + return AdjustedArgs; + }; + Tool.appendArgumentsAdjuster(set_resource_directory); + } + + ClangTool GuestTool = Tool; + + { + const bool is_32bit_guest = (argv[5] == std::string_view {"-for-32bit-guest"}); + auto append_guest_args = [is_32bit_guest](const clang::tooling::CommandLineArguments& Args, clang::StringRef) { + clang::tooling::CommandLineArguments AdjustedArgs = Args; + const char* platform = is_32bit_guest ? "i686" : "x86_64"; + if (is_32bit_guest) { + AdjustedArgs.push_back("-m32"); + AdjustedArgs.push_back("-DIS_32BIT_THUNK"); + } + AdjustedArgs.push_back(std::string {"--target="} + platform + "-linux-unknown"); + AdjustedArgs.push_back("-isystem"); + AdjustedArgs.push_back(std::string {"/usr/"} + platform + "-linux-gnu/include/"); + AdjustedArgs.push_back("-DGUEST_THUNK_LIBRARY"); + return AdjustedArgs; + }; + GuestTool.appendArgumentsAdjuster(append_guest_args); + } + + auto data_layout_analysis_factory = std::make_unique(); + GuestTool.run(data_layout_analysis_factory.get()); + auto& data_layout = data_layout_analysis_factory->GetDataLayout(); + + return Tool.run(std::make_unique(std::move(libname), std::move(output_filenames), data_layout).get()); } diff --git a/ThunkLibs/libEGL/libEGL_Guest.cpp b/ThunkLibs/libEGL/libEGL_Guest.cpp index 78f4890def..c157f00ebc 100644 --- a/ThunkLibs/libEGL/libEGL_Guest.cpp +++ b/ThunkLibs/libEGL/libEGL_Guest.cpp @@ -19,10 +19,10 @@ typedef void voidFunc(); extern "C" { - voidFunc *eglGetProcAddress(const char *procname) { - // TODO: Fix this HACK - return glXGetProcAddress((const GLubyte*)procname); - } +voidFunc* eglGetProcAddress(const char* procname) { + // TODO: Fix this HACK + return glXGetProcAddress((const GLubyte*)procname); +} } LOAD_LIB(libEGL) diff --git a/ThunkLibs/libEGL/libEGL_interface.cpp b/ThunkLibs/libEGL/libEGL_interface.cpp index 
c9bb2d1cc6..27de16df36 100644 --- a/ThunkLibs/libEGL/libEGL_interface.cpp +++ b/ThunkLibs/libEGL/libEGL_interface.cpp @@ -4,30 +4,48 @@ template struct fex_gen_config { - unsigned version = 1; + unsigned version = 1; }; // Function, parameter index, parameter type [optional] template struct fex_gen_param {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; // EGLNativeDisplayType is a pointer to opaque data (wl_display/(X)Display/...) 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_param : fexgen::assume_compatible_data_layout {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_param : fexgen::assume_compatible_data_layout {}; diff --git a/ThunkLibs/libGL/libGL_Guest.cpp b/ThunkLibs/libGL/libGL_Guest.cpp index 8c21af5411..3d09be3f77 100644 --- a/ThunkLibs/libGL/libGL_Guest.cpp +++ b/ThunkLibs/libGL/libGL_Guest.cpp @@ -30,48 +30,46 @@ typedef void voidFunc(); // Maps OpenGL API function names to the address of a guest function which is // linked to the corresponding host function pointer -const std::unordered_map HostPtrInvokers = - std::invoke([]() { +const std::unordered_map HostPtrInvokers = std::invoke([]() { #define PAIR(name, unused) Ret[#name] = reinterpret_cast(GetCallerForHostFunction(name)); - std::unordered_map Ret; - FOREACH_internal_SYMBOL(PAIR); - return Ret; + std::unordered_map Ret; + FOREACH_internal_SYMBOL(PAIR); + return Ret; #undef PAIR - }); +}); extern "C" { - voidFunc *glXGetProcAddress(const GLubyte *procname) { - auto Ret = fexfn_pack_glXGetProcAddress(procname); - if (!Ret) { - return nullptr; - } - - auto TargetFuncIt = HostPtrInvokers.find(reinterpret_cast(procname)); - if (TargetFuncIt == HostPtrInvokers.end()) { - std::string_view procname_s { reinterpret_cast(procname) }; - // If glXGetProcAddress is querying itself, then we can just return itself. - // Some games do this for unknown reasons. 
- if (procname_s == "glXGetProcAddress" || - procname_s == "glXGetProcAddressARB") { - return reinterpret_cast(glXGetProcAddress); - } +voidFunc* glXGetProcAddress(const GLubyte* procname) { + auto Ret = fexfn_pack_glXGetProcAddress(procname); + if (!Ret) { + return nullptr; + } - // Extension found in host but not in our interface definition => Not fatal but warn about it - // Some games query leaked GLES symbols but don't use them - // glFrustrumf : ES 1.x function - // - Papers, Please - // - Dicey Dungeons - fprintf(stderr, "glXGetProcAddress: not found %s\n", procname); - return nullptr; + auto TargetFuncIt = HostPtrInvokers.find(reinterpret_cast(procname)); + if (TargetFuncIt == HostPtrInvokers.end()) { + std::string_view procname_s {reinterpret_cast(procname)}; + // If glXGetProcAddress is querying itself, then we can just return itself. + // Some games do this for unknown reasons. + if (procname_s == "glXGetProcAddress" || procname_s == "glXGetProcAddressARB") { + return reinterpret_cast(glXGetProcAddress); } - LinkAddressToFunction((uintptr_t)Ret, TargetFuncIt->second); - return Ret; + // Extension found in host but not in our interface definition => Not fatal but warn about it + // Some games query leaked GLES symbols but don't use them + // glFrustrumf : ES 1.x function + // - Papers, Please + // - Dicey Dungeons + fprintf(stderr, "glXGetProcAddress: not found %s\n", procname); + return nullptr; } - voidFunc *glXGetProcAddressARB(const GLubyte *procname) { - return glXGetProcAddress(procname); - } + LinkAddressToFunction((uintptr_t)Ret, TargetFuncIt->second); + return Ret; +} + +voidFunc* glXGetProcAddressARB(const GLubyte* procname) { + return glXGetProcAddress(procname); +} } // libGL.so must pull in libX11.so as a dependency. 
Referencing some libX11 diff --git a/ThunkLibs/libGL/libGL_Host.cpp b/ThunkLibs/libGL/libGL_Host.cpp index a35f4bb335..aad50da4ac 100644 --- a/ThunkLibs/libGL/libGL_Host.cpp +++ b/ThunkLibs/libGL/libGL_Host.cpp @@ -25,7 +25,7 @@ desc: Uses glXGetProcAddress instead of dlsym #include "thunkgen_host_libGL.inl" void* symbolFromGlXGetProcAddr(void*, const char* name) { - return (void*)glXGetProcAddress((const GLubyte*)name); + return (void*)glXGetProcAddress((const GLubyte*)name); } EXPORTS(libGL) diff --git a/ThunkLibs/libGL/libGL_interface.cpp b/ThunkLibs/libGL/libGL_interface.cpp index d05c0cbab4..856dc2b26b 100644 --- a/ThunkLibs/libGL/libGL_interface.cpp +++ b/ThunkLibs/libGL/libGL_interface.cpp @@ -15,3146 +15,6255 @@ template struct fex_gen_config { - unsigned version = 1; + unsigned version = 1; }; -template<> struct fex_gen_config : fexgen::custom_guest_entrypoint, fexgen::returns_guest_pointer {}; +template<> +struct fex_gen_config : fexgen::custom_guest_entrypoint, fexgen::returns_guest_pointer {}; template struct fex_gen_type {}; -template<> struct fex_gen_type> : fexgen::opaque_type {}; -template<> struct fex_gen_type> : fexgen::opaque_type {}; -template<> struct fex_gen_type> : fexgen::opaque_type {}; +template<> +struct fex_gen_type> : fexgen::opaque_type {}; +template<> +struct fex_gen_type> : fexgen::opaque_type {}; +template<> +struct fex_gen_type> : fexgen::opaque_type {}; // NOTE: These should be opaque, but actually aren't because the respective libraries aren't thunked -template<> struct fex_gen_type<_cl_context> : fexgen::opaque_type {}; -template<> struct fex_gen_type<_cl_event> : fexgen::opaque_type {}; +template<> +struct fex_gen_type<_cl_context> : fexgen::opaque_type {}; +template<> +struct fex_gen_type<_cl_event> : fexgen::opaque_type {}; // Opaque for the purpose of libGL -template<> struct fex_gen_type<_XDisplay> : fexgen::opaque_type {}; +template<> +struct fex_gen_type<_XDisplay> : fexgen::opaque_type {}; #ifndef IS_32BIT_THUNK // TODO: 
These are largely compatible, *but* contain function pointer members that need adjustment! -template<> struct fex_gen_type : fexgen::assume_compatible_data_layout {}; +template<> +struct fex_gen_type : fexgen::assume_compatible_data_layout {}; #endif // Symbols queryable through glXGetProcAddr namespace internal { template -struct fex_gen_config : fexgen::generate_guest_symtable, fexgen::indirect_guest_calls { -}; +struct fex_gen_config : fexgen::generate_guest_symtable, fexgen::indirect_guest_calls {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; -//template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; +// template<> struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> 
struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config 
{}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config : fexgen::callback_stub {}; -template<> struct fex_gen_config : fexgen::callback_stub {}; -template<> struct fex_gen_config : fexgen::callback_stub {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct 
fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; 
+template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config : fexgen::callback_stub {}; +template<> +struct fex_gen_config : fexgen::callback_stub {}; +template<> +struct fex_gen_config : fexgen::callback_stub {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> 
+struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; // GLext.h -//template<> struct fex_gen_config : fexgen::custom_guest_entrypoint, fexgen::returns_guest_pointer{}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; +// template<> struct fex_gen_config : fexgen::custom_guest_entrypoint, fexgen::returns_guest_pointer{}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; // glx.h -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; // glxext.h -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; 
-template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; -template<> struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct 
fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; +template<> +struct fex_gen_config {}; } // namespace internal diff --git a/ThunkLibs/libSDL2/libSDL2_Guest.cpp b/ThunkLibs/libSDL2/libSDL2_Guest.cpp index 788a8d1adb..da362ea640 100644 --- a/ThunkLibs/libSDL2/libSDL2_Guest.cpp +++ b/ThunkLibs/libSDL2/libSDL2_Guest.cpp @@ -28,44 +28,72 @@ LOAD_LIB(libSDL2) struct __va_list_tag; -int SDL_snprintf(char*, size_t, const char*, ...) { return printf("SDL2: SDL_snprintf\n"); } -int SDL_sscanf(const char*, const char*, ...) { return printf("SDL2: SDL_sscanf\n"); } -void SDL_Log(const char*, ...) { printf("SDL2: SDL_Log\n"); } -void SDL_LogCritical(int, const char*, ...) { printf("SDL2: SDL_LogCritical\n"); } -void SDL_LogDebug(int, const char*, ...) { printf("SDL2: SDL_LogDebug\n"); } -void SDL_LogError(int, const char*, ...) { printf("SDL2: SDL_LogError\n"); } -void SDL_LogInfo(int, const char*, ...) { printf("SDL2: SDL_LogInfo\n"); } -void SDL_LogMessage(int, SDL_LogPriority, const char*, ...) { printf("SDL2: SDL_LogMessage\n"); } -void SDL_LogVerbose(int, const char*, ...) { printf("SDL2: SDL_LogVerbose\n"); } -void SDL_LogWarn(int, const char*, ...) { printf("SDL2: SDL_LogWarn\n"); } -int SDL_SetError(const char*, ...) { return printf("SDL2: SDL_SetError\n"); } +int SDL_snprintf(char*, size_t, const char*, ...) 
{ + return printf("SDL2: SDL_snprintf\n"); +} +int SDL_sscanf(const char*, const char*, ...) { + return printf("SDL2: SDL_sscanf\n"); +} +void SDL_Log(const char*, ...) { + printf("SDL2: SDL_Log\n"); +} +void SDL_LogCritical(int, const char*, ...) { + printf("SDL2: SDL_LogCritical\n"); +} +void SDL_LogDebug(int, const char*, ...) { + printf("SDL2: SDL_LogDebug\n"); +} +void SDL_LogError(int, const char*, ...) { + printf("SDL2: SDL_LogError\n"); +} +void SDL_LogInfo(int, const char*, ...) { + printf("SDL2: SDL_LogInfo\n"); +} +void SDL_LogMessage(int, SDL_LogPriority, const char*, ...) { + printf("SDL2: SDL_LogMessage\n"); +} +void SDL_LogVerbose(int, const char*, ...) { + printf("SDL2: SDL_LogVerbose\n"); +} +void SDL_LogWarn(int, const char*, ...) { + printf("SDL2: SDL_LogWarn\n"); +} +int SDL_SetError(const char*, ...) { + return printf("SDL2: SDL_SetError\n"); +} -void SDL_LogMessageV(int, SDL_LogPriority, const char*, __va_list_tag*) { printf("SDL2: SDL_LogMessageV\n");} -int SDL_vsnprintf(char*, size_t, const char*, __va_list_tag*) { return printf("SDL2: SDL_vsnprintf\n");} -int SDL_vsscanf(const char*, const char*, __va_list_tag*) { return printf("SDL2: SDL_vsscanf\n");} +void SDL_LogMessageV(int, SDL_LogPriority, const char*, __va_list_tag*) { + printf("SDL2: SDL_LogMessageV\n"); +} +int SDL_vsnprintf(char*, size_t, const char*, __va_list_tag*) { + return printf("SDL2: SDL_vsnprintf\n"); +} +int SDL_vsscanf(const char*, const char*, __va_list_tag*) { + return printf("SDL2: SDL_vsscanf\n"); +} extern "C" { - void* SDL_GL_GetProcAddress(const char* name) { - // TODO: Fix this HACK - return (void*)glXGetProcAddress((const GLubyte*)name); - } +void* SDL_GL_GetProcAddress(const char* name) { + // TODO: Fix this HACK + return (void*)glXGetProcAddress((const GLubyte*)name); +} - // TODO: These are not 100% conforming to SDL either - void *SDL_LoadObject(const char *sofile) { - auto lib = dlopen(sofile, RTLD_NOW | RTLD_LOCAL); - if (!lib) { - printf("SDL_LoadObject: 
Failed to load %s\n", sofile); - } - return lib; - } +// TODO: These are not 100% conforming to SDL either +void* SDL_LoadObject(const char* sofile) { + auto lib = dlopen(sofile, RTLD_NOW | RTLD_LOCAL); + if (!lib) { + printf("SDL_LoadObject: Failed to load %s\n", sofile); + } + return lib; +} - void *SDL_LoadFunction(void *lib, const char *name) { - return dlsym(lib, name); - } +void* SDL_LoadFunction(void* lib, const char* name) { + return dlsym(lib, name); +} - void SDL_UnloadObject(void *lib) { - if (lib) { - dlclose(lib); - } - } +void SDL_UnloadObject(void* lib) { + if (lib) { + dlclose(lib); + } +} } diff --git a/ThunkLibs/libVDSO/libVDSO_Guest.cpp b/ThunkLibs/libVDSO/libVDSO_Guest.cpp index f3ede2e3ec..b8ce3d6cb4 100644 --- a/ThunkLibs/libVDSO/libVDSO_Guest.cpp +++ b/ThunkLibs/libVDSO/libVDSO_Guest.cpp @@ -19,28 +19,26 @@ desc: Linux VDSO thunking #include "thunkgen_guest_libVDSO.inl" extern "C" { -time_t __vdso_time(time_t *tloc) __attribute__((alias("fexfn_pack_time"))); -int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) __attribute__((alias("fexfn_pack_gettimeofday"))); -int __vdso_clock_gettime(clockid_t, struct timespec *) __attribute__((alias("fexfn_pack_clock_gettime"))); -int __vdso_clock_getres(clockid_t, struct timespec *) __attribute__((alias("fexfn_pack_clock_getres"))); -int __vdso_getcpu(uint32_t *, uint32_t *) __attribute__((alias("fexfn_pack_getcpu"))); +time_t __vdso_time(time_t* tloc) __attribute__((alias("fexfn_pack_time"))); +int __vdso_gettimeofday(struct timeval* tv, struct timezone* tz) __attribute__((alias("fexfn_pack_gettimeofday"))); +int __vdso_clock_gettime(clockid_t, struct timespec*) __attribute__((alias("fexfn_pack_clock_gettime"))); +int __vdso_clock_getres(clockid_t, struct timespec*) __attribute__((alias("fexfn_pack_clock_getres"))); +int __vdso_getcpu(uint32_t*, uint32_t*) __attribute__((alias("fexfn_pack_getcpu"))); #if __SIZEOF_POINTER__ == 4 -int __vdso_clock_gettime64(clockid_t, struct timespec64 *) 
__attribute__((alias("fexfn_pack_clock_gettime64"))); +int __vdso_clock_gettime64(clockid_t, struct timespec64*) __attribute__((alias("fexfn_pack_clock_gettime64"))); -__attribute__((naked)) -int __kernel_vsyscall() { +__attribute__((naked)) int __kernel_vsyscall() { asm volatile(R"( .intel_syntax noprefix int 0x80; ret; .att_syntax prefix - )" - ::: "memory"); + )" :: + : "memory"); } -__attribute__((naked)) -void __kernel_sigreturn() { +__attribute__((naked)) void __kernel_sigreturn() { asm volatile(R"( .intel_syntax noprefix pop eax; @@ -48,18 +46,17 @@ void __kernel_sigreturn() { int 0x80; nop; .att_syntax prefix - )" - ::: "memory"); + )" :: + : "memory"); } -__attribute__((naked)) -void __kernel_rt_sigreturn() { +__attribute__((naked)) void __kernel_rt_sigreturn() { asm volatile(R"( .intel_syntax noprefix mov eax, 0xad; int 0x80; .att_syntax prefix - )" - ::: "memory"); + )" :: + : "memory"); } #endif diff --git a/ThunkLibs/libVDSO/libVDSO_interface.cpp b/ThunkLibs/libVDSO/libVDSO_interface.cpp index 6d123cee6b..ac924cd2f7 100644 --- a/ThunkLibs/libVDSO/libVDSO_interface.cpp +++ b/ThunkLibs/libVDSO/libVDSO_interface.cpp @@ -7,16 +7,21 @@ #include "Types.h" template -struct fex_gen_config { -}; +struct fex_gen_config {}; -template<> struct fex_gen_config