Skip to content

Commit

Permalink
x64: Implement SIMD fma (#4474)
Browse files Browse the repository at this point in the history
* x64: Add VEX Instruction Encoder

This uses a builder pattern similar to the EVEX Encoder.
It does not yet support memory accesses.

* x64: Add FMA Flag

* x64: Implement SIMD `fma`

* x64: Use 4-register VEX Inst

* x64: Reorder VEX pretty print args
  • Loading branch information
afonso360 authored Jul 25, 2022
1 parent 4aaf7ff commit 02c3b47
Show file tree
Hide file tree
Showing 15 changed files with 640 additions and 3 deletions.
9 changes: 8 additions & 1 deletion cranelift/codegen/meta/src/isa/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ fn define_settings(shared: &SettingGroup) -> SettingGroup {
"AVX2: CPUID.07H:EBX.AVX2[bit 5]",
false,
);
let has_fma = settings.add_bool(
"has_fma",
"Has support for FMA.",
"FMA: CPUID.01H:ECX.FMA[bit 12]",
false,
);
let has_avx512bitalg = settings.add_bool(
"has_avx512bitalg",
"Has support for AVX512BITALG.",
Expand Down Expand Up @@ -116,6 +122,7 @@ fn define_settings(shared: &SettingGroup) -> SettingGroup {
settings.add_predicate("use_ssse3", predicate!(has_ssse3));
settings.add_predicate("use_sse41", predicate!(has_sse41));
settings.add_predicate("use_sse42", predicate!(has_sse41 && has_sse42));
settings.add_predicate("use_fma", predicate!(has_avx && has_fma));

settings.add_predicate(
"use_ssse3_simd",
Expand Down Expand Up @@ -195,7 +202,7 @@ fn define_settings(shared: &SettingGroup) -> SettingGroup {
let broadwell = settings.add_preset(
"broadwell",
"Broadwell microarchitecture.",
preset!(haswell),
preset!(haswell && has_fma),
);
let skylake = settings.add_preset("skylake", "Skylake microarchitecture.", preset!(broadwell));
let cannonlake = settings.add_preset(
Expand Down
4 changes: 3 additions & 1 deletion cranelift/codegen/src/isa/x64/encoding/rex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ impl From<(OperandSize, Reg)> for RexFlags {
/// Allows using the same opcode byte in different "opcode maps" to allow for more instruction
/// encodings. See appendix A in the Intel Software Developer's Manual, volume 2A, for more details.
#[allow(missing_docs)]
#[derive(PartialEq)]
pub enum OpcodeMap {
None,
_0F,
Expand All @@ -168,7 +169,7 @@ pub enum OpcodeMap {

impl OpcodeMap {
/// Normally the opcode map is specified as bytes in the instruction, but some x64 encoding
/// formats pack this information as bits in a prefix (e.g. EVEX).
/// formats pack this information as bits in a prefix (e.g. VEX / EVEX).
pub(crate) fn bits(&self) -> u8 {
match self {
OpcodeMap::None => 0b00,
Expand All @@ -187,6 +188,7 @@ impl Default for OpcodeMap {

/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
/// covers only the small set of possibilities that we actually need.
#[derive(PartialEq)]
pub enum LegacyPrefixes {
/// No prefix bytes.
None,
Expand Down
Loading

0 comments on commit 02c3b47

Please sign in to comment.