Skip to content

Commit

Permalink
Merge pull request eclipse-omr#862 from 0dvictor/SIMD
Browse files Browse the repository at this point in the history
Introduce SIMD evaluators on X86
  • Loading branch information
0xdaryl authored Apr 12, 2017
2 parents f67fb94 + 1226e87 commit e30b6fa
Show file tree
Hide file tree
Showing 7 changed files with 252 additions and 26 deletions.
38 changes: 19 additions & 19 deletions compiler/x/amd64/codegen/TreeEvaluatorTable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@
TR::TreeEvaluator::unImpOpEvaluator, // TR::vnot
TR::TreeEvaluator::unImpOpEvaluator, // TR::vselect
TR::TreeEvaluator::unImpOpEvaluator, // TR::vperm
TR::TreeEvaluator::unImpOpEvaluator, // TR::vsplats
TR::TreeEvaluator::SIMDsplatsEvaluator, // TR::vsplats
TR::TreeEvaluator::unImpOpEvaluator, // TR::vdmergel
TR::TreeEvaluator::unImpOpEvaluator, // TR::vdmergeh
TR::TreeEvaluator::unImpOpEvaluator, // TR::vdsetelem
Expand Down Expand Up @@ -459,9 +459,9 @@
TR::TreeEvaluator::unImpOpEvaluator, // TR::vdec
TR::TreeEvaluator::unImpOpEvaluator, // TR::vneg
TR::TreeEvaluator::unImpOpEvaluator, // TR::vcom
TR::TreeEvaluator::unImpOpEvaluator, // TR::vadd
TR::TreeEvaluator::FloatingPointAndVectorBinaryArithmeticEvaluator, // TR::vadd
TR::TreeEvaluator::unImpOpEvaluator, // TR::vsub
TR::TreeEvaluator::unImpOpEvaluator, // TR::vmul
TR::TreeEvaluator::FloatingPointAndVectorBinaryArithmeticEvaluator, // TR::vmul
TR::TreeEvaluator::unImpOpEvaluator, // TR::vdiv
TR::TreeEvaluator::unImpOpEvaluator, // TR::vrem
TR::TreeEvaluator::unImpOpEvaluator, // TR::vand
Expand All @@ -480,10 +480,10 @@
TR::TreeEvaluator::unImpOpEvaluator, // TR::vucmple
TR::TreeEvaluator::unImpOpEvaluator, // TR::vcmpge
TR::TreeEvaluator::unImpOpEvaluator, // TR::vucmpge
TR::TreeEvaluator::unImpOpEvaluator, // TR::vload
TR::TreeEvaluator::unImpOpEvaluator, // TR::vloadi
TR::TreeEvaluator::unImpOpEvaluator, // TR::vstore
TR::TreeEvaluator::unImpOpEvaluator, // TR::vstorei
TR::TreeEvaluator::SIMDloadEvaluator, // TR::vload
TR::TreeEvaluator::SIMDloadEvaluator, // TR::vloadi
TR::TreeEvaluator::SIMDstoreEvaluator, // TR::vstore
TR::TreeEvaluator::SIMDstoreEvaluator, // TR::vstorei
TR::TreeEvaluator::unImpOpEvaluator, // TR::vrand
TR::TreeEvaluator::unImpOpEvaluator, // TR::vreturn
TR::TreeEvaluator::unImpOpEvaluator, // TR::vcall
Expand All @@ -494,18 +494,18 @@
TR::TreeEvaluator::unImpOpEvaluator, // TR::getvelem
TR::TreeEvaluator::unImpOpEvaluator, // TR::vsetelem

TR::TreeEvaluator::unImpOpEvaluator, // TR::vbRegLoad
TR::TreeEvaluator::unImpOpEvaluator, // TR::vsRegLoad
TR::TreeEvaluator::unImpOpEvaluator, // TR::viRegLoad
TR::TreeEvaluator::unImpOpEvaluator, // TR::vlRegLoad
TR::TreeEvaluator::unImpOpEvaluator, // TR::vfRegLoad
TR::TreeEvaluator::unImpOpEvaluator, // TR::vdRegLoad
TR::TreeEvaluator::unImpOpEvaluator, // TR::vbRegStore
TR::TreeEvaluator::unImpOpEvaluator, // TR::vsRegStore
TR::TreeEvaluator::unImpOpEvaluator, // TR::viRegStore
TR::TreeEvaluator::unImpOpEvaluator, // TR::vlRegStore
TR::TreeEvaluator::unImpOpEvaluator, // TR::vfRegStore
TR::TreeEvaluator::unImpOpEvaluator, // TR::vdRegStore
TR::TreeEvaluator::SIMDRegLoadEvaluator, // TR::vbRegLoad
TR::TreeEvaluator::SIMDRegLoadEvaluator, // TR::vsRegLoad
TR::TreeEvaluator::SIMDRegLoadEvaluator, // TR::viRegLoad
TR::TreeEvaluator::SIMDRegLoadEvaluator, // TR::vlRegLoad
TR::TreeEvaluator::SIMDRegLoadEvaluator, // TR::vfRegLoad
TR::TreeEvaluator::SIMDRegLoadEvaluator, // TR::vdRegLoad
TR::TreeEvaluator::SIMDRegStoreEvaluator, // TR::vbRegStore
TR::TreeEvaluator::SIMDRegStoreEvaluator, // TR::vsRegStore
TR::TreeEvaluator::SIMDRegStoreEvaluator, // TR::viRegStore
TR::TreeEvaluator::SIMDRegStoreEvaluator, // TR::vlRegStore
TR::TreeEvaluator::SIMDRegStoreEvaluator, // TR::vfRegStore
TR::TreeEvaluator::SIMDRegStoreEvaluator, // TR::vdRegStore

/*
*END OF OPCODES REQUIRED BY OMR
Expand Down
61 changes: 61 additions & 0 deletions compiler/x/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5262,3 +5262,64 @@ OMR::X86::TreeEvaluator::ibyteswapEvaluator(TR::Node *node, TR::CodeGenerator *c
cg->decReferenceCount(child);
return target;
}

// Column selector for the BinaryArithmeticOpCodes lookup table.
// The enumerator values are used directly as array indices, so they must stay
// dense and start at zero; NumBinaryArithmeticOps is the column count, not an
// operation.
enum BinaryArithmeticOps : uint32_t
   {
   BinaryArithmeticInvalid = 0, // no operation selected
   BinaryArithmeticAdd     = 1,
   BinaryArithmeticSub     = 2,
   BinaryArithmeticMul     = 3,
   BinaryArithmeticDiv     = 4,
   NumBinaryArithmeticOps  = 5  // number of columns in the table
   };
// Lookup table mapping (data type, arithmetic operation) to the x86 opcode that
// implements it with a single register-register instruction.
//
// Rows are indexed by the node's data type and columns by BinaryArithmeticOps.
// An entry of BADIA32Op means the combination is not supported; the evaluator
// asserts on it. Only VectorDouble add and multiply are populated at present.
//
// NOTE(review): the row order is presumed to mirror the TR::DataTypes
// enumeration (see the per-row comments) -- confirm whenever that enum changes.
static const TR_X86OpCodes BinaryArithmeticOpCodes[TR::NumOMRTypes][NumBinaryArithmeticOps] =
{
// Invalid, Add, Sub, Mul, Div
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // NoType
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // Int8
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // Int16
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // Int32
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // Int64
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // Float
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // Double
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // Address
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // VectorInt8
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // VectorInt16
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // VectorInt32
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // VectorInt64
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // VectorFloat
{ BADIA32Op, ADDPDRegReg, BADIA32Op, MULPDRegReg, BADIA32Op }, // VectorDouble
{ BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op, BADIA32Op }, // Aggregate
};
// Shared evaluator for binary arithmetic IL opcodes that can be translated to a
// single SSE/AVX instruction. Currently handles TR::vadd and TR::vmul.
//
// Both children are evaluated (first child, then second child). The first
// operand is copied into a freshly allocated register of the same kind, and the
// opcode selected from BinaryArithmeticOpCodes is then applied to that copy
// with the second operand as source, so the result never lands in a register
// owned by one of the children.
//
// @param node  the arithmetic node being evaluated
// @param cg    the code generator
// @return      the register holding the result; it is also set on the node
TR::Register* OMR::X86::TreeEvaluator::FloatingPointAndVectorBinaryArithmeticEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   // Map the IL opcode onto a column of the opcode table.
   BinaryArithmeticOps operation = BinaryArithmeticInvalid;
   switch (node->getOpCodeValue())
      {
      case TR::vadd: operation = BinaryArithmeticAdd; break;
      case TR::vmul: operation = BinaryArithmeticMul; break;
      default:
         TR_ASSERT(false, "Unsupported OpCode");
      }

   TR::Node* lhsNode = node->getChild(0);
   TR::Node* rhsNode = node->getChild(1);
   TR::Register* lhsReg = cg->evaluate(lhsNode);
   TR::Register* rhsReg = cg->evaluate(rhsNode);

   // Seed the result with the first operand.
   TR::Register* targetReg = cg->allocateRegister(lhsReg->getKind());
   generateRegRegInstruction(MOVDQURegReg, node, targetReg, lhsReg, cg);

   // Pick the instruction for this (data type, operation) pair and emit it.
   const TR_X86OpCodes arithmeticOp = BinaryArithmeticOpCodes[node->getDataType()][operation];
   TR_ASSERT(arithmeticOp != BADIA32Op, "FloatingPointAndVectorBinaryArithmeticEvaluator: unsupported data type or arithmetic.");
   generateRegRegInstruction(arithmeticOp, node, targetReg, rhsReg, cg);

   node->setRegister(targetReg);
   cg->decReferenceCount(lhsNode);
   cg->decReferenceCount(rhsNode);
   return targetReg;
   }
10 changes: 10 additions & 0 deletions compiler/x/codegen/OMRTreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,16 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator
// routines for floating point values that can fit in one GPR
static TR::Register *floatingPointStoreEvaluator(TR::Node *node, TR::CodeGenerator *cg);

// For ILOpCode that can be translated to single SSE/AVX instructions
static TR::Register *FloatingPointAndVectorBinaryArithmeticEvaluator(TR::Node *node, TR::CodeGenerator *cg);

// SIMD evaluators
static TR::Register *SIMDRegLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *SIMDRegStoreEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *SIMDloadEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *SIMDstoreEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *SIMDsplatsEvaluator(TR::Node *node, TR::CodeGenerator *cg);

static TR::Register *icmpsetEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *bztestnsetEvaluator(TR::Node *node, TR::CodeGenerator *cg);

Expand Down
153 changes: 153 additions & 0 deletions compiler/x/codegen/SIMDTreeEvaluator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*******************************************************************************
*
* (c) Copyright IBM Corp. 2017, 2017
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Public License v1.0 and
* Apache License v2.0 which accompanies this distribution.
*
* The Eclipse Public License is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* The Apache License v2.0 is available at
* http://www.opensource.org/licenses/apache2.0.php
*
* Contributors:
* Multiple authors (IBM Corp.) - initial implementation and documentation
*******************************************************************************/

#include "codegen/CodeGenerator.hpp" // for CodeGenerator, etc
#include "codegen/MemoryReference.hpp"
#include "codegen/TreeEvaluator.hpp"
#include "il/ILOpCodes.hpp" // for ILOpCodes, etc
#include "il/ILOps.hpp" // for ILOpCode
#include "il/Node.hpp" // for Node, etc
#include "il/Node_inlines.hpp"
#include "infra/Assert.hpp" // for TR_ASSERT
#include "x/codegen/X86Instruction.hpp"
#include "x/codegen/X86Ops.hpp" // for ::LABEL, ::JE4, etc

namespace TR { class Instruction; }

// Returns a memory reference that is safe to use with wide SIMD loads/stores.
//
// A resolved reference is returned unchanged. For an unresolved one, the
// load/store instruction may be wider than 8 bytes (the patching window), which
// is not known for sure until after register assignment. The address is
// therefore materialised into a temporary register with LEA first, and a new
// reference based on that register (displacement 0) is returned instead.
//
// @param mr    the memory reference to convert
// @param node  the node the LEA instruction is attributed to
// @param cg    the code generator
// @return      mr itself when resolved, otherwise a new register-based reference
static TR::MemoryReference* ConvertToPatchableMemoryReference(TR::MemoryReference* mr, TR::Node* node, TR::CodeGenerator* cg)
   {
   if (!mr->getSymbolReference().isUnresolved())
      return mr;

   TR::Register* addressReg = cg->allocateRegister();
   generateRegMemInstruction(LEARegMem(cg), node, addressReg, mr, cg);
   TR::MemoryReference* patchableMR = generateX86MemoryReference(addressReg, 0, cg);
   cg->stopUsingRegister(addressReg);
   return patchableMR;
   }

// Evaluator for the vector RegLoad opcodes.
// Returns the register already attached to the node; when there is none, a new
// TR_VRF register is allocated, attached to the node and returned.
//
// @param node  the RegLoad node
// @param cg    the code generator
// @return      the register associated with the node
TR::Register* OMR::X86::TreeEvaluator::SIMDRegLoadEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   if (TR::Register* existingReg = node->getRegister())
      return existingReg;

   TR::Register* vectorReg = cg->allocateRegister(TR_VRF);
   node->setRegister(vectorReg);
   return vectorReg;
   }

// Evaluator for the vector RegStore opcodes.
// Evaluates the child and records its register as the XMM global register
// selected by the node. The node's global register number is rebased by the
// number of global GPRs to obtain the XMM index.
//
// @param node  the RegStore node
// @param cg    the code generator
// @return      the register holding the child's value
TR::Register* OMR::X86::TreeEvaluator::SIMDRegStoreEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::Node* valueNode = node->getFirstChild();
   TR::Register* valueReg = cg->evaluate(valueNode);

   const auto xmmIndex = node->getGlobalRegisterNumber() - cg->machine()->getNumGlobalGPRs();
   cg->machine()->setXMMGlobalRegister(xmmIndex, valueReg);

   cg->decReferenceCount(valueNode);
   return valueReg;
   }

// Evaluator for TR::vload and TR::vloadi.
// Builds a (patchable) memory reference for the node and loads it into a new
// TR_VRF register. Only 16-byte loads are supported, using MOVDQU; any other
// size is traced (when TR_TraceCG is enabled) and asserted on.
// For indirect loads the load instruction is registered as an implicit
// exception point.
//
// @param node  the load node
// @param cg    the code generator
// @return      the register holding the loaded vector; it is also set on the node
TR::Register* OMR::X86::TreeEvaluator::SIMDloadEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::MemoryReference* sourceMR = ConvertToPatchableMemoryReference(generateX86MemoryReference(node, cg), node, cg);
   TR::Register* targetReg = cg->allocateRegister(TR_VRF);

   TR_X86OpCodes loadOp = BADIA32Op;
   if (node->getSize() == 16)
      {
      loadOp = MOVDQURegMem;
      }
   else
      {
      if (cg->comp()->getOption(TR_TraceCG))
         traceMsg(cg->comp(), "Unsupported fill size: Node = %p\n", node);
      TR_ASSERT(false, "Unsupported fill size");
      }

   TR::Instruction* loadInstr = generateRegMemInstruction(loadOp, node, targetReg, sourceMR, cg);
   if (node->getOpCode().isIndirect())
      cg->setImplicitExceptionPoint(loadInstr);

   node->setRegister(targetReg);
   sourceMR->decNodeReferenceCounts(cg);
   return targetReg;
   }

// Evaluator for TR::vstore and TR::vstorei.
// The value to store is the second child for indirect stores and the first
// child otherwise. The (patchable) memory reference is built before the value
// child is evaluated. Only 16-byte stores are supported, using MOVDQU; any
// other size is traced (when TR_TraceCG is enabled) and asserted on.
// For indirect stores the store instruction is registered as an implicit
// exception point.
//
// @param node  the store node
// @param cg    the code generator
// @return      always NULL; a store produces no result register
TR::Register* OMR::X86::TreeEvaluator::SIMDstoreEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   int32_t valueChildIndex = 0;
   if (node->getOpCode().isIndirect())
      valueChildIndex = 1;
   TR::Node* sourceNode = node->getChild(valueChildIndex);

   TR::MemoryReference* targetMR = ConvertToPatchableMemoryReference(generateX86MemoryReference(node, cg), node, cg);
   TR::Register* sourceReg = cg->evaluate(sourceNode);

   TR_X86OpCodes storeOp = BADIA32Op;
   if (node->getSize() == 16)
      {
      storeOp = MOVDQUMemReg;
      }
   else
      {
      if (cg->comp()->getOption(TR_TraceCG))
         traceMsg(cg->comp(), "Unsupported fill size: Node = %p\n", node);
      TR_ASSERT(false, "Unsupported fill size");
      }

   TR::Instruction* storeInstr = generateMemRegInstruction(storeOp, node, targetMR, sourceReg, cg);

   cg->decReferenceCount(sourceNode);
   targetMR->decNodeReferenceCounts(cg);
   if (node->getOpCode().isIndirect())
      cg->setImplicitExceptionPoint(storeInstr);
   return NULL;
   }

// Evaluator for TR::vsplats: replicates the child's value across every lane of
// a new TR_VRF register using a single PSHUFD.
//
// The shuffle control byte depends on the element width of the node's type:
//   - VectorInt32 / VectorFloat  : 0x00 (00 00 00 00) shuffles xxxA to AAAA
//   - VectorInt64 / VectorDouble : 0x44 (01 00 01 00) shuffles xxBA to BABA
// Any other data type is traced (when TR_TraceCG is enabled) and asserted on.
//
// NOTE(review): PSHUFD reads its source from an XMM register, so this presumes
// the evaluated child already lives in one -- confirm for the integer element
// types, whose scalar child may be evaluated into a GPR.
//
// @param node  the vsplats node
// @param cg    the code generator
// @return      the register holding the splatted vector; it is also set on the node
TR::Register* OMR::X86::TreeEvaluator::SIMDsplatsEvaluator(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::Node* scalarNode = node->getChild(0);
   TR::Register* scalarReg = cg->evaluate(scalarNode);

   uint8_t shuffleControl = 0;
   switch (node->getDataType())
      {
      // 32-bit elements: broadcast lane 0 into all four lanes.
      case TR::VectorInt32:
      case TR::VectorFloat:
         shuffleControl = 0x00;
         break;

      // 64-bit elements: repeat the low pair of 32-bit lanes.
      case TR::VectorInt64:
      case TR::VectorDouble:
         shuffleControl = 0x44;
         break;

      default:
         {
         if (cg->comp()->getOption(TR_TraceCG))
            traceMsg(cg->comp(), "Unsupported data type, Node = %p\n", node);
         TR_ASSERT(false, "Unsupported data type");
         break;
         }
      }

   TR::Register* targetReg = cg->allocateRegister(TR_VRF);
   generateRegRegImmInstruction(PSHUFDRegRegImm1, node, targetReg, scalarReg, shuffleControl, cg);

   node->setRegister(targetReg);
   cg->decReferenceCount(scalarNode);
   return targetReg;
   }
14 changes: 7 additions & 7 deletions compiler/x/i386/codegen/TreeEvaluatorTable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@
TR::TreeEvaluator::unImpOpEvaluator, // TR::vnot
TR::TreeEvaluator::unImpOpEvaluator, // TR::vselect
TR::TreeEvaluator::unImpOpEvaluator, // TR::vperm
TR::TreeEvaluator::unImpOpEvaluator, // TR::vsplats
TR::TreeEvaluator::SIMDsplatsEvaluator, // TR::vsplats
TR::TreeEvaluator::unImpOpEvaluator, // TR::vdmergel
TR::TreeEvaluator::unImpOpEvaluator, // TR::vdmergeh
TR::TreeEvaluator::unImpOpEvaluator, // TR::vdsetelem
Expand Down Expand Up @@ -460,9 +460,9 @@
TR::TreeEvaluator::unImpOpEvaluator, // TR::vdec
TR::TreeEvaluator::unImpOpEvaluator, // TR::vneg
TR::TreeEvaluator::unImpOpEvaluator, // TR::vcom
TR::TreeEvaluator::unImpOpEvaluator, // TR::vadd
TR::TreeEvaluator::FloatingPointAndVectorBinaryArithmeticEvaluator, // TR::vadd
TR::TreeEvaluator::unImpOpEvaluator, // TR::vsub
TR::TreeEvaluator::unImpOpEvaluator, // TR::vmul
TR::TreeEvaluator::FloatingPointAndVectorBinaryArithmeticEvaluator, // TR::vmul
TR::TreeEvaluator::unImpOpEvaluator, // TR::vdiv
TR::TreeEvaluator::unImpOpEvaluator, // TR::vrem
TR::TreeEvaluator::unImpOpEvaluator, // TR::vand
Expand All @@ -481,10 +481,10 @@
TR::TreeEvaluator::unImpOpEvaluator, // TR::vucmple
TR::TreeEvaluator::unImpOpEvaluator, // TR::vcmpge
TR::TreeEvaluator::unImpOpEvaluator, // TR::vucmpge
TR::TreeEvaluator::unImpOpEvaluator, // TR::vload
TR::TreeEvaluator::unImpOpEvaluator, // TR::vloadi
TR::TreeEvaluator::unImpOpEvaluator, // TR::vstore
TR::TreeEvaluator::unImpOpEvaluator, // TR::vstorei
TR::TreeEvaluator::SIMDloadEvaluator, // TR::vload
TR::TreeEvaluator::SIMDloadEvaluator, // TR::vloadi
TR::TreeEvaluator::SIMDstoreEvaluator, // TR::vstore
TR::TreeEvaluator::SIMDstoreEvaluator, // TR::vstorei
TR::TreeEvaluator::unImpOpEvaluator, // TR::vrand
TR::TreeEvaluator::unImpOpEvaluator, // TR::vreturn
TR::TreeEvaluator::unImpOpEvaluator, // TR::vcall
Expand Down
1 change: 1 addition & 0 deletions fvtest/compilertest/build/files/target/x.mk
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ JIT_PRODUCT_BACKEND_SOURCES+=\
$(JIT_OMR_DIRTY_DIR)/x/codegen/FPBinaryArithmeticAnalyser.cpp \
$(JIT_OMR_DIRTY_DIR)/x/codegen/FPCompareAnalyser.cpp \
$(JIT_OMR_DIRTY_DIR)/x/codegen/FPTreeEvaluator.cpp \
$(JIT_OMR_DIRTY_DIR)/x/codegen/SIMDTreeEvaluator.cpp \
$(JIT_OMR_DIRTY_DIR)/x/codegen/HelperCallSnippet.cpp \
$(JIT_OMR_DIRTY_DIR)/x/codegen/IA32LinkageUtils.cpp \
$(JIT_OMR_DIRTY_DIR)/x/codegen/IntegerMultiplyDecomposer.cpp \
Expand Down
1 change: 1 addition & 0 deletions jitbuilder/build/files/target/x.mk
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ JIT_PRODUCT_BACKEND_SOURCES+=\
$(JIT_OMR_DIRTY_DIR)/x/codegen/FPBinaryArithmeticAnalyser.cpp \
$(JIT_OMR_DIRTY_DIR)/x/codegen/FPCompareAnalyser.cpp \
$(JIT_OMR_DIRTY_DIR)/x/codegen/FPTreeEvaluator.cpp \
$(JIT_OMR_DIRTY_DIR)/x/codegen/SIMDTreeEvaluator.cpp \
$(JIT_OMR_DIRTY_DIR)/x/codegen/HelperCallSnippet.cpp \
$(JIT_OMR_DIRTY_DIR)/x/codegen/IA32LinkageUtils.cpp \
$(JIT_OMR_DIRTY_DIR)/x/codegen/IntegerMultiplyDecomposer.cpp \
Expand Down

0 comments on commit e30b6fa

Please sign in to comment.