-
Notifications
You must be signed in to change notification settings - Fork 705
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[spirv] Add SM 6.6 8-bit packed types and intrinsics #3325
Merged
Merged
Changes from 4 commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
7dcedff
[spirv] Support for SM 6.6 pack_u8 and pack_s8.
ehsannas 9ea1f5f
[spirv] Add support for SM 6.6 pack_clamp_{u|s}8.
ehsannas eefdfbc
[spirv] Support SM 6.6 unpack intrinsics.
ehsannas 9be3f32
[spirv] Remove unused variable.
ehsannas 37665a5
Remove comment.
ehsannas File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7452,6 +7452,20 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { | |
} | ||
break; | ||
} | ||
case hlsl::IntrinsicOp::IOP_pack_s8: | ||
case hlsl::IntrinsicOp::IOP_pack_u8: | ||
case hlsl::IntrinsicOp::IOP_pack_clamp_s8: | ||
case hlsl::IntrinsicOp::IOP_pack_clamp_u8: { | ||
retVal = processIntrinsic8BitPack(callExpr, hlslOpcode); | ||
break; | ||
} | ||
case hlsl::IntrinsicOp::IOP_unpack_s8s16: | ||
case hlsl::IntrinsicOp::IOP_unpack_s8s32: | ||
case hlsl::IntrinsicOp::IOP_unpack_u8u16: | ||
case hlsl::IntrinsicOp::IOP_unpack_u8u32: { | ||
retVal = processIntrinsic8BitUnpack(callExpr, hlslOpcode); | ||
break; | ||
} | ||
// DXR raytracing intrinsics | ||
case hlsl::IntrinsicOp::IOP_DispatchRaysDimensions: | ||
case hlsl::IntrinsicOp::IOP_DispatchRaysIndex: | ||
|
@@ -9812,6 +9826,164 @@ SpirvEmitter::processIntrinsicLog10(const CallExpr *callExpr) { | |
return spvBuilder.createBinaryOp(scaleOp, returnType, log2, scale, loc); | ||
} | ||
|
||
// Lowers the SM 6.6 pack_s8, pack_u8, pack_clamp_s8 and pack_clamp_u8 | ||
// intrinsics. Evaluates the call's first argument, optionally clamps it with | ||
// the GLSL extended instruction set, converts the elements to 8 bits, and | ||
// bitcasts the resulting 4-byte vector to the packed 32-bit result type. | ||
// `op` must be one of the four pack intrinsics (asserted below). | ||
SpirvInstruction * | ||
SpirvEmitter::processIntrinsic8BitPack(const CallExpr *callExpr, | ||
hlsl::IntrinsicOp op) { | ||
const auto loc = callExpr->getExprLoc(); | ||
assert(op == hlsl::IntrinsicOp::IOP_pack_s8 || | ||
op == hlsl::IntrinsicOp::IOP_pack_u8 || | ||
op == hlsl::IntrinsicOp::IOP_pack_clamp_s8 || | ||
op == hlsl::IntrinsicOp::IOP_pack_clamp_u8); | ||
|
||
// Here are the signatures for the pack intrinsic operations: | ||
// | ||
// uint8_t4_packed pack_u8(uint32_t4 unpackedVal); | ||
// uint8_t4_packed pack_u8(uint16_t4 unpackedVal); | ||
// int8_t4_packed pack_s8(int32_t4 unpackedVal); | ||
// int8_t4_packed pack_s8(int16_t4 unpackedVal); | ||
// | ||
// These functions take a vec4 of 16-bit or 32-bit integers as input. For each | ||
// element of the vec4, they pick the lower 8 bits, and drop the other bits. | ||
// The result is four 8-bit values (32 bits in total) which are packed in a | ||
// single uint32_t. | ||
// | ||
// | ||
// Here are the signatures for the pack_clamp intrinsic operations: | ||
// | ||
// uint8_t4_packed pack_clamp_u8(int32_t4 val); // Pack and Clamp [0, 255] | ||
// uint8_t4_packed pack_clamp_u8(int16_t4 val); // Pack and Clamp [0, 255] | ||
// | ||
// int8_t4_packed pack_clamp_s8(int32_t4 val); // Pack and Clamp [-128, 127] | ||
// int8_t4_packed pack_clamp_s8(int16_t4 val); // Pack and Clamp [-128, 127] | ||
// | ||
// These functions take a vec4 of 16-bit or 32-bit integers as input. For each | ||
// element of the vec4, they first clamp the value to a range (depending on | ||
// the signedness) then pick the lower 8 bits, and drop the other bits. | ||
// The result is four 8-bit values (32 bits in total) which are packed in a | ||
// single uint32_t. | ||
// | ||
// Note: uint8_t4_packed and int8_t4_packed are NOT vector types! They are | ||
// both scalar 32-bit unsigned integer types where each byte represents one | ||
// value. | ||
// | ||
// Note: In pack_clamp_{s|u}8 intrinsics, an input of 0x100 will be turned | ||
// into 0xFF, not 0x00. Therefore, it is important to perform a clamp first, | ||
// and then a truncation. | ||
|
||
// Steps: | ||
// Use GLSL extended instruction set's clamp (only for clamp instructions). | ||
// Use OpUConvert/OpSConvert to truncate each element of the vec4 to 8 bits. | ||
// Use OpBitcast to make a 32-bit uint out of the new vec4. | ||
auto *arg = callExpr->getArg(0); | ||
const auto argType = arg->getType(); | ||
SpirvInstruction *argInstr = doExpr(arg); | ||
QualType elemType = {}; | ||
uint32_t elemCount = 0; | ||
// The boolean result is deliberately discarded; elemType and elemCount are | ||
// the outputs used below. | ||
(void)isVectorType(argType, &elemType, &elemCount); | ||
// Signedness is taken from the argument's element type, not from the | ||
// intrinsic. Per the signatures above, pack_clamp_u8 takes a signed vector, | ||
// so it goes through the signed clamp/convert path. | ||
const bool isSigned = elemType->isSignedIntegerType(); | ||
assert(elemCount == 4); | ||
|
||
const bool doesClamp = op == hlsl::IntrinsicOp::IOP_pack_clamp_s8 || | ||
op == hlsl::IntrinsicOp::IOP_pack_clamp_u8; | ||
if (doesClamp) { | ||
// The clamp bounds are chosen by the intrinsic (u8: [0, 255], | ||
// s8: [-128, 127]); the constants' type and bitwidth follow the argument's | ||
// element type so that they match the operand being clamped. | ||
const auto bitwidth = getElementSpirvBitwidth( | ||
astContext, elemType, spirvOptions.enable16BitTypes); | ||
int32_t clampMin = op == hlsl::IntrinsicOp::IOP_pack_clamp_u8 ? 0 : -128; | ||
int32_t clampMax = op == hlsl::IntrinsicOp::IOP_pack_clamp_u8 ? 255 : 127; | ||
auto *minInstr = spvBuilder.getConstantInt( | ||
elemType, llvm::APInt(bitwidth, clampMin, isSigned)); | ||
auto *maxInstr = spvBuilder.getConstantInt( | ||
elemType, llvm::APInt(bitwidth, clampMax, isSigned)); | ||
// Splat the scalar bounds into vec4 constants of the argument's type. | ||
auto *minVec = spvBuilder.getConstantComposite( | ||
argType, {minInstr, minInstr, minInstr, minInstr}); | ||
auto *maxVec = spvBuilder.getConstantComposite( | ||
argType, {maxInstr, maxInstr, maxInstr, maxInstr}); | ||
auto clampOp = isSigned ? GLSLstd450SClamp : GLSLstd450UClamp; | ||
// ehsan | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ouch! 😄 how did this sneak in! Good catch. Thanks |
||
// From here on argInstr refers to the clamped vector. | ||
argInstr = spvBuilder.createGLSLExtInst(argType, clampOp, | ||
{argInstr, minVec, maxVec}, loc); | ||
} | ||
|
||
// Narrow to a vec4 of bytes, then reinterpret those 4 bytes as one packed | ||
// 32-bit value. | ||
// NOTE(review): this branch is selected by the argument's signedness, so | ||
// pack_clamp_u8 (signed input) yields Int8_4PackedTy rather than | ||
// UInt8_4PackedTy. The note above says both are 32-bit unsigned scalars; | ||
// confirm that this is intended. | ||
if (isSigned) { | ||
QualType v4Int8Type = | ||
astContext.getExtVectorType(astContext.SignedCharTy, 4); | ||
auto *bytesVecInstr = spvBuilder.createUnaryOp(spv::Op::OpSConvert, | ||
v4Int8Type, argInstr, loc); | ||
return spvBuilder.createUnaryOp( | ||
spv::Op::OpBitcast, astContext.Int8_4PackedTy, bytesVecInstr, loc); | ||
} else { | ||
QualType v4Uint8Type = | ||
astContext.getExtVectorType(astContext.UnsignedCharTy, 4); | ||
auto *bytesVecInstr = spvBuilder.createUnaryOp(spv::Op::OpUConvert, | ||
v4Uint8Type, argInstr, loc); | ||
return spvBuilder.createUnaryOp( | ||
spv::Op::OpBitcast, astContext.UInt8_4PackedTy, bytesVecInstr, loc); | ||
} | ||
} | ||
|
||
// Lowers the SM 6.6 unpack_s8s16, unpack_s8s32, unpack_u8u16 and unpack_u8u32 | ||
// intrinsics. Evaluates the call's first argument, bitcasts it to a vec4 of | ||
// bytes, and widens each byte to 16 or 32 bits with OpSConvert/OpUConvert. | ||
// `op` must be one of the four unpack intrinsics (asserted below). | ||
SpirvInstruction * | ||
SpirvEmitter::processIntrinsic8BitUnpack(const CallExpr *callExpr, | ||
hlsl::IntrinsicOp op) { | ||
const auto loc = callExpr->getExprLoc(); | ||
assert(op == hlsl::IntrinsicOp::IOP_unpack_s8s16 || | ||
op == hlsl::IntrinsicOp::IOP_unpack_s8s32 || | ||
op == hlsl::IntrinsicOp::IOP_unpack_u8u16 || | ||
op == hlsl::IntrinsicOp::IOP_unpack_u8u32); | ||
|
||
// Here are the signatures for the unpack intrinsic operations: | ||
// | ||
// int16_t4 unpack_s8s16(int8_t4_packed packedVal); // Sign Extended | ||
// uint16_t4 unpack_u8u16(uint8_t4_packed packedVal); // Non-Sign Extended | ||
// int32_t4 unpack_s8s32(int8_t4_packed packedVal); // Sign Extended | ||
// uint32_t4 unpack_u8u32(uint8_t4_packed packedVal); // Non-Sign Extended | ||
// | ||
// These functions take a 32-bit unsigned integer as input (where each byte of | ||
// the input represents one value, i.e. it's packed). They first unpack the | ||
// 32-bit integer to a vector of 4 bytes. Then for each element of the vec4, | ||
// they zero-extend or sign-extend the byte in order to achieve a 16-bit or | ||
// 32-bit vector of integers. | ||
// | ||
// Note: uint8_t4_packed and int8_t4_packed are NOT vector types! They are | ||
// both scalar 32-bit unsigned integer types where each byte represents one | ||
// value. | ||
|
||
// Steps: | ||
// Use OpBitcast to make a vec4 of bytes from a 32-bit value. | ||
// Use OpUConvert/OpSConvert to zero-extend/sign-extend each element of the | ||
// vec4 to 16 or 32 bits. | ||
auto *arg = callExpr->getArg(0); | ||
SpirvInstruction *argInstr = doExpr(arg); | ||
|
||
// Unlike the pack intrinsics, signedness here is determined by the opcode | ||
// itself (the s8* variants sign-extend, the u8* variants zero-extend). | ||
const bool isSigned = op == hlsl::IntrinsicOp::IOP_unpack_s8s16 || | ||
op == hlsl::IntrinsicOp::IOP_unpack_s8s32; | ||
|
||
// Result element type: 16-bit for the *16 opcodes, 32-bit otherwise, with | ||
// signedness matching isSigned. | ||
QualType resultType = {}; | ||
if (op == hlsl::IntrinsicOp::IOP_unpack_s8s16 || | ||
op == hlsl::IntrinsicOp::IOP_unpack_u8u16) { | ||
resultType = astContext.getExtVectorType( | ||
isSigned ? astContext.ShortTy : astContext.UnsignedShortTy, 4); | ||
} else { | ||
resultType = astContext.getExtVectorType( | ||
isSigned ? astContext.IntTy : astContext.UnsignedIntTy, 4); | ||
} | ||
|
||
// Reinterpret the packed 32-bit value as a vec4 of (signed or unsigned) | ||
// bytes, then widen each element to the result type. | ||
if (isSigned) { | ||
QualType v4Int8Type = | ||
astContext.getExtVectorType(astContext.SignedCharTy, 4); | ||
auto *bytesVecInstr = | ||
spvBuilder.createUnaryOp(spv::Op::OpBitcast, v4Int8Type, argInstr, loc); | ||
return spvBuilder.createUnaryOp(spv::Op::OpSConvert, resultType, | ||
bytesVecInstr, loc); | ||
} else { | ||
QualType v4Uint8Type = | ||
astContext.getExtVectorType(astContext.UnsignedCharTy, 4); | ||
auto *bytesVecInstr = spvBuilder.createUnaryOp(spv::Op::OpBitcast, | ||
v4Uint8Type, argInstr, loc); | ||
return spvBuilder.createUnaryOp(spv::Op::OpUConvert, resultType, | ||
bytesVecInstr, loc); | ||
} | ||
} | ||
|
||
SpirvInstruction *SpirvEmitter::processRayBuiltins(const CallExpr *callExpr, | ||
hlsl::IntrinsicOp op) { | ||
spv::BuiltIn builtin = spv::BuiltIn::Max; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
66 changes: 66 additions & 0 deletions
66
tools/clang/test/CodeGenSPIRV/intrinsics.sm6_6.pack_clamp_s8u8.hlsl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
// Run: %dxc -E main -T ps_6_6 -enable-16bit-types | ||
|
||
// Pixel-shader entry point for the pack_clamp_s8 / pack_clamp_u8 SPIR-V | ||
// codegen test. Each intrinsic is called once with a 16-bit and once with a | ||
// 32-bit signed vec4 local; the expected instruction sequence precedes each | ||
// call. The input1/input2 parameters are not referenced in the body. | ||
float4 main(int16_t4 input1 : Inputs1, int16_t4 input2 : Inputs2) : SV_Target { | ||
int16_t4 v4int16_var; | ||
int32_t4 v4int32_var; | ||
|
||
// Note: pack_clamp_s8 and pack_clamp_u8 do NOT accept an unsigned argument. | ||
|
||
// CHECK: [[glsl_set:%\d+]] = OpExtInstImport "GLSL.std.450" | ||
|
||
// CHECK: %short = OpTypeInt 16 1 | ||
// CHECK: %v4short = OpTypeVector %short 4 | ||
|
||
// CHECK: [[const_v4short_n128:%\d+]] = OpConstantComposite %v4short %short_n128 %short_n128 %short_n128 %short_n128 | ||
// CHECK: [[const_v4short_127:%\d+]] = OpConstantComposite %v4short %short_127 %short_127 %short_127 %short_127 | ||
|
||
// CHECK: [[const_v4int_n128:%\d+]] = OpConstantComposite %v4int %int_n128 %int_n128 %int_n128 %int_n128 | ||
// CHECK: [[const_v4int_127:%\d+]] = OpConstantComposite %v4int %int_127 %int_127 %int_127 %int_127 | ||
|
||
// CHECK: [[const_v4short_0:%\d+]] = OpConstantComposite %v4short %short_0 %short_0 %short_0 %short_0 | ||
// CHECK: [[const_v4short_255:%\d+]] = OpConstantComposite %v4short %short_255 %short_255 %short_255 %short_255 | ||
|
||
// CHECK: [[const_v4int_0:%\d+]] = OpConstantComposite %v4int %int_0 %int_0 %int_0 %int_0 | ||
// CHECK: [[const_v4int_255:%\d+]] = OpConstantComposite %v4int %int_255 %int_255 %int_255 %int_255 | ||
|
||
// CHECK: %char = OpTypeInt 8 1 | ||
// CHECK: %v4char = OpTypeVector %char 4 | ||
|
||
//////////////////////////// | ||
// pack_clamp_s8 variants // | ||
//////////////////////////// | ||
|
||
// CHECK: [[v4int16_var:%\d+]] = OpLoad %v4short %v4int16_var | ||
// CHECK: [[clamped:%\d+]] = OpExtInst %v4short [[glsl_set]] SClamp [[v4int16_var]] [[const_v4short_n128]] [[const_v4short_127]] | ||
// CHECK: [[truncated:%\d+]] = OpSConvert %v4char [[clamped]] | ||
// CHECK: [[packed:%\d+]] = OpBitcast %uint [[truncated]] | ||
// CHECK: OpStore %ps1 [[packed]] | ||
int8_t4_packed ps1 = pack_clamp_s8(v4int16_var); | ||
|
||
// Note: the capture in the next two directives is named v4int16_var, but it | ||
// holds the result of loading %v4int32_var. | ||
// CHECK: [[v4int16_var:%\d+]] = OpLoad %v4int %v4int32_var | ||
// CHECK: [[clamped:%\d+]] = OpExtInst %v4int [[glsl_set]] SClamp [[v4int16_var]] [[const_v4int_n128]] [[const_v4int_127]] | ||
// CHECK: [[truncated:%\d+]] = OpSConvert %v4char [[clamped]] | ||
// CHECK: [[packed:%\d+]] = OpBitcast %uint [[truncated]] | ||
// CHECK: OpStore %ps3 [[packed]] | ||
int8_t4_packed ps3 = pack_clamp_s8(v4int32_var); | ||
|
||
//////////////////////////// | ||
// pack_clamp_u8 variants // | ||
//////////////////////////// | ||
|
||
// The arguments are signed, so the expected output below still uses SClamp | ||
// and OpSConvert; only the clamp bounds differ from the s8 variants. | ||
// CHECK: [[v4int16_var:%\d+]] = OpLoad %v4short %v4int16_var | ||
// CHECK: [[clamped:%\d+]] = OpExtInst %v4short [[glsl_set]] SClamp [[v4int16_var]] [[const_v4short_0]] [[const_v4short_255]] | ||
// CHECK: [[truncated:%\d+]] = OpSConvert %v4char [[clamped]] | ||
// CHECK: [[packed:%\d+]] = OpBitcast %uint [[truncated]] | ||
// CHECK: OpStore %pu1 [[packed]] | ||
uint8_t4_packed pu1 = pack_clamp_u8(v4int16_var); | ||
|
||
// CHECK: [[v4int32_var:%\d+]] = OpLoad %v4int %v4int32_var | ||
// CHECK: [[clamped:%\d+]] = OpExtInst %v4int [[glsl_set]] SClamp [[v4int32_var]] [[const_v4int_0]] [[const_v4int_255]] | ||
// CHECK: [[truncated:%\d+]] = OpSConvert %v4char [[clamped]] | ||
// CHECK: [[packed:%\d+]] = OpBitcast %uint [[truncated]] | ||
// CHECK: OpStore %pu3 [[packed]] | ||
uint8_t4_packed pu3 = pack_clamp_u8(v4int32_var); | ||
|
||
return 0.xxxx; | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1. I like your description here.