Skip to content

Commit

Permalink
[X86] Fold not(pcmpeq(and(X,CstPow2),0)) -> pcmpeq(and(X,CstPow2),CstPow2)
Browse files Browse the repository at this point in the history

Fixes #78888
  • Loading branch information
RKSimon committed Jan 24, 2024
1 parent 6255bae commit 72f10f7
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 26 deletions.
20 changes: 20 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49328,6 +49328,26 @@ static SDValue combineOrXorWithSETCC(SDNode *N, SDValue N0, SDValue N1,
}
}

// not(pcmpeq(and(X,CstPow2),0)) -> pcmpeq(and(X,CstPow2),CstPow2)
if (N->getOpcode() == ISD::XOR && N0.getOpcode() == X86ISD::PCMPEQ &&
N0.getOperand(0).getOpcode() == ISD::AND &&
ISD::isBuildVectorAllOnes(N1.getNode())) {
MVT VT = N->getSimpleValueType(0);
APInt UndefElts;
SmallVector<APInt> EltBits;
if (getTargetConstantBitsFromNode(N0.getOperand(0).getOperand(1),
VT.getScalarSizeInBits(), UndefElts,
EltBits)) {
bool IsPow2OrUndef = true;
for (unsigned I = 0, E = EltBits.size(); I != E; ++I)
IsPow2OrUndef &= UndefElts[I] || EltBits[I].isPowerOf2();

if (IsPow2OrUndef)
return DAG.getNode(X86ISD::PCMPEQ, SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(0).getOperand(1));
}
}

return SDValue();
}

Expand Down
41 changes: 15 additions & 26 deletions llvm/test/CodeGen/X86/icmp-pow2-mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,35 +11,29 @@ define <8 x i16> @pow2_mask_v16i8(i8 zeroext %0) {
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [128,128,64,64,32,32,16,16,8,8,4,4,2,2,1,1]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: pow2_mask_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,u,u,u,u,u,u,u,u]
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pcmpeqb %xmm1, %xmm0
; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX2-LABEL: pow2_mask_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT: retq
;
Expand Down Expand Up @@ -103,33 +97,28 @@ define i64 @pow2_mask_v8i8(i8 zeroext %0) {
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,u,u,u,u,u,u,u,u]
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pcmpeqb %xmm1, %xmm0
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: retq
;
; AVX2-LABEL: pow2_mask_v8i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: retq
;
; AVX512-LABEL: pow2_mask_v8i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastb %edi, %xmm0
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
%vec = insertelement <1 x i8> poison, i8 %0, i64 0
Expand Down

1 comment on commit 72f10f7

@mikaelholmen
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This patch was reverted in b9483d3

Revert "[X86] Fold not(pcmpeq(and(X,CstPow2),0)) -> pcmpeq(and(X,CstPow2),CstPow2)"

This reverts commit 72f10f7eb536da58cb79e13974895cd97d4e1a5f.

This change was causing a miscompile on an internal test and is being reverted at the author's request until it can be fixed.

I saw a miscompile with the patch in my own testing as well, though I'm not sure whether it's the same one mentioned in the revert.

In any case, with the patch applied,

clang -o bbi-91556.out bbi-91556.c -O2
./bbi-91556.out

results in

RESULT: 0x0
RESULT: 0x2
RESULT: 0x0
RESULT: 0x2
RESULT: 0x0
RESULT: 0x1
RESULT: 0x0
RESULT: 0x3
RESULT: 0x0
RESULT: 0x0
RESULT: 0x3
RESULT: 0x0
RESULT: 0x0
RESULT: 0x1
RESULT: 0x0
RESULT: 0x0

and without the patch (i.e. after the revert) we get

RESULT: 0x0
RESULT: 0x3
RESULT: 0x0
RESULT: 0x3
RESULT: 0x0
RESULT: 0x1
RESULT: 0x0
RESULT: 0x4
RESULT: 0x0
RESULT: 0x0
RESULT: 0x3
RESULT: 0x0
RESULT: 0x0
RESULT: 0x1
RESULT: 0x0
RESULT: 0x0

bbi-91556.c.gz

Please sign in to comment.