From dca7f087109debc1f376946dc244ed5d7e27a257 Mon Sep 17 00:00:00 2001 From: David Stuttard Date: Tue, 9 Aug 2022 08:46:55 +0100 Subject: AMDGPU: mbcnt allow for non-zero src1 for known-bits Src1 for mbcnt can be a non-zero literal or register. Take this into account when calculating known bits. Differential Revision: https://reviews.llvm.org/D131478 (cherry picked from commit 1d1cc05539e275ae7666fc4b44bf725ec335078a) --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'llvm/lib') diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index bf520a560404..c0a94cc758bb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4600,9 +4600,16 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( case Intrinsic::amdgcn_mbcnt_hi: { const GCNSubtarget &ST = DAG.getMachineFunction().getSubtarget(); - // These return at most the wavefront size - 1. + // These return at most the (wavefront size - 1) + src1 + // As long as src1 is an immediate we can calc known bits + KnownBits Src1Known = DAG.computeKnownBits(Op.getOperand(2), Depth + 1); + unsigned Src1ValBits = Src1Known.countMaxActiveBits(); + unsigned MaxActiveBits = std::max(Src1ValBits, ST.getWavefrontSizeLog2()); + // Cater for potential carry + MaxActiveBits += Src1ValBits ? 1 : 0; unsigned Size = Op.getValueType().getSizeInBits(); - Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2()); + if (MaxActiveBits < Size) + Known.Zero.setHighBits(Size - MaxActiveBits); break; } case Intrinsic::amdgcn_workitem_id_x: -- cgit v1.2.3-65-gdbad