diff options
author | David Stuttard <david.stuttard@amd.com> | 2022-08-09 08:46:55 +0100 |
---|---|---|
committer | Tobias Hieta <tobias@hieta.se> | 2022-09-07 08:37:18 +0200 |
commit | dca7f087109debc1f376946dc244ed5d7e27a257 (patch) | |
tree | d35b946d24977435d2101f70d805965befb732f1 | |
parent | [RISCV][ReleaseNotes] Added LLVM and Clang release notes for RISC-V 15.0.0 (diff) | |
download | llvm-project-dca7f087109debc1f376946dc244ed5d7e27a257.tar.gz llvm-project-dca7f087109debc1f376946dc244ed5d7e27a257.tar.bz2 llvm-project-dca7f087109debc1f376946dc244ed5d7e27a257.zip |
AMDGPU: Allow for non-zero src1 in mbcnt known-bits computation
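Src1 for mbcnt can be a non-zero literal or register, in which case the result
can be as large as (wavefront size - 1) + src1 rather than wavefront size - 1.
Take this into account when calculating known bits.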
Differential Revision: https://reviews.llvm.org/D131478
(cherry picked from commit 1d1cc05539e275ae7666fc4b44bf725ec335078a)
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 11 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll | 67 |
2 files changed, 70 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index bf520a560404..c0a94cc758bb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4600,9 +4600,16 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( case Intrinsic::amdgcn_mbcnt_hi: { const GCNSubtarget &ST = DAG.getMachineFunction().getSubtarget<GCNSubtarget>(); - // These return at most the wavefront size - 1. + // These return at most the (wavefront size - 1) + src1 + // As long as src1 is an immediate we can calc known bits + KnownBits Src1Known = DAG.computeKnownBits(Op.getOperand(2), Depth + 1); + unsigned Src1ValBits = Src1Known.countMaxActiveBits(); + unsigned MaxActiveBits = std::max(Src1ValBits, ST.getWavefrontSizeLog2()); + // Cater for potential carry + MaxActiveBits += Src1ValBits ? 1 : 0; unsigned Size = Op.getValueType().getSizeInBits(); - Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2()); + if (MaxActiveBits < Size) + Known.Zero.setHighBits(Size - MaxActiveBits); break; } case Intrinsic::amdgcn_workitem_id_x: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll index 04405470aff0..88d6bea38b10 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll @@ -14,24 +14,79 @@ main_body: ret void } -; GCN-LABEL: {{^}}mbcnt_lo_known_bits: +; GCN-LABEL: {{^}}mbcnt_lo_known_bits_1: ; GCN: v_mbcnt_lo_u32_b32 -; GCN-NOT: and -define i32 @mbcnt_lo_known_bits(i32 %x, i32 %y) #0 { +; GCN: v_and_b32_e32 +define i32 @mbcnt_lo_known_bits_1(i32 %x, i32 %y) #0 { %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 %y) %mask = and i32 %lo, 63 ret i32 %mask } -; GCN-LABEL: {{^}}mbcnt_hi_known_bits: -; GCN: v_mbcnt_hi_u32_b32 +; GCN-LABEL: {{^}}mbcnt_lo_known_bits_2: +; GCN: v_mbcnt_lo_u32_b32 +; GCN-NOT: and +define i32 @mbcnt_lo_known_bits_2(i32 %x) #0 { + %lo = call i32 
@llvm.amdgcn.mbcnt.lo(i32 %x, i32 0) + %mask = and i32 %lo, 63 + ret i32 %mask +} + +; GCN-LABEL: {{^}}mbcnt_lo_known_bits_3: +; GCN: v_mbcnt_lo_u32_b32 ; GCN-NOT: and -define i32 @mbcnt_hi_known_bits(i32 %x, i32 %y) #0 { +define i32 @mbcnt_lo_known_bits_3(i32 %x) #0 { + %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 15) + %mask = and i32 %lo, 127 + ret i32 %mask +} + +; GCN-LABEL: {{^}}mbcnt_lo_known_bits_4: +; GCN: v_mbcnt_lo_u32_b32 +; GCN: v_and_b32_e32 +define i32 @mbcnt_lo_known_bits_4(i32 %x) #0 { + %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 15) + %mask = and i32 %lo, 63 + ret i32 %mask +} + + +; GCN-LABEL: {{^}}mbcnt_hi_known_bits_1: +; GCN: v_mbcnt_hi_u32_b32 +; GCN: v_and_b32_e32 +define i32 @mbcnt_hi_known_bits_1(i32 %x, i32 %y) #0 { %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 %y) %mask = and i32 %hi, 63 ret i32 %mask } +; GCN-LABEL: {{^}}mbcnt_hi_known_bits_2: +; GCN: v_mbcnt_hi_u32_b32 +; GCN-NOT: and +define i32 @mbcnt_hi_known_bits_2(i32 %x) #0 { + %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 0) + %mask = and i32 %hi, 63 + ret i32 %mask +} + +; GCN-LABEL: {{^}}mbcnt_hi_known_bits_3: +; GCN: v_mbcnt_hi_u32_b32 +; GCN-NOT: and +define i32 @mbcnt_hi_known_bits_3(i32 %x) #0 { + %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 15) + %mask = and i32 %hi, 127 + ret i32 %mask +} + +; GCN-LABEL: {{^}}mbcnt_hi_known_bits_4: +; GCN: v_mbcnt_hi_u32_b32 +; GCN: v_and_b32_e32 +define i32 @mbcnt_hi_known_bits_4(i32 %x) #0 { + %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 15) + %mask = and i32 %hi, 63 + ret i32 %mask +} + declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1 |