Skip to content

Commit 156a1b5

Browse files
committed
AMDGPU: Make signext/zeroext behave more sensibly over > i32
Interpret these attributes as extending to the next multiple of 32 bits. Previously this had no effect with i48, for example: an i48 is really split into {i32, i16}, and it is the high part that should be extended.
1 parent 71269a1 commit 156a1b5

File tree

3 files changed

+62
-0
lines changed

3 files changed

+62
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,17 @@ bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N,
617617
return true;
618618
}
619619

620+
/// Pick the integer type a scalar value is extended to (going by the name,
/// for signext/zeroext return values).
///
/// \param Context    Passed to EVT::getIntegerVT to build the widened type.
/// \param VT         Scalar type to widen; a vector type trips the assert.
/// \param ExtendKind Not consulted here; the result depends only on the bit
///                   width of \p VT.
/// \returns MVT::i32 when \p VT is at most 32 bits wide, otherwise an integer
///          type whose width is the size of \p VT rounded up to a multiple
///          of 32 bits.
EVT AMDGPUTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
621+
ISD::NodeType ExtendKind) const {
622+
assert(!VT.isVector() && "only scalar expected");
623+
624+
// Round to the next multiple of 32-bits.
625+
unsigned Size = VT.getSizeInBits();
626+
// Anything that already fits in 32 bits is widened to exactly i32.
if (Size <= 32)
627+
return MVT::i32;
628+
// (Size + 31) / 32 is the ceiling of Size / 32, e.g. 48 -> 64 and 63 -> 64.
return EVT::getIntegerVT(Context, 32 * ((Size + 31) / 32));
629+
}
630+
620631
/// Returns MVT::i32 unconditionally; the DataLayout argument is unnamed and
/// never read.
MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
621632
return MVT::i32;
622633
}

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

+3
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,9 @@ class AMDGPUTargetLowering : public TargetLowering {
178178

179179
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
180180

181+
/// Integer type a scalar \p VT is extended to: i32 for widths up to 32 bits,
/// otherwise the width rounded up to the next multiple of 32 bits.
EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
182+
ISD::NodeType ExtendKind) const override;
183+
181184
MVT getVectorIdxTy(const DataLayout &) const override;
182185
bool isSelectSupported(SelectSupportKind) const override;
183186

llvm/test/CodeGen/AMDGPU/function-returns.ll

+48
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,54 @@ define i48 @i48_func_void() #0 {
104104
ret i48 %val
105105
}
106106

107+
; zeroext i48 return of a loaded value. The checks expect a dword load into v0
; and a buffer_load_ushort into v1, followed directly by the wait and the
; return, i.e. no separate extension instruction is expected after the loads.
; GCN-LABEL: {{^}}i48_zeroext_func_void:
108+
; GCN: buffer_load_dword v0, off
109+
; GCN-NEXT: buffer_load_ushort v1, off
110+
; GCN-NEXT: s_waitcnt vmcnt(0)
111+
; GCN-NEXT: s_setpc_b64
112+
define zeroext i48 @i48_zeroext_func_void() #0 {
113+
%val = load i48, i48 addrspace(1)* undef, align 8
114+
ret i48 %val
115+
}
116+
117+
; signext i48 return of a loaded value. Same shape as the zeroext variant, but
; the load into v1 is expected to be buffer_load_sshort instead of
; buffer_load_ushort; again no separate extension instruction is checked for.
; GCN-LABEL: {{^}}i48_signext_func_void:
118+
; GCN: buffer_load_dword v0, off
119+
; GCN-NEXT: buffer_load_sshort v1, off
120+
; GCN-NEXT: s_waitcnt vmcnt(0)
121+
; GCN-NEXT: s_setpc_b64
122+
define signext i48 @i48_signext_func_void() #0 {
123+
%val = load i48, i48 addrspace(1)* undef, align 8
124+
ret i48 %val
125+
}
126+
127+
; Baseline: an i63 argument returned with no extension attribute. Only the wait
; and the return are checked, back to back, so no instruction modifying the
; value is expected in between.
; GCN-LABEL: {{^}}i63_func_void:
128+
; GCN: s_waitcnt
129+
; GCN-NEXT: s_setpc_b64
130+
define i63 @i63_func_void(i63 %val) #0 {
131+
ret i63 %val
132+
}
133+
134+
; zeroext i63 return of an argument. The checks expect a single v_and_b32 on
; v1 with mask 0x7fffffff (clearing bit 31 of that register) between the wait
; and the return. v1 is presumably the high half of the value, matching the
; v[0:1] pair used by the signext variant of this test.
; GCN-LABEL: {{^}}i63_zeroext_func_void:
135+
; GCN: s_waitcnt
136+
; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
137+
; GCN-NEXT: s_setpc_b64
138+
define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
139+
ret i63 %val
140+
}
141+
142+
; signext i63 return of an argument. The checks expect a 64-bit shift left by 1
; followed by a 64-bit arithmetic shift right by 1 on v[0:1], which copies bit
; 62 into bit 63. The CI-prefixed and GFX89-prefixed lines check the same pair
; of shifts with different mnemonics and operand order.
; GCN-LABEL: {{^}}i63_signext_func_void:
143+
; GCN: s_waitcnt
144+
; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
145+
; CI-NEXT: v_ashr_i64 v[0:1], v[0:1], 1
146+
147+
; GFX89-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
148+
; GFX89-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
149+
150+
; GCN-NEXT: s_setpc_b64
151+
define signext i63 @i63_signext_func_void(i63 %val) #0 {
152+
ret i63 %val
153+
}
154+
107155
; GCN-LABEL: {{^}}i64_func_void:
108156
; GCN: buffer_load_dwordx2 v[0:1], off
109157
; GCN-NEXT: s_waitcnt vmcnt(0)

0 commit comments

Comments
 (0)