[AMDGPU] Check vector sizes for physical register constraints in inline asm (llvm#109955)

For register constraints that require specific register ranges, the
width of the range must match the size of the type of the associated
parameter/return value. With this PR, we error out when that is not the
case; previously, these cases would hit assertions or reach llvm_unreachable.

The handling of register constraints that require only a single register
remains more lenient to allow narrower non-vector types for the
associated IR values. For example, constraining an i16 or i8 value to a
32-bit register is still allowed.
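
As a rough illustration (a hypothetical module, not taken from this PR's tests; function names are made up and it assumes llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900): a range constraint now has to span exactly as many 32-bit registers as the value's type needs, while a single 32-bit register still accepts narrower scalar types.

; Accepted: {s[8:11]} spans 4 x 32 = 128 bits, which matches <4 x i32>.
define <4 x i32> @range_width_matches() {
  %v = call <4 x i32> asm sideeffect "; def $0", "={s[8:11]}"()
  ret <4 x i32> %v
}

; Rejected with this change: {s[8:12]} spans 160 bits for a 128-bit value, so
; llc reports "couldn't allocate output register for constraint" instead of
; hitting an assertion.
; define <4 x i32> @range_width_mismatch() {
;   %v = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}"()
;   ret <4 x i32> %v
; }

; Still accepted: an i16 value constrained to a single 32-bit VGPR.
define void @narrow_scalar_in_single_reg(i16 %val) {
  call void asm sideeffect "; use $0", "{v0}"(i16 %val)
  ret void
}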

Fixes llvm#101190.

---------

Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
ritter-x2a and arsenm authored Oct 1, 2024
1 parent 0cf4cb4 commit 3ba4092
Showing 4 changed files with 298 additions and 19 deletions.
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15510,6 +15510,10 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
Failed |= !RegName.consume_back("]");
if (!Failed) {
uint32_t Width = (End - Idx + 1) * 32;
// Prohibit constraints for register ranges with a width that does not
// match the required type.
if (VT.SimpleTy != MVT::Other && Width != VT.getSizeInBits())
return std::pair(0U, nullptr);
MCRegister Reg = RC->getRegister(Idx);
if (SIRegisterInfo::isVGPRClass(RC))
RC = TRI->getVGPRClassForBitWidth(Width);
@@ -15523,6 +15527,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
}
}
} else {
// Check for lossy scalar/vector conversions.
if (VT.isVector() && VT.getSizeInBits() != 32)
return std::pair(0U, nullptr);
bool Failed = RegName.getAsInteger(10, Idx);
if (!Failed && Idx < RC->getNumRegs())
return std::pair(RC->getRegister(Idx), RC);
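
Concretely, the single-register branch above only accepts vector values that are exactly 32 bits wide. A minimal sketch (hypothetical function, assuming the same llc -mtriple=amdgcn-amd-amdhsa setup as the tests below):

define void @single_reg_vector_rule(<2 x i16> %v2i16, <2 x i8> %v2i8) {
  ; Accepted: <2 x i16> is exactly 32 bits, so it fits in a single VGPR.
  call void asm sideeffect "; use $0", "{v8}"(<2 x i16> %v2i16)
  ; Rejected with this change: <2 x i8> is only 16 bits, so llc reports
  ; "couldn't allocate input reg for constraint '{v8}'".
  ; call void asm sideeffect "; use $0", "{v8}"(<2 x i8> %v2i8)
  ret void
}
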
44 changes: 25 additions & 19 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll
@@ -3,11 +3,14 @@
; RUN: FileCheck -check-prefix=ERR %s < %t

; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}"()' (in function: return_type_is_too_big_vector)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:10]}"()' (in function: return_type_is_too_small_vector)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' %reg = call i64 asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_scalar)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' %reg = call i32 asm sideeffect "; def $0", "={v[8:9]}"()' (in function: return_type_is_too_small_scalar)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' %reg = call ptr addrspace(1) asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_pointer)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' %reg = call ptr addrspace(3) asm sideeffect "; def $0", "={v[8:9]}"()' (in function: return_type_is_too_small_pointer)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v[0:9]}"(<8 x i32> %arg)' (in function: use_vector_too_big)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v0}"(i64 %arg)' (in function: use_scalar_too_small)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v[0:1]}"(i32 %arg)' (in function: use_scalar_too_big)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v0}"(ptr addrspace(1) %arg)' (in function: use_pointer_too_small)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v[0:1]}"(ptr addrspace(3) %arg)' (in function: use_pointer_too_big)

@@ -24,18 +27,25 @@ define amdgpu_kernel void @return_type_is_too_big_vector() {
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1 (%ir-block.0):
; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
%sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}" ()
call void asm sideeffect "; use $0", "s"(<4 x i32> %sgpr) #0
ret void
}

; FIXME: This is crashing in the DAG
; define amdgpu_kernel void @return_type_is_too_small_vector() {
; %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:10]}" ()
; call void asm sideeffect "; use $0", "s"(<4 x i32> %sgpr) #0
; ret void
; }
; This is broken because it requests 3 32-bit sgprs to handle a 4xi32 result.
define amdgpu_kernel void @return_type_is_too_small_vector() {
; CHECK-LABEL: name: return_type_is_too_small_vector
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1 (%ir-block.0):
%sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:10]}" ()
call void asm sideeffect "; use $0", "s"(<4 x i32> %sgpr) #0
ret void
}

define i64 @return_type_is_too_big_scalar() {
; CHECK-LABEL: name: return_type_is_too_big_scalar
@@ -50,12 +60,10 @@ define i64 @return_type_is_too_big_scalar() {

define i32 @return_type_is_too_small_scalar() {
; CHECK-LABEL: name: return_type_is_too_small_scalar
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8_vgpr9
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1 (%ir-block.0):
%reg = call i32 asm sideeffect "; def $0", "={v[8:9]}" ()
ret i32 %reg
}
@@ -77,7 +85,6 @@ define ptr addrspace(3) @return_type_is_too_small_pointer() {
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1 (%ir-block.0):
; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8_vgpr9
%reg = call ptr addrspace(3) asm sideeffect "; def $0", "={v[8:9]}" ()
ret ptr addrspace(3) %reg
}
@@ -141,14 +148,13 @@ define void @use_scalar_too_small(i64 %arg) {

define void @use_scalar_too_big(i32 %arg) {
; CHECK-LABEL: name: use_scalar_too_big
; CHECK: bb.1 (%ir-block.0):
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $vgpr0_vgpr1
; CHECK-NEXT: SI_RETURN
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1 (%ir-block.0):
call void asm sideeffect "; use $0", "{v[0:1]}"(i32 %arg)
ret void
}
104 changes: 104 additions & 0 deletions llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll
@@ -0,0 +1,104 @@
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR %s

; Diagnose register constraints that are not wide enough.

; ERR: error: couldn't allocate output register for constraint '{v[8:15]}'
define <9 x i32> @inline_asm_9xi32_in_8v_def() {
%asm = call <9 x i32> asm sideeffect "; def $0", "={v[8:15]}"()
ret <9 x i32> %asm
}

; ERR: error: couldn't allocate input reg for constraint '{v[8:15]}'
define void @inline_asm_9xi32_in_8v_use(<9 x i32> %val) {
call void asm sideeffect "; use $0", "{v[8:15]}"(<9 x i32> %val)
ret void
}

; ERR: error: couldn't allocate output register for constraint '{s[8:15]}'
define <9 x i32> @inline_asm_9xi32_in_8s_def() {
%asm = call <9 x i32> asm sideeffect "; def $0", "={s[8:15]}"()
ret <9 x i32> %asm
}


; Diagnose register constraints that are too wide.

; ERR: error: couldn't allocate output register for constraint '{v[8:16]}'
define <8 x i32> @inline_asm_8xi32_in_9v_def() {
%asm = call <8 x i32> asm sideeffect "; def $0", "={v[8:16]}"()
ret <8 x i32> %asm
}

; ERR: error: couldn't allocate input reg for constraint '{v[8:16]}'
define void @inline_asm_8xi32_in_9v_use(<8 x i32> %val) {
call void asm sideeffect "; use $0", "{v[8:16]}"(<8 x i32> %val)
ret void
}

; ERR: error: couldn't allocate output register for constraint '{s[8:16]}'
define <8 x i32> @inline_asm_8xi32_in_9s_def() {
%asm = call <8 x i32> asm sideeffect "; def $0", "={s[8:16]}"()
ret <8 x i32> %asm
}


; Diagnose mismatched scalars with register ranges

; ERR: error: couldn't allocate output register for constraint '{s[4:5]}'
define void @inline_asm_scalar_read_too_wide() {
%asm = call i32 asm sideeffect "; def $0 ", "={s[4:5]}"()
ret void
}

; ERR: error: couldn't allocate output register for constraint '{s[4:4]}'
define void @inline_asm_scalar_read_too_narrow() {
%asm = call i64 asm sideeffect "; def $0 ", "={s[4:4]}"()
ret void
}

; Single registers for vector types that are too wide or too narrow should be
; diagnosed.

; ERR: error: couldn't allocate input reg for constraint '{v8}'
define void @inline_asm_4xi32_in_v_use(<4 x i32> %val) {
call void asm sideeffect "; use $0", "{v8}"(<4 x i32> %val)
ret void
}

; ERR: error: couldn't allocate output register for constraint '{v8}'
define <4 x i32> @inline_asm_4xi32_in_v_def() {
%asm = call <4 x i32> asm sideeffect "; def $0", "={v8}"()
ret <4 x i32> %asm
}

; ERR: error: couldn't allocate output register for constraint '{s8}'
define <4 x i32> @inline_asm_4xi32_in_s_def() {
%asm = call <4 x i32> asm sideeffect "; def $0", "={s8}"()
ret <4 x i32> %asm
}

; ERR: error: couldn't allocate input reg for constraint '{v8}'
; ERR: error: couldn't allocate input reg for constraint 'v'
define void @inline_asm_2xi8_in_v_use(<2 x i8> %val) {
call void asm sideeffect "; use $0", "{v8}"(<2 x i8> %val)
call void asm sideeffect "; use $0", "v"(<2 x i8> %val)
ret void
}

; ERR: error: couldn't allocate output register for constraint '{v8}'
; ERR: error: couldn't allocate output register for constraint 'v'
define <2 x i8> @inline_asm_2xi8_in_v_def() {
%phys = call <2 x i8> asm sideeffect "; def $0", "={v8}"()
%virt = call <2 x i8> asm sideeffect "; def $0", "=v"()
%r = and <2 x i8> %phys, %virt
ret <2 x i8> %r
}

; ERR: error: couldn't allocate output register for constraint '{s8}'
; ERR: error: couldn't allocate output register for constraint 's'
define <2 x i8> @inline_asm_2xi8_in_s_def() {
%phys = call <2 x i8> asm sideeffect "; def $0", "={s8}"()
%virt = call <2 x i8> asm sideeffect "; def $0", "=s"()
%r = and <2 x i8> %phys, %virt
ret <2 x i8> %r
}
162 changes: 162 additions & 0 deletions llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll
@@ -0,0 +1,162 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=CHECK %s

; Allow single registers that are too wide for the IR type:

define i16 @inline_asm_i16_in_v_def() {
; CHECK-LABEL: inline_asm_i16_in_v_def:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v8
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v0
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_and_b32_e32 v0, v8, v0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%phys = call i16 asm sideeffect "; def $0", "={v8}"()
%virt = call i16 asm sideeffect "; def $0", "=v"()
%r = and i16 %phys, %virt
ret i16 %r
}

define void @inline_asm_i16_in_v_use(i16 %val) {
; CHECK-LABEL: inline_asm_i16_in_v_use:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v8, 0xffff, v0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use v8
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use v8
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use $0", "{v8}"(i16 %val)
call void asm sideeffect "; use $0", "v"(i16 %val)
ret void
}

define i16 @inline_asm_i16_in_s_def() {
; CHECK-LABEL: inline_asm_i16_in_s_def:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s8
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s4
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_and_b32 s4, s8, s4
; CHECK-NEXT: v_mov_b32_e32 v0, s4
; CHECK-NEXT: s_setpc_b64 s[30:31]
%phys = call i16 asm sideeffect "; def $0", "={s8}"()
%virt = call i16 asm sideeffect "; def $0", "=s"()
%r = and i16 %phys, %virt
ret i16 %r
}

define i8 @inline_asm_i8_in_v_def() {
; CHECK-LABEL: inline_asm_i8_in_v_def:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v8
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_mov_b32_e32 v0, v8
; CHECK-NEXT: s_setpc_b64 s[30:31]
%phys = call i8 asm sideeffect "; def $0", "={v8}"()
; %virt = call i8 asm sideeffect "; def $0", "=v"() ; currently fails
; %r = and i8 %phys, %virt
; ret i8 %r
ret i8 %phys
}

; currently broken, v8 should be set to v0 & 0xFF
define void @inline_asm_i8_in_v_use(i8 %val) {
; CHECK-LABEL: inline_asm_i8_in_v_use:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v8, v0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use v8
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use $0", "{v8}"(i8 %val)
; call void asm sideeffect "; use $0", "v"(i8 %val) ; currently fails
ret void
}

define i8 @inline_asm_i8_in_sphys_def() {
; CHECK-LABEL: inline_asm_i8_in_sphys_def:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s8
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_mov_b32_e32 v0, s8
; CHECK-NEXT: s_setpc_b64 s[30:31]
%phys = call i8 asm sideeffect "; def $0", "={s8}"()
; %virt = call i8 asm sideeffect "; def $0", "=s"() ; currently fails
; %r = and i8 %phys, %virt
; ret i8 %r
ret i8 %phys
}


; Single registers for vector types that fit are fine.

define void @inline_asm_2xi16_in_v_use(<2 x i16> %val) {
; CHECK-LABEL: inline_asm_2xi16_in_v_use:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v8, v0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use v8
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use v0
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; use $0", "{v8}"(<2 x i16> %val)
call void asm sideeffect "; use $0", "v"(<2 x i16> %val)
ret void
}

define <2 x i16> @inline_asm_2xi16_in_v_def() {
; CHECK-LABEL: inline_asm_2xi16_in_v_def:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v8
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v0
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_and_b32_e32 v0, v8, v0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%phys = call <2 x i16> asm sideeffect "; def $0", "={v8}"()
%virt = call <2 x i16> asm sideeffect "; def $0", "=v"()
%r = and <2 x i16> %phys, %virt
ret <2 x i16> %r
}

define <2 x i16> @inline_asm_2xi16_in_s_def() {
; CHECK-LABEL: inline_asm_2xi16_in_s_def:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s8
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s4
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_and_b32 s4, s8, s4
; CHECK-NEXT: v_mov_b32_e32 v0, s4
; CHECK-NEXT: s_setpc_b64 s[30:31]
%phys = call <2 x i16> asm sideeffect "; def $0", "={s8}"()
%virt = call <2 x i16> asm sideeffect "; def $0", "=s"()
%r = and <2 x i16> %phys, %virt
ret <2 x i16> %r
}
