AMDGPU: Fix sext_inreg for i1 in i16

This produces worse code when i16 is legal, mostly
due to combines getting confused by conversions inserted
for uniform 16-bit operations.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291717 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2017-01-11 22:35:22 +00:00
parent 8c7e9845cf
commit fac51240d9
2 changed files with 138 additions and 0 deletions

View File

@ -870,6 +870,11 @@ def : Pat <
(S_BFE_I64 i64:$src, (i32 0x10000)) // 0 | 1 << 16
>;
// i16 sext_inreg from i1, selected to S_BFE_I32 on the source register.
// The immediate follows the same "offset | width << 16" layout as the
// neighbouring sext_inreg patterns, here offset 0 and width 1.
def : Pat <
  (i16 (sext_inreg i16:$src, i1)),
  (S_BFE_I32 $src, (i32 0x10000)) // 0 | 1 << 16
>;
def : Pat <
(i16 (sext_inreg i16:$src, i8)),
(S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16

View File

@ -2,6 +2,8 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FIXME: i16 promotion pass ruins the scalar cases when legal.
; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
; GCN: s_load_dword [[ARG:s[0-9]+]],
; GCN: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
@ -659,6 +661,137 @@ define void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrs
ret void
}
; FUNC-LABEL: {{^}}s_sext_in_reg_i1_i16:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x10000
; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
; SI: buffer_store_short [[VBFE]]
; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15

; Loads an i32 through %ptr, truncates it to i16, then sign-extends bit 0
; across the i16 with a shl/ashr pair by 15 and stores the result to %out.
; The SI checks expect one s_bfe_i32 with immediate 0x10000, while the VI
; checks expect the shift, s_sext_i32_i16, shift sequence.
define void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
  %word = load i32, i32 addrspace(2)* %ptr
  %narrow = trunc i32 %word to i16
  %hi = shl i16 %narrow, 15
  %ext = ashr i16 %hi, 15
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}s_sext_in_reg_i2_i16:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x20000
; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
; SI: buffer_store_short [[VBFE]]
; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14

; Same shape as the i1 case, but the shl/ashr pair shifts by 14, so the low
; two bits of the truncated i16 are sign-extended before the store to %out.
; The SI checks expect s_bfe_i32 with immediate 0x20000.
define void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
  %word = load i32, i32 addrspace(2)* %ptr
  %narrow = trunc i32 %word to i16
  %hi = shl i16 %narrow, 14
  %ext = ashr i16 %hi, 14
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}v_sext_in_reg_i1_i16:
; GCN: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]]
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[VAL]], 0, 1{{$}}
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]

; Uses the result of llvm.r600.read.tidig.x as the element index into both
; %ptr and %out. The loaded i16 has bit 0 sign-extended by a shl/ashr pair
; by 15 and is stored through the addrspace(3) pointer. The checks expect
; a single v_bfe_i32 with operands 0, 1 feeding ds_write_b16.
define void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 {
  %lane = call i32 @llvm.r600.read.tidig.x()
  %src.addr = getelementptr i16, i16 addrspace(1)* %ptr, i32 %lane
  %dst.addr = getelementptr i16, i16 addrspace(3)* %out, i32 %lane
  %elt = load i16, i16 addrspace(1)* %src.addr
  %hi = shl i16 %elt, 15
  %ext = ashr i16 %hi, 15
  store i16 %ext, i16 addrspace(3)* %dst.addr
  ret void
}
; FUNC-LABEL: {{^}}v_sext_in_reg_i1_i16_nonload:
; GCN: {{buffer|flat}}_load_ushort [[VAL0:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[VAL1:v[0-9]+]]
; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]]
; VI: v_lshlrev_b16_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]]
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}}
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]

; Variant where the value being sign-extended is not a load result. Two
; volatile i16 loads are combined with a variable shl, and bit 0 of that
; product is then sign-extended by the shl/ashr pair by 15.
; %s.val is not referenced by the body.
define void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind {
  %lane = call i32 @llvm.r600.read.tidig.x()
  %a.addr = getelementptr i16, i16 addrspace(1)* %aptr, i32 %lane
  %b.addr = getelementptr i16, i16 addrspace(1)* %bptr, i32 %lane
  %dst.addr = getelementptr i16, i16 addrspace(3)* %out, i32 %lane
  %lhs = load volatile i16, i16 addrspace(1)* %a.addr, align 2
  %amt = load volatile i16, i16 addrspace(1)* %b.addr, align 2
  %shifted = shl i16 %lhs, %amt
  %hi = shl i16 %shifted, 15
  %ext = ashr i16 %hi, 15
  store i16 %ext, i16 addrspace(3)* %dst.addr, align 2
  ret void
}
; FUNC-LABEL: {{^}}s_sext_in_reg_i2_i16_arg:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x20000
; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
; SI: buffer_store_short [[VBFE]]
; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}

; Sign-extends the low two bits of the i16 argument %in (shl/ashr pair by
; 14) and stores the result to %out. Unlike the load-based i2 test, the
; source value is passed directly as a function argument.
define void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
  %hi = shl i16 %in, 14
  %ext = ashr i16 %hi, 14
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}s_sext_in_reg_i8_i16_arg:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; SI: s_sext_i32_i8 [[SSEXT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VSEXT:v[0-9]+]], [[SSEXT]]
; SI: buffer_store_short [[VSEXT]]
; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}

; Sign-extends the low eight bits of the i16 argument %in (shl/ashr pair by
; 8) and stores the result to %out. The SI checks expect s_sext_i32_i8, a
; copy into a vector register captured as VSEXT, and a store of that same
; VSEXT register, so the store check only uses a capture made in this
; function.
define void @s_sext_in_reg_i8_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
  %shl = shl i16 %in, 8
  %sext = ashr i16 %shl, 8
  store i16 %sext, i16 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}s_sext_in_reg_i15_i16_arg:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0xf0000
; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
; SI: buffer_store_short [[VBFE]]
; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}

; Sign-extends the low fifteen bits of the i16 argument %in (shl/ashr pair
; by 1) and stores the result to %out. The SI checks expect s_bfe_i32 with
; immediate 0xf0000.
define void @s_sext_in_reg_i15_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
  %hi = shl i16 %in, 1
  %ext = ashr i16 %hi, 1
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}
; Intrinsic called by the v_* tests above. Its i32 result is used as the
; getelementptr index for their input and output pointers.
; NOTE(review) attribute group #1 is not defined in the visible lines;
; confirm it is declared further down in the file.
declare i32 @llvm.r600.read.tidig.x() #1
; Attribute group referenced as #0 by most of the test functions above.
attributes #0 = { nounwind }