mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-13 23:18:58 +00:00
[DAGCombine] Catch the case where extract_vector_elt can cause an any_ext while processing AND SDNodes
Summary: extract_vector_elt can cause an implicit any_ext if the types don't match. When processing the following pattern: (and (extract_vector_elt (load ([non_ext|any_ext|zero_ext] V))), c), DAGCombine was ignoring the possible extend and sometimes removing the AND even though it was required to keep some of the bits in the result at 0, resulting in a miscompile. This change fixes the issue by limiting the transformation to cases where the extract_vector_elt doesn't perform the implicit extend. Reviewers: t.p.northover, jmolloy Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D18247 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@263935 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ff9be37421
commit
bec2ec108f
@ -3089,6 +3089,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
|
||||
// the 'X' node here can either be nothing or an extract_vector_elt to catch
|
||||
// more cases.
|
||||
if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
|
||||
N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
|
||||
N0.getOperand(0).getOpcode() == ISD::LOAD) ||
|
||||
N0.getOpcode() == ISD::LOAD) {
|
||||
LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
|
||||
|
@ -19,12 +19,48 @@ define float @f(<4 x i16>* nocapture %in) {
|
||||
ret float %7
|
||||
}
|
||||
|
||||
; CHECK-LABEL: g:
|
||||
define float @g(<4 x i16>* nocapture %in) {
|
||||
; CHECK: vldr
|
||||
%1 = load <4 x i16>, <4 x i16>* %in
|
||||
; CHECK-NOT: uxth
|
||||
|
||||
; For now we're generating a vmov.16 and a uxth instruction.
|
||||
; The uxth is redundant, and we should be able to extend without
|
||||
; having to generate cross-domain copies. Once we can do this
|
||||
; we should modify the checks below.
|
||||
|
||||
; CHECK: uxth
|
||||
%2 = extractelement <4 x i16> %1, i32 0
|
||||
; CHECK: vcvt.f32.u32
|
||||
%3 = uitofp i16 %2 to float
|
||||
ret float %3
|
||||
}
|
||||
|
||||
; For the following code, the backend generates an
|
||||
; (and 0xff (i32 extract_vector_elt (zext load <4 x i8> to <4 x i16>)))
|
||||
;
|
||||
; The and is not redundant and cannot be removed. Since
|
||||
; extract_vector_elt is doing an implicit any_ext, the and
|
||||
; is required to guarantee that the top bits are set to zero.
|
||||
|
||||
; Ideally this should be a zext from <4 x i8> to <4 x i32>.
|
||||
|
||||
; CHECK-LABEL: h:
|
||||
; CHECK: vld1.32
|
||||
; CHECK: uxtb
|
||||
define <4 x i32> @h(<4 x i8> *%in) {
|
||||
%1 = load <4 x i8>, <4 x i8>* %in, align 4
|
||||
%2 = extractelement <4 x i8> %1, i32 0
|
||||
%3 = zext i8 %2 to i32
|
||||
%4 = insertelement <4 x i32> undef, i32 %3, i32 0
|
||||
%5 = extractelement <4 x i8> %1, i32 1
|
||||
%6 = zext i8 %5 to i32
|
||||
%7 = insertelement <4 x i32> %4, i32 %6, i32 1
|
||||
%8 = extractelement <4 x i8> %1, i32 2
|
||||
%9 = zext i8 %8 to i32
|
||||
%10 = insertelement <4 x i32> %7, i32 %9, i32 2
|
||||
%11 = extractelement <4 x i8> %1, i32 3
|
||||
%12 = zext i8 %11 to i32
|
||||
%13 = insertelement <4 x i32> %10, i32 %12, i32 3
|
||||
ret <4 x i32> %13
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user