llvm/test/CodeGen/X86/insertps-O0-bug.ll
David Blaikie 7c9c6ed761 [opaque pointer type] Add textual IR support for explicit type parameter to load instruction
Essentially the same as the GEP change in r230786.

A similar migration script can be used to update test cases, though a few more
test case improvements/changes were required this time around: (r229269-r229278)

import fileinput
import sys
import re

pat = re.compile(r"((?:=|:|^)\s*load (?:atomic )?(?:volatile )?(.*?))(| addrspace\(\d+\) *)\*($| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$)")

for line in sys.stdin:
  sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line))

Reviewers: rafael, dexonsmith, grosser

Differential Revision: http://reviews.llvm.org/D7649

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230794 91177308-0d34-0410-b5e6-96231b3b80d8
2015-02-27 21:17:42 +00:00

53 lines
2.1 KiB
LLVM

; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O0 < %s | FileCheck %s
; Check that at -O0, the backend doesn't attempt to canonicalize a vector load
; used by an INSERTPS into a scalar load plus scalar_to_vector.
;
; In order to fold a load into the memory operand of an INSERTPSrm, the backend
; tries to canonicalize a vector load in input to an INSERTPS node into a
; scalar load plus scalar_to_vector. This would allow ISel to match the
; INSERTPSrm variant rather than a load plus INSERTPSrr.
;
; However, ISel can only select an INSERTPSrm if folding a load into the operand
; of an insertps is considered to be profitable.
;
; In the example below:
;
; __m128 test(__m128 a, __m128 *b) {
; __m128 c = _mm_insert_ps(a, *b, 1 << 6);
; return c;
; }
;
; At -O0, the backend would attempt to canonicalize the load to 'b' into
; a scalar load in the hope of matching an INSERTPSrm.
; However, ISel would fail to recognize an INSERTPSrm since load folding is
; always considered unprofitable at -O0. This would leave the insertps mask
; in an invalid state.
;
; The problem with the canonicalization rule performed by the backend is that
; it assumes ISel to always be able to match an INSERTPSrm. This assumption is
; not always correct at -O0. In this example, FastISel fails to lower the
; arguments needed by the entry block. This is enough to enable the DAGCombiner
; and eventually trigger the canonicalization on the INSERTPS node.
;
; This test checks that the vector load in input to the insertps is not
; canonicalized into a scalar load plus scalar_to_vector (a movss).
define <4 x float> @test(<4 x float> %a, <4 x float>* %b) {
; CHECK-LABEL: test:
; CHECK: movaps (%rdi), [[REG:%[a-z0-9]+]]
; CHECK-NOT: movss
; CHECK: insertps $64, [[REG]],
; CHECK: ret
entry:
%0 = load <4 x float>, <4 x float>* %b, align 16
%1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %0, i32 64)
%2 = alloca <4 x float>, align 16
store <4 x float> %1, <4 x float>* %2, align 16
%3 = load <4 x float>, <4 x float>* %2, align 16
ret <4 x float> %3
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)