Replace more uses of sse41 with sse4.1.

llc using the host cpu features and *waning* on unknown features is probably not a good thing :-( git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189144 91177308-0d34-0410-b5e6-96231b3b80d8
2025-02-03 02:53:06 +00:00 · 2013-08-23 20:39:19 +00:00 · 2013-08-23 20:39:19 +00:00 · 7cb1b5f5bf
commit 7cb1b5f5bf
parent 56afd5e90b
39 changed files with 46 additions and 46 deletions
--- a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
+++ b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -mattr=+sse41
+; RUN: llc < %s -mattr=+sse4.1
 ; rdar://5886601
 ; gcc testsuite:  gcc.target/i386/sse4_1-pblendw.c
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@ -1,6 +1,6 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "4 machine-licm"
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "4 machine-licm"
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s
 ; rdar://6627786
 ; rdar://7792037

--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
-; RUN:     -mcpu=generic -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
+; RUN:     -mcpu=generic -disable-fp-elim -mattr=-sse4.1,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
 ; RUN:   FileCheck %s
 ; rdar://6808032

--- a/test/CodeGen/X86/2011-09-18-sse2cmp.ll
+++ b/test/CodeGen/X86/2011-09-18-sse2cmp.ll
@ -1,4 +1,4 @@
-;RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
+;RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s

 ;CHECK: @max
 ;CHECK: cmplepd
--- a/test/CodeGen/X86/2011-09-21-setcc-bug.ll
+++ b/test/CodeGen/X86/2011-09-21-setcc-bug.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1

 ; Make sure we are not crashing on this code.

--- a/test/CodeGen/X86/2011-10-11-srl.ll
+++ b/test/CodeGen/X86/2011-10-11-srl.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-sse41 
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-sse4.1

 target triple = "x86_64-unknown-linux-gnu"

--- a/test/CodeGen/X86/2011-12-15-vec_shift.ll
+++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll
@ -1,5 +1,5 @@
-; RUN: llc -march=x86-64 -mattr=+sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
-; RUN: llc -march=x86-64 -mattr=-sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
+; RUN: llc -march=x86-64 -mattr=+sse4.1 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
+; RUN: llc -march=x86-64 -mattr=-sse4.1 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
 ; Test case for r146671
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7"
--- a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
+++ b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mattr=-sse42,+sse4.1 < %s | FileCheck %s
 ; Make sure we don't load from the location pointed to by %p
 ; twice: it has non-obvious performance implications, and
 ; the relevant transformation doesn't know how to update
--- a/test/CodeGen/X86/blend-msb.ll
+++ b/test/CodeGen/X86/blend-msb.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s


 ; In this test we check that sign-extend of the mask bit is performed by
--- a/test/CodeGen/X86/crash-nosse.ll
+++ b/test/CodeGen/X86/crash-nosse.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=corei7 -mattr=-sse2,-sse41 -verify-machineinstrs
+; RUN: llc < %s -mcpu=corei7 -mattr=-sse2,-sse4.1 -verify-machineinstrs
 target triple = "x86_64-unknown-linux-gnu"

 ; PR10503
--- a/test/CodeGen/X86/floor-soft-float.ll
+++ b/test/CodeGen/X86/floor-soft-float.ll
@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx -soft-float=0 | FileCheck %s --check-prefix=CHECK-HARD-FLOAT
-; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx -soft-float=1 | FileCheck %s --check-prefix=CHECK-SOFT-FLOAT
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx -soft-float=0 | FileCheck %s --check-prefix=CHECK-HARD-FLOAT
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx -soft-float=1 | FileCheck %s --check-prefix=CHECK-SOFT-FLOAT

 target triple = "x86_64-unknown-linux-gnu"

--- a/test/CodeGen/X86/fold-load-vec.ll
+++ b/test/CodeGen/X86/fold-load-vec.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s

 ; rdar://12721174
 ; We should not fold movss into pshufd since pshufd expects m128 while movss
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64

 define i32 @test1() nounwind readonly {
 entry:
--- a/test/CodeGen/X86/neg_fp.ll
+++ b/test/CodeGen/X86/neg_fp.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
+; RUN: llc < %s -march=x86 -mattr=+sse4.1 -o %t
 ; RUN: grep xorps %t | count 1

 ; Test that when we don't -enable-unsafe-fp-math, we don't do the optimization
--- a/test/CodeGen/X86/opt-shuff-tstore.ll
+++ b/test/CodeGen/X86/opt-shuff-tstore.ll
@ -1,4 +1,4 @@
-; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s  -mattr=+sse2,+sse41 | FileCheck %s
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s  -mattr=+sse2,+sse4.1 | FileCheck %s

 ; CHECK: func_4_8
 ; A single memory write
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -mcpu=nehalem -stack-alignment=16 > %t
+; RUN: llc < %s -march=x86 -mattr=sse4.1 -mcpu=nehalem -stack-alignment=16 > %t
 ; RUN: grep pmul %t | count 12
 ; RUN: grep mov %t | count 11

--- a/test/CodeGen/X86/pmulld.ll
+++ b/test/CodeGen/X86/pmulld.ll
@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse41 -asm-verbose=0 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse41 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse4.1 -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse4.1 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64

 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
 ; CHECK-LABEL: test1:
--- a/test/CodeGen/X86/pr10523.ll
+++ b/test/CodeGen/X86/pr10523.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1

 ; No check in a crash test

--- a/test/CodeGen/X86/pr10524.ll
+++ b/test/CodeGen/X86/pr10524.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1

 ; No check in a crash test

--- a/test/CodeGen/X86/pr10525.ll
+++ b/test/CodeGen/X86/pr10525.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1

 ; No check in a crash test

--- a/test/CodeGen/X86/pr10526.ll
+++ b/test/CodeGen/X86/pr10526.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1

 ; No check in a crash test

--- a/test/CodeGen/X86/pr12312.ll
+++ b/test/CodeGen/X86/pr12312.ll
@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse41,-avx < %s | FileCheck %s --check-prefix SSE41
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx < %s | FileCheck %s --check-prefix SSE41
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 < %s | FileCheck %s --check-prefix AVX

 define i32 @veccond128(<4 x i32> %input) {
--- a/test/CodeGen/X86/rounding-ops.ll
+++ b/test/CodeGen/X86/rounding-ops.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse4.1 | FileCheck -check-prefix=CHECK-SSE %s
 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s

 define float @test1(float %x) nounwind  {
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s

 ; CHECK: vsel_float
 ; CHECK: pandn
--- a/test/CodeGen/X86/sse41-blend.ll
+++ b/test/CodeGen/X86/sse41-blend.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s

 ;CHECK-LABEL: vsel_float:
 ;CHECK: blendvps
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X64

@g16 = external global i16

--- a/test/CodeGen/X86/trunc-ext-ld-st.ll
+++ b/test/CodeGen/X86/trunc-ext-ld-st.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s

 ;CHECK-LABEL: load_2_i8:
 ; A single 16-bit load
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse41 -o %t
+; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse4.1 -o %t
 ; RUN: not grep extractps   %t
 ; RUN: not grep pextrd      %t
 ; RUN: not grep pshufd  %t
--- a/test/CodeGen/X86/vec_fpext.ll
+++ b/test/CodeGen/X86/vec_fpext.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.1,-avx | FileCheck %s
 ; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s

 ; PR11674
--- a/test/CodeGen/X86/vec_insert-8.ll
+++ b/test/CodeGen/X86/vec_insert-8.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
+; RUN: llc < %s -march=x86 -mattr=+sse4.1 -o %t

 ; tests variable insert and extract of a 4 x i32

--- a/test/CodeGen/X86/vec_insert-9.ll
+++ b/test/CodeGen/X86/vec_insert-9.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse4.1 > %t
 ; RUN: grep pinsrd %t | count 1

 define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind  {
--- a/test/CodeGen/X86/vec_shift4.ll
+++ b/test/CodeGen/X86/vec_shift4.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s

 define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
 entry:
--- a/test/CodeGen/X86/vec_shuffle-25.ll
+++ b/test/CodeGen/X86/vec_shuffle-25.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
+; RUN: llc < %s -march=x86 -mattr=sse4.1 -o %t
 ; RUN: grep unpcklps %t | count 3
 ; RUN: grep unpckhps %t | count 1
 
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse4.1 | FileCheck %s

 ; ModuleID = 'vec_shuffle-27.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
--- a/test/CodeGen/X86/vec_shuffle-36.ll
+++ b/test/CodeGen/X86/vec_shuffle-36.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse41 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse4.1 | FileCheck %s

 define <8 x i16> @shuf6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 ; CHECK: pshufb
--- a/test/CodeGen/X86/vec_splat-3.ll
+++ b/test/CodeGen/X86/vec_splat-3.ll
@ -1,4 +1,4 @@
-; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s
+; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse4.1 | FileCheck %s

 ; Splat test for v8i16
 define <8 x i16> @shuf_8i16_0(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse4.1 | FileCheck %s

 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin8.7.2"
--- a/test/CodeGen/X86/vector-variable-idx2.ll
+++ b/test/CodeGen/X86/vector-variable-idx2.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse41
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.1

 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin11.0.0"
--- a/test/CodeGen/X86/widen_cast-6.ll
+++ b/test/CodeGen/X86/widen_cast-6.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s
 ; CHECK: movd

 ; Test bit convert that requires widening in the operand.