update this.

llvm-svn: 113116
Chris Lattner 2010-09-05 20:22:09 +00:00
parent e54b9e2332
commit 908d8e9de2


@@ -20,7 +20,28 @@ __m128i shift_right(__m128i value, unsigned long offset) {
 //===---------------------------------------------------------------------===//
 
 SSE has instructions for doing operations on complex numbers, we should pattern
-match them.  Compiling this:
+match them.  For example, this should turn into a horizontal add:
+
+typedef float __attribute__((vector_size(16))) v4f32;
+float f32(v4f32 A) {
+  return A[0]+A[1]+A[2]+A[3];
+}
+
+Instead we get this:
+
+_f32:                                   ## @f32
+        pshufd  $1, %xmm0, %xmm1        ## xmm1 = xmm0[1,0,0,0]
+        addss   %xmm0, %xmm1
+        pshufd  $3, %xmm0, %xmm2        ## xmm2 = xmm0[3,0,0,0]
+        movhlps %xmm0, %xmm0            ## xmm0 = xmm0[1,1]
+        movaps  %xmm0, %xmm3
+        addss   %xmm1, %xmm3
+        movdqa  %xmm2, %xmm0
+        addss   %xmm3, %xmm0
+        ret
+
+Also, there are cases where some simple local SLP would improve codegen a bit.
+compiling this:
 
 _Complex float f32(_Complex float A, _Complex float B) {
   return A+B;
@@ -28,19 +49,17 @@ _Complex float f32(_Complex float A, _Complex float B) {
 into:
 
-_f32:
+_f32:                                   ## @f32
         movdqa  %xmm0, %xmm2
         addss   %xmm1, %xmm2
-        pshufd  $16, %xmm2, %xmm2
-        pshufd  $1, %xmm1, %xmm1
-        pshufd  $1, %xmm0, %xmm0
-        addss   %xmm1, %xmm0
-        pshufd  $16, %xmm0, %xmm1
-        movdqa  %xmm2, %xmm0
-        unpcklps        %xmm1, %xmm0
+        pshufd  $1, %xmm1, %xmm1        ## xmm1 = xmm1[1,0,0,0]
+        pshufd  $1, %xmm0, %xmm3        ## xmm3 = xmm0[1,0,0,0]
+        addss   %xmm1, %xmm3
+        movaps  %xmm2, %xmm0
+        unpcklps        %xmm3, %xmm0    ## xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
         ret
 
-seems silly.
+seems silly when it could just be one addps.
 
 //===---------------------------------------------------------------------===//
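
For reference, roughly the codegen the first example is asking for. This is a
hand-written sketch, not compiler output: it assumes SSE3 is available (for
haddps), and the two-haddps reduction computes (A[0]+A[1])+(A[2]+A[3]), which
matches the source expression only modulo FP reassociation:

_f32:                                   ## sketch, requires SSE3
        haddps  %xmm0, %xmm0            ## xmm0 = A[0]+A[1], A[2]+A[3], A[0]+A[1], A[2]+A[3]
        haddps  %xmm0, %xmm0            ## low element now holds A[0]+A[1]+A[2]+A[3]
        ret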
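
And a sketch of what "one addps" means for the _Complex float case: under the
x86-64 SysV ABI each _Complex float argument arrives packed (real, imag) in the
low 64 bits of an xmm register, so a single packed add handles both components.
Again hand-written, not compiler output:

_f32:                                   ## sketch
        addps   %xmm1, %xmm0            ## real and imaginary parts added at once
        ret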