mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-26 20:57:15 +00:00
dc58d1e099
lowering to support both anyext and zext and to custom lower for many different microarchitectures. Using this allows us to get *exactly* the right code for zext and anyext shuffles in all the vector sizes. For v16i8, the improvement is *huge*. The new SSE2 test case added here is one I refused to add before this change because it was sooooo many instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218143 91177308-0d34-0410-b5e6-96231b3b80d8
19 lines
610 B
LLVM
19 lines
610 B
LLVM
; RUN: llc < %s -mcpu=x86-64 -x86-experimental-vector-widening-legalization -x86-experimental-vector-shuffle-lowering | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Zero-extension of a loaded <4 x i8> vector to <4 x i32>.
;
; The function loads four i8 lanes through %ptr and widens each lane to i32
; with zext. The FileCheck directives below pin the expected instruction
; sequence exactly (each one must directly follow the previous):
;   1. a single movd load from memory into an XMM register (captured as X),
;   2. a pxor of a second XMM register with itself (captured as Z),
;   3. punpcklbw of Z into X,
;   4. punpcklwd of Z into X,
;   5. ret.
; The register names are captured as FileCheck variables so the test does
; not depend on which XMM registers the allocator happens to pick.
define <4 x i32> @zext_v4i8_to_v4i32(<4 x i8>* %ptr) {
; CHECK-LABEL: zext_v4i8_to_v4i32:
;
; CHECK: movd (%{{.*}}), %[[X:xmm[0-9]+]]
; CHECK-NEXT: pxor %[[Z:xmm[0-9]+]], %[[Z]]
; CHECK-NEXT: punpcklbw %[[Z]], %[[X]]
; CHECK-NEXT: punpcklwd %[[Z]], %[[X]]
; CHECK-NEXT: ret

  %val = load <4 x i8>* %ptr
  %ext = zext <4 x i8> %val to <4 x i32>
  ret <4 x i32> %ext
}