llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
David Blaikie 5a70dd1d82 [opaque pointer type] Add textual IR support for explicit type parameter to gep operator
Similar to gep (r230786) and load (r230794) changes.

Similar migration script can be used to update test cases, which
successfully migrated all of LLVM and Polly, but about 4 test cases
needed manually changes in Clang.

(this script will read the contents of stdin and massage it into stdout
- wrap it in the 'apply.sh' script shown in previous commits + xargs to
apply it over a large set of test cases)

import fileinput
import sys
import re

rep = re.compile(r"(getelementptr(?:\s+inbounds)?\s*\()((<\d*\s+x\s+)?([^@]*?)(|\s*addrspace\(\d+\))\s*\*(?(3)>)\s*)(?=$|%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|zeroinitializer|<|\[\[[a-zA-Z]|\{\{)", re.MULTILINE | re.DOTALL)

def conv(match):
  line = match.group(1)
  line += match.group(4)
  line += ", "
  line += match.group(2)
  return line

line = sys.stdin.read()
off = 0
for match in re.finditer(rep, line):
  sys.stdout.write(line[off:match.start()])
  sys.stdout.write(conv(match))
  off = match.end()
sys.stdout.write(line[off:])

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232184 91177308-0d34-0410-b5e6-96231b3b80d8
2015-03-13 18:20:45 +00:00

602 lines
23 KiB
LLVM

; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
; rdar://9428579
%type1 = type { <16 x i8> }
%type2 = type { <8 x i8> }
%type3 = type { <4 x i16> }
define hidden fastcc void @t1(%type1** %argtable) nounwind {
entry:
; CHECK-LABEL: t1:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str q0, [x[[REG]]]
%tmp1 = load %type1*, %type1** %argtable, align 8
%tmp2 = getelementptr inbounds %type1, %type1* %tmp1, i64 0, i32 0
store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
ret void
}
define hidden fastcc void @t2(%type2** %argtable) nounwind {
entry:
; CHECK-LABEL: t2:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str d0, [x[[REG]]]
%tmp1 = load %type2*, %type2** %argtable, align 8
%tmp2 = getelementptr inbounds %type2, %type2* %tmp1, i64 0, i32 0
store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
ret void
}
; add a bunch of tests for rdar://11246289
@globalArray64x2 = common global <2 x i64>* null, align 8
@globalArray32x4 = common global <4 x i32>* null, align 8
@globalArray16x8 = common global <8 x i16>* null, align 8
@globalArray8x16 = common global <16 x i8>* null, align 8
@globalArray64x1 = common global <1 x i64>* null, align 8
@globalArray32x2 = common global <2 x i32>* null, align 8
@globalArray16x4 = common global <4 x i16>* null, align 8
@globalArray8x8 = common global <8 x i8>* null, align 8
@floatglobalArray64x2 = common global <2 x double>* null, align 8
@floatglobalArray32x4 = common global <4 x float>* null, align 8
@floatglobalArray64x1 = common global <1 x double>* null, align 8
@floatglobalArray32x2 = common global <2 x float>* null, align 8
define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 %offset
%tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
%tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
%arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 %offset
store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
ret void
}
define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x2:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
%arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 3
%tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
%tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
%arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 5
store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
ret void
}
define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 %offset
%tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
%tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
%arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 %offset
store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
ret void
}
define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x4:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
%arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 3
%tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
%tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
%arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 5
store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
ret void
}
define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 %offset
%tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
%tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
%arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 %offset
store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
ret void
}
define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x8:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
%arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 3
%tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
%tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
%arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 5
store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
ret void
}
define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x16:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 %offset
%tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
%tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
%arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 %offset
store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
ret void
}
define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_8x16:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
%arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 3
%tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
%tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
%arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 5
store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
ret void
}
define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x1:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 %offset
%tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
%tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
%arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 %offset
store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
ret void
}
define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x1:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
%arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 3
%tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
%tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
%arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 5
store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
ret void
}
define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 %offset
%tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
%tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
%arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 %offset
store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
ret void
}
define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x2:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
%arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 3
%tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
%tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
%arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 5
store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
ret void
}
define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 %offset
%tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
%tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
%arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 %offset
store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
ret void
}
define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x4:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
%arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 3
%tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
%tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
%arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 5
store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
ret void
}
define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
%arrayidx = getelementptr inbounds <8 x i8>, <8 x i8>* %array, i64 %offset
%tmp = load <8 x i8>, <8 x i8>* %arrayidx, align 8
%tmp1 = load <8 x i8>*, <8 x i8>** @globalArray8x8, align 8
%arrayidx1 = getelementptr inbounds <8 x i8>, <8 x i8>* %tmp1, i64 %offset
store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
ret void
}
; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
; registers for unscaled vector accesses
@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
define <1 x i64> @fct0() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct0:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
%0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
ret <1 x i64> %0
}
define <2 x i32> @fct1() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct1:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
%0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
ret <2 x i32> %0
}
define <4 x i16> @fct2() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct2:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
%0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
ret <4 x i16> %0
}
define <8 x i8> @fct3() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct3:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
%0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
ret <8 x i8> %0
}
define <2 x i64> @fct4() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct4:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
%0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
ret <2 x i64> %0
}
define <4 x i32> @fct5() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct5:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
%0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
ret <4 x i32> %0
}
define <8 x i16> @fct6() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct6:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
%0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
ret <8 x i16> %0
}
define <16 x i8> @fct7() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct7:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
%0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
ret <16 x i8> %0
}
define void @fct8() nounwind ssp {
entry:
; CHECK-LABEL: fct8:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
%0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
ret void
}
define void @fct9() nounwind ssp {
entry:
; CHECK-LABEL: fct9:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
%0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
ret void
}
define void @fct10() nounwind ssp {
entry:
; CHECK-LABEL: fct10:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
%0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
ret void
}
define void @fct11() nounwind ssp {
entry:
; CHECK-LABEL: fct11:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
%0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
ret void
}
define void @fct12() nounwind ssp {
entry:
; CHECK-LABEL: fct12:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
%0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
ret void
}
define void @fct13() nounwind ssp {
entry:
; CHECK-LABEL: fct13:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
%0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
ret void
}
define void @fct14() nounwind ssp {
entry:
; CHECK-LABEL: fct14:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
%0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
ret void
}
define void @fct15() nounwind ssp {
entry:
; CHECK-LABEL: fct15:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
%0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
ret void
}
; Check the building of vector from a single loaded value.
; Part of <rdar://problem/14170854>
;
; Single loads with immediate offset.
define <8 x i8> @fct16(i8* nocapture %sp0) {
; CHECK-LABEL: fct16:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
%addr = getelementptr i8, i8* %sp0, i64 1
%pix_sp0.0.copyload = load i8, i8* %addr, align 1
%vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <8 x i8> %vec, %vec
ret <8 x i8> %vmull.i
}
define <16 x i8> @fct17(i8* nocapture %sp0) {
; CHECK-LABEL: fct17:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
%addr = getelementptr i8, i8* %sp0, i64 1
%pix_sp0.0.copyload = load i8, i8* %addr, align 1
%vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <16 x i8> %vec, %vec
ret <16 x i8> %vmull.i
}
define <4 x i16> @fct18(i16* nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
%addr = getelementptr i16, i16* %sp0, i64 1
%pix_sp0.0.copyload = load i16, i16* %addr, align 1
%vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <4 x i16> %vec, %vec
ret <4 x i16> %vmull.i
}
define <8 x i16> @fct19(i16* nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
%addr = getelementptr i16, i16* %sp0, i64 1
%pix_sp0.0.copyload = load i16, i16* %addr, align 1
%vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <8 x i16> %vec, %vec
ret <8 x i16> %vmull.i
}
define <2 x i32> @fct20(i32* nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
%addr = getelementptr i32, i32* %sp0, i64 1
%pix_sp0.0.copyload = load i32, i32* %addr, align 1
%vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <2 x i32> %vec, %vec
ret <2 x i32> %vmull.i
}
define <4 x i32> @fct21(i32* nocapture %sp0) {
; CHECK-LABEL: fct21:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
%addr = getelementptr i32, i32* %sp0, i64 1
%pix_sp0.0.copyload = load i32, i32* %addr, align 1
%vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <4 x i32> %vec, %vec
ret <4 x i32> %vmull.i
}
define <1 x i64> @fct22(i64* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldr d0, [x0, #8]
entry:
%addr = getelementptr i64, i64* %sp0, i64 1
%pix_sp0.0.copyload = load i64, i64* %addr, align 1
%vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
ret <1 x i64> %vec
}
define <2 x i64> @fct23(i64* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
entry:
%addr = getelementptr i64, i64* %sp0, i64 1
%pix_sp0.0.copyload = load i64, i64* %addr, align 1
%vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
ret <2 x i64> %vec
}
;
; Single loads with register offset.
define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct24:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
%addr = getelementptr i8, i8* %sp0, i64 %offset
%pix_sp0.0.copyload = load i8, i8* %addr, align 1
%vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <8 x i8> %vec, %vec
ret <8 x i8> %vmull.i
}
define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct25:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
%addr = getelementptr i8, i8* %sp0, i64 %offset
%pix_sp0.0.copyload = load i8, i8* %addr, align 1
%vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <16 x i8> %vec, %vec
ret <16 x i8> %vmull.i
}
define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct26:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
%addr = getelementptr i16, i16* %sp0, i64 %offset
%pix_sp0.0.copyload = load i16, i16* %addr, align 1
%vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <4 x i16> %vec, %vec
ret <4 x i16> %vmull.i
}
define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct27:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
%addr = getelementptr i16, i16* %sp0, i64 %offset
%pix_sp0.0.copyload = load i16, i16* %addr, align 1
%vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <8 x i16> %vec, %vec
ret <8 x i16> %vmull.i
}
define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct28:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
%addr = getelementptr i32, i32* %sp0, i64 %offset
%pix_sp0.0.copyload = load i32, i32* %addr, align 1
%vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <2 x i32> %vec, %vec
ret <2 x i32> %vmull.i
}
define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct29:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
%addr = getelementptr i32, i32* %sp0, i64 %offset
%pix_sp0.0.copyload = load i32, i32* %addr, align 1
%vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <4 x i32> %vec, %vec
ret <4 x i32> %vmull.i
}
define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct30:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
%addr = getelementptr i64, i64* %sp0, i64 %offset
%pix_sp0.0.copyload = load i64, i64* %addr, align 1
%vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
ret <1 x i64> %vec
}
define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct31:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
%addr = getelementptr i64, i64* %sp0, i64 %offset
%pix_sp0.0.copyload = load i64, i64* %addr, align 1
%vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
ret <2 x i64> %vec
}