[CGP] use narrower types in memcmp expansion when possible

This only affects very small memcmp calls on x86 for now, but it
will become more important if we allow vector-sized loads and
compares.

llvm-svn: 309711
Sanjay Patel 2017-08-01 17:24:54 +00:00
parent 4f93d4b76f
commit 4d111202e7
4 changed files with 97 additions and 195 deletions
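
For intuition, here is a small standalone sketch (not the LLVM pass itself) of the narrowing rule this patch adds: when the constant memcmp size is smaller than the target's widest legal load, the expansion's widest load is capped at the largest power of two that fits in that size. The helper and driver names below are hypothetical; only the rounding rule is taken from the patch.

#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Largest power-of-two load size that does not exceed the constant memcmp
// size, capped by the target's maximum load size.
static unsigned narrowedMaxLoadSize(uint64_t Size, unsigned MaxLoadSize) {
  if (MaxLoadSize > Size) {
    unsigned Log2 = 0;
    while ((Size >> (Log2 + 1)) != 0)
      ++Log2;                      // floor(log2(Size)), as logBase2() computes
    MaxLoadSize = 1u << Log2;
  }
  return MaxLoadSize;
}

int main() {
  // With an 8-byte maximum load (x86-64), a 3-byte memcmp now uses a 2-byte
  // widest load, so its first compare becomes a 16-bit cmpw instead of a
  // zero-extended 64-bit cmpq, which is the change visible in the tests below.
  for (uint64_t Size : {2, 3, 5, 6, 8}) {
    unsigned Widest = narrowedMaxLoadSize(Size, /*MaxLoadSize=*/8);
    std::printf("size %llu -> widest load %u bytes\n",
                (unsigned long long)Size, Widest);
  }
  return 0;
}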


@@ -2271,8 +2271,12 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
return false;
}
// Early exit from expansion if size greater than max bytes to load.
// Scale the max size down if the target can load more bytes than we need.
uint64_t SizeVal = SizeCast->getZExtValue();
if (MaxLoadSize > SizeVal)
MaxLoadSize = 1 << SizeCast->getValue().logBase2();
// Calculate how many load pairs are needed for the constant size.
unsigned NumLoads = 0;
unsigned RemainingSize = SizeVal;
unsigned LoadSize = MaxLoadSize;
@@ -2282,6 +2286,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
LoadSize = LoadSize / 2;
}
// Don't expand if this will require more loads than desired by the target.
if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) {
NumMemCmpGreaterThanMax++;
return false;
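
The second hunk counts how many load pairs the constant-size expansion needs and bails out if the target's limit is exceeded. Below is a minimal sketch of that calculation, following the loop shape visible in the hunk; the limit value is assumed for illustration rather than taken from TLI->getMaxExpandSizeMemcmp().

#include <cstdio>

static unsigned countLoadPairs(unsigned SizeVal, unsigned MaxLoadSize) {
  unsigned NumLoads = 0;
  unsigned RemainingSize = SizeVal;
  unsigned LoadSize = MaxLoadSize;
  while (RemainingSize) {
    if (LoadSize <= RemainingSize) {
      NumLoads += RemainingSize / LoadSize;
      RemainingSize %= LoadSize;
    }
    LoadSize = LoadSize / 2;   // fall back to the next narrower load
  }
  return NumLoads;
}

int main() {
  // A 3-byte memcmp with the widest load narrowed to 2 bytes needs one
  // 2-byte load pair plus one 1-byte load pair.
  unsigned NumLoads = countLoadPairs(/*SizeVal=*/3, /*MaxLoadSize=*/2);
  unsigned MaxLoadsPerMemcmp = 4;  // assumed limit for the demo only
  std::printf("%u load pairs, expand = %s\n", NumLoads,
              NumLoads > MaxLoadsPerMemcmp ? "no" : "yes");
  return 0;
}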


@@ -117,9 +117,7 @@ define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx
; X86-NEXT: rolw $8, %si
; X86-NEXT: movzwl %dx, %edx
; X86-NEXT: movzwl %si, %esi
; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB4_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax
@@ -131,7 +129,7 @@ define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
; X86-NEXT: incl %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: decl %eax
; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: cmpw %si, %dx
; X86-NEXT: cmovael %ecx, %eax
; X86-NEXT: .LBB4_3: # %endblock
; X86-NEXT: popl %esi
@@ -143,9 +141,7 @@ define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB4_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
@@ -306,7 +302,7 @@ define i32 @length5(i8* %X, i8* %Y) nounwind optsize {
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB9_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax


@@ -117,9 +117,7 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx
; X86-NEXT: rolw $8, %si
; X86-NEXT: movzwl %dx, %edx
; X86-NEXT: movzwl %si, %esi
; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB4_1
; X86-NEXT: # BB#2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax
@@ -140,9 +138,7 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB4_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax
@@ -299,7 +295,7 @@ define i32 @length5(i8* %X, i8* %Y) nounwind {
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax
; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB9_1
; X64-NEXT: # BB#2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax


@@ -22,63 +22,32 @@ define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp3(
; X32-NEXT: loadbb:
; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16*
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; X32-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
; X32-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]]
; X32-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X32: res_block:
; X32-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]]
; X32-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb1:
; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[X]], i8 2
; X32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 2
; X32-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
; X32-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
; X32-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
; X32-NEXT: [[TMP16:%.*]] = zext i8 [[TMP14]] to i32
; X32-NEXT: [[TMP17:%.*]] = sub i32 [[TMP15]], [[TMP16]]
; X32-NEXT: br label [[ENDBLOCK]]
; X32: endblock:
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP17]], [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
;
; X64-LABEL: @cmp3(
; X64-NEXT: loadbb:
; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16*
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; X64-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
; X64-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i64
; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i64
; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
; X64-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X64: res_block:
; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb1:
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[X]], i8 2
; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 2
; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
; X64-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
; X64-NEXT: [[TMP16:%.*]] = zext i8 [[TMP14]] to i32
; X64-NEXT: [[TMP17:%.*]] = sub i32 [[TMP15]], [[TMP16]]
; X64-NEXT: br label [[ENDBLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP17]], [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
; ALL-LABEL: @cmp3(
; ALL-NEXT: loadbb:
; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16*
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; ALL-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; ALL-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
; ALL-NEXT: [[TMP6:%.*]] = icmp eq i16 [[TMP4]], [[TMP5]]
; ALL-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; ALL: res_block:
; ALL-NEXT: [[TMP7:%.*]] = icmp ult i16 [[TMP4]], [[TMP5]]
; ALL-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
; ALL-NEXT: br label [[ENDBLOCK:%.*]]
; ALL: loadbb1:
; ALL-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 2
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 2
; ALL-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]]
; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
; ALL-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
; ALL-NEXT: [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]]
; ALL-NEXT: br label [[ENDBLOCK]]
; ALL: endblock:
; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
; ALL-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
ret i32 %call
@@ -104,134 +73,70 @@ define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp5(
; X32-NEXT: loadbb:
; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X32: res_block:
; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb1:
; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X32-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]]
; X32-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
; X32-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
; X32-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
; X32-NEXT: [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]]
; X32-NEXT: br label [[ENDBLOCK]]
; X32: endblock:
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
;
; X64-LABEL: @cmp5(
; X64-NEXT: loadbb:
; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
; X64-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X64: res_block:
; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb1:
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[X]], i8 4
; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
; X64-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
; X64-NEXT: [[TMP16:%.*]] = zext i8 [[TMP14]] to i32
; X64-NEXT: [[TMP17:%.*]] = sub i32 [[TMP15]], [[TMP16]]
; X64-NEXT: br label [[ENDBLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP17]], [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
; ALL-LABEL: @cmp5(
; ALL-NEXT: loadbb:
; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; ALL-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
; ALL-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
; ALL-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; ALL: res_block:
; ALL-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
; ALL-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
; ALL-NEXT: br label [[ENDBLOCK:%.*]]
; ALL: loadbb1:
; ALL-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 4
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 4
; ALL-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]]
; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
; ALL-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
; ALL-NEXT: [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]]
; ALL-NEXT: br label [[ENDBLOCK]]
; ALL: endblock:
; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
; ALL-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
ret i32 %call
}
define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp6(
; X32-NEXT: loadbb:
; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X32: res_block:
; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ]
; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
; X32-NEXT: br label [[ENDBLOCK:%.*]]
; X32: loadbb1:
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i16*
; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i16*
; X32-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 2
; X32-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2
; X32-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP11]]
; X32-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
; X32-NEXT: [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]])
; X32-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
; X32-NEXT: [[TMP17]] = zext i16 [[TMP15]] to i32
; X32-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
; X32-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP17]], [[TMP18]]
; X32-NEXT: br i1 [[TMP19]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X32: endblock:
; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
; X32-NEXT: ret i32 [[PHI_RES]]
;
; X64-LABEL: @cmp6(
; X64-NEXT: loadbb:
; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
; X64-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; X64: res_block:
; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP6]], [[LOADBB:%.*]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP7]], [[LOADBB]] ], [ [[TMP20:%.*]], [[LOADBB1]] ]
; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
; X64-NEXT: br label [[ENDBLOCK:%.*]]
; X64: loadbb1:
; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[X]] to i16*
; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[Y]] to i16*
; X64-NEXT: [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 2
; X64-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 2
; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
; X64-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]]
; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
; X64-NEXT: [[TMP18:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP16]])
; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64
; X64-NEXT: [[TMP20]] = zext i16 [[TMP18]] to i64
; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; X64: endblock:
; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
; X64-NEXT: ret i32 [[PHI_RES]]
; ALL-LABEL: @cmp6(
; ALL-NEXT: loadbb:
; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; ALL-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
; ALL-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
; ALL-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
; ALL: res_block:
; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
; ALL-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ]
; ALL-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
; ALL-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
; ALL-NEXT: br label [[ENDBLOCK:%.*]]
; ALL: loadbb1:
; ALL-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i16*
; ALL-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i16*
; ALL-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 2
; ALL-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2
; ALL-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP11]]
; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
; ALL-NEXT: [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]])
; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
; ALL-NEXT: [[TMP17]] = zext i16 [[TMP15]] to i32
; ALL-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
; ALL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP17]], [[TMP18]]
; ALL-NEXT: br i1 [[TMP19]], label [[ENDBLOCK]], label [[RES_BLOCK]]
; ALL: endblock:
; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
; ALL-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
ret i32 %call