From bec2ec108febcc380b0a725ecd87d634b90e648d Mon Sep 17 00:00:00 2001
From: Silviu Baranga <silviu.baranga@arm.com>
Date: Mon, 21 Mar 2016 11:43:46 +0000
Subject: [PATCH] [DAGCombine] Catch the case where extract_vector_elt can
 cause an any_ext while processing AND SDNodes

Summary:
extract_vector_elt can cause an implicit any_ext if the types don't
match. When processing the following pattern:

  (and (extract_vector_elt (load ([non_ext|any_ext|zero_ext] V))), c)

DAGCombine was ignoring the possible extend, and sometimes removed
the AND even though it was required to keep some of the bits of the
result at 0, resulting in a miscompile.

This change fixes the issue by limiting the transformation only to
cases where the extract_vector_elt doesn't perform the implicit
extend.

Reviewers: t.p.northover, jmolloy

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D18247

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@263935 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp      |  1 +
 .../CodeGen/ARM/dagcombine-anyexttozeroext.ll | 38 ++++++++++++++++++-
 2 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 420907998bb..a8faa75205a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3089,6 +3089,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   // the 'X' node here can either be nothing or an extract_vector_elt to catch
   // more cases.
   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
        N0.getOperand(0).getOpcode() == ISD::LOAD) ||
       N0.getOpcode() == ISD::LOAD) {
     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
diff --git a/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
index 8b7153503b1..206371a8f4e 100644
--- a/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
+++ b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
@@ -19,12 +19,48 @@ define float @f(<4 x i16>* nocapture %in) {
   ret float %7
 }
 
+; CHECK-LABEL: g:
 define float @g(<4 x i16>* nocapture %in) {
   ; CHECK: vldr
   %1 = load <4 x i16>, <4 x i16>* %in
-  ; CHECK-NOT: uxth
+
+  ; For now we're generating a vmov.16 and a uxth instruction.
+  ; The uxth is redundant, and we should be able to extend without
+  ; having to generate cross-domain copies. Once we can do this
+  ; we should modify the checks below.
+
+  ; CHECK: uxth
   %2 = extractelement <4 x i16> %1, i32 0
   ; CHECK: vcvt.f32.u32
   %3 = uitofp i16 %2 to float
   ret float %3
 }
+
+; For the following code the backend generates an
+; (and 0xff (i32 extract_vector_elt (zext load <4 x i8> to <4 x i16>)))
+;
+; The and is not redundant and cannot be removed. Since
+; extract_vector_elt is doing an implicit any_ext, the and
+; is required to guarantee that the top bits are set to zero.
+
+; Ideally this should be a zext from <4 x i8> to <4 x i32>.
+
+; CHECK-LABEL: h:
+; CHECK: vld1.32
+; CHECK: uxtb
+define <4 x i32> @h(<4 x i8> *%in) {
+  %1 = load <4 x i8>, <4 x i8>* %in, align 4
+  %2 = extractelement <4 x i8> %1, i32 0
+  %3 = zext i8 %2 to i32
+  %4 = insertelement <4 x i32> undef, i32 %3, i32 0
+  %5 = extractelement <4 x i8> %1, i32 1
+  %6 = zext i8 %5 to i32
+  %7 = insertelement <4 x i32> %4, i32 %6, i32 1
+  %8 = extractelement <4 x i8> %1, i32 2
+  %9 = zext i8 %8 to i32
+  %10 = insertelement <4 x i32> %7, i32 %9, i32 2
+  %11 = extractelement <4 x i8> %1, i32 3
+  %12 = zext i8 %11 to i32
+  %13 = insertelement <4 x i32> %10, i32 %12, i32 3
+  ret <4 x i32> %13
+}