968 Commits

Author SHA1 Message Date
Chris Lattner
b3518a838c Fix a thinko. If the operand is promoted, pass the promoted value into
the new zero extend, not the original operand.  This fixes cast bool -> long
on ppc.

Add an unrelated fixme

llvm-svn: 21196
2005-04-10 01:13:15 +00:00
Chris Lattner
034716de24 add a little peephole optimization. This allows us to codegen:
int a(short i) {
        return i & 1;
}

as

_a:
        andi. r3, r3, 1
        blr

instead of:

_a:
        rlwinm r2, r3, 0, 16, 31
        andi. r3, r2, 1
        blr

on ppc.  It should also help the other risc targets.

llvm-svn: 21189
2005-04-09 21:43:54 +00:00
Chris Lattner
afa0001d54 recognize some patterns as fabs operations, so that fabs at the source level
is deconstructed then reconstructed here.  This catches 19 fabs's in 177.mesa,
9 in 168.wupwise, 5 in 171.swim, 3 in 172.mgrid, and 14 in 173.applu out of
specfp2000.

This allows the X86 code generator to make MUCH better code than before for
each of these and saves one instr on ppc.

This depends on the previous CFE patch to expose these correctly.

llvm-svn: 21171
2005-04-09 05:15:53 +00:00
Chris Lattner
da902bdf1b print and fold BRCONDTWOWAY correctly
llvm-svn: 21165
2005-04-09 03:27:28 +00:00
Chris Lattner
31170cd2ec canonicalize a bunch of operations involving fneg
llvm-svn: 21160
2005-04-09 03:02:46 +00:00
Chris Lattner
9a56ef5693 If a target zero or sign extends the result of its setcc, allow folding of
this into sign/zero extension instructions later.

On PPC, for example, this testcase:

%G = external global sbyte
implementation
void %test(int %X, int %Y) {
  %C = setlt int %X, %Y
  %D = cast bool %C to sbyte
  store sbyte %D, sbyte* %G
  ret void
}

Now codegens to:

        cmpw cr0, r3, r4
        li r3, 1
        li r4, 0
        blt .LBB_test_2 ;
.LBB_test_1:    ;
        or r3, r4, r4
.LBB_test_2:    ;
        addis r2, r2, ha16(L_G$non_lazy_ptr-"L00000$pb")
        lwz r2, lo16(L_G$non_lazy_ptr-"L00000$pb")(r2)
        stb r3, 0(r2)

instead of:

        cmpw cr0, r3, r4
        li r3, 1
        li r4, 0
        blt .LBB_test_2 ;
.LBB_test_1:    ;
        or r3, r4, r4
.LBB_test_2:    ;
***     rlwinm r3, r3, 0, 31, 31
        addis r2, r2, ha16(L_G$non_lazy_ptr-"L00000$pb")
        lwz r2, lo16(L_G$non_lazy_ptr-"L00000$pb")(r2)
        stb r3, 0(r2)

llvm-svn: 21148
2005-04-07 19:43:53 +00:00
Chris Lattner
bbe0e9e9db Remove something I had for testing
llvm-svn: 21144
2005-04-07 18:58:54 +00:00
Chris Lattner
ee836c7b32 This patch does two things. First, it canonicalizes 'X >= C' -> 'X > C-1'
(likewise for <=, >=u, and <=u).

Second, it implements a special case hack to turn 'X gtu SINTMAX' -> 'X lt 0'

On powerpc, for example, this changes this:

        lis r2, 32767
        ori r2, r2, 65535
        cmplw cr0, r3, r2
        bgt .LBB_test_2

into:

        cmpwi cr0, r3, 0
        blt .LBB_test_2

llvm-svn: 21142
2005-04-07 18:14:58 +00:00
Chris Lattner
22bbc2351e Fix a really scary bug that Nate found where we weren't deleting the right
elements out of the autoCSE maps.

llvm-svn: 21128
2005-04-07 00:30:13 +00:00
Nate Begeman
12af81407b Add MULHU and MULHS nodes for the high part of an (un)signed 32x32=64b
multiply.

llvm-svn: 21102
2005-04-05 22:36:56 +00:00
Chris Lattner
c8f36868e6 print fneg/fabs
llvm-svn: 21008
2005-04-02 04:58:41 +00:00
Chris Lattner
8be5696874 fix some bugs in the implementation of SHL_PARTS and friends.
llvm-svn: 21004
2005-04-02 04:00:59 +00:00
Chris Lattner
33ca1ce8e0 Print some new nodes
llvm-svn: 21001
2005-04-02 03:30:42 +00:00
Nate Begeman
4034852ba9 Add ISD::UNDEF node
Teach the SelectionDAG code how to expand and promote it
Have PPC32 LowerCallTo generate ISD::UNDEF for int arg regs used up by fp
  arguments, but not shadowing their value.  This allows us to do the right
  thing with both fixed and vararg floating point arguments.

llvm-svn: 20988
2005-04-01 22:34:39 +00:00
Andrew Lenharth
7db3834ecf PCMarker support for DAG and Alpha
llvm-svn: 20965
2005-03-31 21:24:06 +00:00
Chris Lattner
fa9e43b38c Fix a bug where we would incorrectly do a sign ext instead of a zero ext
because we were checking the wrong thing.  Thanks to andrew for pointing
this out!

llvm-svn: 20554
2005-03-10 20:55:51 +00:00
Chris Lattner
e0d0c64c8a constant fold FP_ROUND_INREG, ZERO_EXTEND_INREG, and SIGN_EXTEND_INREG
This allows the alpha backend to compile:

bool %test(uint %P) {
        %c = seteq uint %P, 0
        ret bool %c
}

into:

test:
        ldgp $29, 0($27)
        ZAP $16,240,$0
        CMPEQ $0,0,$0
        AND $0,1,$0
        ret $31,($26),1

instead of:

test:
        ldgp $29, 0($27)
        ZAP $16,240,$0
        ldiq $1,0
        ZAP $1,240,$1
        CMPEQ $0,$1,$0
        AND $0,1,$0
        ret $31,($26),1

... and fixes PR534.

llvm-svn: 20534
2005-03-09 18:37:12 +00:00
Chris Lattner
89105cec43 Don't rely on doubles comparing identical to each other, which doesn't work
for 0.0 and -0.0.

llvm-svn: 20230
2005-02-17 20:17:32 +00:00
Chris Lattner
3165569ba9 Remove the 3 HACK HACK HACKs I put in before, fixing them properly with
the new TLI that is available.

Implement support for handling out of range shifts.  This allows us to
compile this code (a 64-bit rotate):

unsigned long long f3(unsigned long long x) {
  return (x << 32) | (x >> (64-32));
}

into this:

f3:
        mov %EDX, DWORD PTR [%ESP + 4]
        mov %EAX, DWORD PTR [%ESP + 8]
        ret

GCC produces this:

$ gcc t.c -masm=intel -O3 -S -o - -fomit-frame-pointer
..
f3:
        push    %ebx
        mov     %ebx, DWORD PTR [%esp+12]
        mov     %ecx, DWORD PTR [%esp+8]
        mov     %eax, %ebx
        mov     %edx, %ecx
        pop     %ebx
        ret

The Simple ISEL produces (eww gross):

f3:
        sub %ESP, 4
        mov DWORD PTR [%ESP], %ESI
        mov %EDX, DWORD PTR [%ESP + 8]
        mov %ECX, DWORD PTR [%ESP + 12]
        mov %EAX, 0
        mov %ESI, 0
        or %EAX, %ECX
        or %EDX, %ESI
        mov %ESI, DWORD PTR [%ESP]
        add %ESP, 4
        ret

llvm-svn: 19780
2005-01-23 04:39:44 +00:00
Chris Lattner
97f35a7a07 More bugfixes for IA64 shifts.
llvm-svn: 19739
2005-01-22 00:33:03 +00:00
Chris Lattner
42e239ed58 Add a nasty hack to fix Alpha/IA64 multiplies by a power of two.
llvm-svn: 19737
2005-01-22 00:20:42 +00:00
Chris Lattner
e724100870 Remove unneeded line.
llvm-svn: 19736
2005-01-21 23:43:12 +00:00
Chris Lattner
a974e215a5 test commit
llvm-svn: 19735
2005-01-21 23:38:56 +00:00
Chris Lattner
392ddf430b Unary token factor nodes are unneeded.
llvm-svn: 19727
2005-01-21 18:01:22 +00:00
Chris Lattner
4086a7a803 implement add_parts/sub_parts.
llvm-svn: 19714
2005-01-20 18:50:55 +00:00
Chris Lattner
027c97e93e Know some identities about tokenfactor nodes.
llvm-svn: 19699
2005-01-19 18:01:40 +00:00
Chris Lattner
7114e8a527 Know some simple identities. This improves codegen for (1LL << N).
llvm-svn: 19698
2005-01-19 17:29:49 +00:00
Chris Lattner
eea485de1f Keep track of the retval type as well.
llvm-svn: 19670
2005-01-18 19:26:36 +00:00
Chris Lattner
95307053ec Allow setcc operations to have nonbool types.
llvm-svn: 19656
2005-01-18 02:52:03 +00:00
Chris Lattner
906541da95 Fix the completely broken FP constant folds for setcc's.
llvm-svn: 19651
2005-01-18 02:11:55 +00:00
Chris Lattner
49a1f3a109 Refactor code into a new method.
llvm-svn: 19635
2005-01-17 17:15:02 +00:00
Chris Lattner
4517b8af97 Add assertions.
llvm-svn: 19596
2005-01-16 02:23:22 +00:00
Chris Lattner
9785def2cd Eliminate unneeded extensions.
llvm-svn: 19577
2005-01-16 00:17:20 +00:00
Chris Lattner
f3fd0c6a93 Print extra type for nodes with extra type info.
llvm-svn: 19575
2005-01-15 21:11:37 +00:00
Chris Lattner
191ac9c589 Common code factored out.
llvm-svn: 19572
2005-01-15 07:14:32 +00:00
Chris Lattner
2f65e8798f Add new SIGN_EXTEND_INREG, ZERO_EXTEND_INREG, and FP_ROUND_INREG operators.
llvm-svn: 19568
2005-01-15 06:17:04 +00:00
Chris Lattner
2dfbc4fddd Adjust to CopyFromReg changes, implement deletion of truncating/extending
stores/loads.

llvm-svn: 19562
2005-01-14 22:38:01 +00:00
Chris Lattner
0974002024 Start implementing truncating stores and extending loads.
llvm-svn: 19559
2005-01-14 22:08:15 +00:00
Chris Lattner
7a8788c9ac Add new ImplicitDef node, rename CopyRegSDNode class to RegSDNode.
llvm-svn: 19535
2005-01-13 20:50:02 +00:00
Chris Lattner
86b19c5605 Print new node.
llvm-svn: 19526
2005-01-13 17:59:10 +00:00
Chris Lattner
93cb0148f8 Do not fold (zero_ext (sign_ext V)) -> (sign_ext V), they are not the same.
This fixes llvm-test/SingleSource/Regression/C/casts.c

llvm-svn: 19519
2005-01-12 18:51:15 +00:00
Chris Lattner
e97b0e1358 New method
llvm-svn: 19517
2005-01-12 18:37:47 +00:00
Chris Lattner
6b9082114f Print new operations.
llvm-svn: 19464
2005-01-11 05:57:01 +00:00
Chris Lattner
2eacd11a86 shift X, 0 -> X
llvm-svn: 19453
2005-01-11 04:25:13 +00:00
Chris Lattner
0307506841 Split out SDNode::getOperationName into its own method.
llvm-svn: 19443
2005-01-10 23:25:25 +00:00
Chris Lattner
02236df007 Implement a couple more simplifications. This lets us codegen:
int test2(int * P, int* Q, int A, int B) {
        return P+A == P;
}

into:

test2:
        movl 4(%esp), %eax
        movl 12(%esp), %eax
        shll $2, %eax
        cmpl $0, %eax
        sete %al
        movzbl %al, %eax
        ret

instead of:

test2:
        movl 4(%esp), %eax
        movl 12(%esp), %ecx
        leal (%eax,%ecx,4), %ecx
        cmpl %eax, %ecx
        sete %al
        movzbl %al, %eax
        ret

ICC is producing worse code:

test2:
        movl      4(%esp), %eax                                 #8.5
        movl      12(%esp), %edx                                #8.5
        lea       (%edx,%edx), %ecx                             #9.9
        addl      %ecx, %ecx                                    #9.9
        addl      %eax, %ecx                                    #9.9
        cmpl      %eax, %ecx                                    #9.16
        movl      $0, %eax                                      #9.16
        sete      %al                                           #9.16
        ret                                                     #9.16

as is GCC (looks like our old code):

test2:
        movl    4(%esp), %edx
        movl    12(%esp), %eax
        leal    (%edx,%eax,4), %ecx
        cmpl    %edx, %ecx
        sete    %al
        movzbl  %al, %eax
        ret

llvm-svn: 19430
2005-01-10 02:03:02 +00:00
Chris Lattner
8d09b03ed1 Fix incorrect constant folds, fixing Stepanov after the SHR patch.
llvm-svn: 19429
2005-01-10 01:16:03 +00:00
Chris Lattner
9d479d4a34 Constant fold shifts, turning this loop:
.LBB_Z5test0PdS__3:     # no_exit.1
        fldl data(,%eax,8)
        fldl 24(%esp)
        faddp %st(1)
        fstl 24(%esp)
        incl %eax
        movl $16000, %ecx
        sarl $3, %ecx
        cmpl %eax, %ecx
        fstpl 16(%esp)
        #FP_REG_KILL
        jg .LBB_Z5test0PdS__3   # no_exit.1

into:

.LBB_Z5test0PdS__3:     # no_exit.1
        fldl data(,%eax,8)
        fldl 24(%esp)
        faddp %st(1)
        fstl 24(%esp)
        incl %eax
        cmpl $2000, %eax
        fstpl 16(%esp)
        #FP_REG_KILL
        jl .LBB_Z5test0PdS__3   # no_exit.1

llvm-svn: 19427
2005-01-10 00:07:15 +00:00
Chris Lattner
59d7066da8 Add some folds for == and != comparisons. This allows us to
codegen this loop in stepanov:

no_exit.i:              ; preds = %entry, %no_exit.i, %then.i, %_Z5checkd.exit
        %i.0.0 = phi int [ 0, %entry ], [ %i.0.0, %no_exit.i ], [ %inc.0, %_Z5checkd.exit ], [ %inc.012, %then.i ]              ; <int> [#uses=3]
        %indvar = phi uint [ %indvar.next, %no_exit.i ], [ 0, %entry ], [ 0, %then.i ], [ 0, %_Z5checkd.exit ]          ; <uint> [#uses=3]
        %result_addr.i.0 = phi double [ %tmp.4.i.i, %no_exit.i ], [ 0.000000e+00, %entry ], [ 0.000000e+00, %then.i ], [ 0.000000e+00, %_Z5checkd.exit ]          ; <double> [#uses=1]
        %first_addr.0.i.2.rec = cast uint %indvar to int                ; <int> [#uses=1]
        %first_addr.0.i.2 = getelementptr [2000 x double]* %data, int 0, uint %indvar           ; <double*> [#uses=1]
        %inc.i.rec = add int %first_addr.0.i.2.rec, 1           ; <int> [#uses=1]
        %inc.i = getelementptr [2000 x double]* %data, int 0, int %inc.i.rec            ; <double*> [#uses=1]
        %tmp.3.i.i = load double* %first_addr.0.i.2             ; <double> [#uses=1]
        %tmp.4.i.i = add double %result_addr.i.0, %tmp.3.i.i            ; <double> [#uses=2]
        %tmp.2.i = seteq double* %inc.i, getelementptr ([2000 x double]* %data, int 0, int 2000)                ; <bool> [#uses=1]
        %indvar.next = add uint %indvar, 1              ; <uint> [#uses=1]
        br bool %tmp.2.i, label %_Z10accumulateIPddET0_T_S2_S1_.exit, label %no_exit.i

To this:

.LBB_Z4testIPddEvT_S1_T0__1:    # no_exit.i
        fldl data(,%eax,8)
        fldl 16(%esp)
        faddp %st(1)
        fstpl 16(%esp)
        incl %eax
        movl %eax, %ecx
        shll $3, %ecx
        cmpl $16000, %ecx
        #FP_REG_KILL
        jne .LBB_Z4testIPddEvT_S1_T0__1 # no_exit.i

instead of this:

.LBB_Z4testIPddEvT_S1_T0__1:    # no_exit.i
        fldl data(,%eax,8)
        fldl 16(%esp)
        faddp %st(1)
        fstpl 16(%esp)
        incl %eax
        leal data(,%eax,8), %ecx
        leal data+16000, %edx
        cmpl %edx, %ecx
        #FP_REG_KILL
        jne .LBB_Z4testIPddEvT_S1_T0__1 # no_exit.i

llvm-svn: 19425
2005-01-09 20:52:51 +00:00
Jeff Cohen
91dd6d2d20 Fix VC++ compilation error
llvm-svn: 19423
2005-01-09 20:41:56 +00:00