mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-11 04:06:20 +00:00
![Tobias Grosser](/assets/img/avatar_default.png)
Because the names of the SCoPs changed, Polly could no longer read the user-provided .jscop files. Renaming the provided files lets Polly find them again and use them to optimize the matmul function. The generated files are also updated to reflect the latest version of Polly. llvm-svn: 182265
397 lines
12 KiB
ArmAsm
397 lines
12 KiB
ArmAsm
# ----------------------------------------------------------------------
# x86-64 assembly, AT&T operand order, AVX (v-prefixed) instructions.
# The .file name and the %polly.* block names suggest LLVM/Polly output.
# ----------------------------------------------------------------------
	.file	"matmul.polly.interchanged+tiled+vector.ll"

# Constant pool used by init_array: the double 0.5 (scale factor).
	.section	.rodata.cst8,"aM",@progbits,8
	.align	8
.LCPI0_0:
	.quad	4602678819172646912     # double 0.5
	.text

# ----------------------------------------------------------------------
# init_array -- fill the global float arrays A and B.
#
# C-like equivalent, derived from the instructions below:
#     for (i = 0; i < 1536; i++)
#         for (j = 0; j < 1536; j++)
#             A[i][j] = B[i][j] = (float)((1 + (i * j) % 1024) * 0.5);
# Here i * j is a 32-bit signed product, elements are 4 bytes wide and
# one row occupies 6144 bytes.
#
# In:     nothing (no argument register is read before being written)
# Out:    no return register is set on purpose
# Clobb:  rax, rcx, rdx, rsi, rdi, r8, xmm0, xmm1, flags
# Roles:  r8 = i, rcx = j, xmm0 = 0.5
# ----------------------------------------------------------------------
	.globl	init_array
	.align	16, 0x90
	.type	init_array,@function
init_array:                             # @init_array
	.cfi_startproc
# BB#0:                                 # %entry
	pushq	%rbp
.Ltmp2:
	.cfi_def_cfa_offset 16
.Ltmp3:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
.Ltmp4:
	.cfi_def_cfa_register %rbp
	xorl	%r8d, %r8d              # i = 0
	vmovsd	.LCPI0_0(%rip), %xmm0   # xmm0 = 0.5
	.align	16, 0x90
.LBB0_1:                                # %polly.loop_preheader3
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB0_2 Depth 2
	xorl	%ecx, %ecx              # j = 0
	.align	16, 0x90
.LBB0_2:                                # %polly.loop_header2
                                        #   Parent Loop BB0_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
	movl	%ecx, %edx
	imull	%r8d, %edx              # edx = i * j (32-bit product)
	# Signed remainder by 1024 without a divide: bias negative values by
	# 1023, clear the low 10 bits, then subtract that from the product.
	movl	%edx, %esi
	sarl	$31, %esi               # esi = 0 or -1 (sign of edx)
	shrl	$22, %esi               # esi = 0 or 1023
	addl	%edx, %esi
	andl	$-1024, %esi            # imm = 0xFFFFFC00 (32-bit operand)
	negl	%esi                    # esi = -(product truncated to a multiple of 1024)
	movq	%r8, %rax
	shlq	$11, %rax               # rax = i * 2048
	leal	1(%rdx,%rsi), %edi      # edi = 1 + (i * j) % 1024
	leaq	(%rax,%rax,2), %rsi     # rsi = i * 6144 = byte offset of row i
	leaq	1(%rcx), %rdx           # rdx = j + 1
	# The flags set here are consumed by the jne at the end of the loop
	# body; the AVX and mov instructions in between leave RFLAGS untouched.
	cmpq	$1536, %rdx             # imm = 0x600
	vcvtsi2sdl	%edi, %xmm0, %xmm1      # xmm1 = (double)edi
	vmulsd	%xmm0, %xmm1, %xmm1     # xmm1 *= 0.5
	vcvtsd2ss	%xmm1, %xmm1, %xmm1     # narrow to single precision
	vmovss	%xmm1, A(%rsi,%rcx,4)   # A[i][j] = value
	vmovss	%xmm1, B(%rsi,%rcx,4)   # B[i][j] = value
	movq	%rdx, %rcx              # j = j + 1
	jne	.LBB0_2                 # repeat until j + 1 == 1536
# BB#3:                                 # %polly.loop_exit4
                                        #   in Loop: Header=BB0_1 Depth=1
	incq	%r8                     # i = i + 1
	cmpq	$1536, %r8              # imm = 0x600
	jne	.LBB0_1
# BB#4:                                 # %polly.loop_exit
	popq	%rbp
	ret
.Ltmp5:
	.size	init_array, .Ltmp5-init_array
	.cfi_endproc

# ----------------------------------------------------------------------
# print_array -- write every element of the global float array C to
# stdout as text.
#
# C-like equivalent, derived from the instructions below:
#     for (i = 0; i < 1536; i++) {
#         for (j = 0; j < 1536; j++) {
#             fprintf(stdout, .L.str, (double)C[i][j]);
#             if (j % 80 == 79) fputc(10, stdout);
#         }
#         fputc(10, stdout);
#     }
#
# In:     nothing (no argument register is read before being written)
# Out:    no return register is set on purpose
# Saved:  rbp, r15, r14, r12, rbx are pushed and restored
# Roles:  r14 = i, r15 = address of row i in C, r12 = address of C[i][j],
#         rbx = j, rax = cached value of stdout at the top of each
#         inner-loop iteration
# Stack:  return address plus five pushes is 48 bytes, so rsp is 16-byte
#         aligned at each call if the caller followed the System V
#         alignment rule (assumption about the caller).
# ----------------------------------------------------------------------
	.globl	print_array
	.align	16, 0x90
	.type	print_array,@function
print_array:                            # @print_array
	.cfi_startproc
# BB#0:                                 # %entry
	pushq	%rbp
.Ltmp9:
	.cfi_def_cfa_offset 16
.Ltmp10:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
.Ltmp11:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r12
	pushq	%rbx
.Ltmp12:
	.cfi_offset %rbx, -48
.Ltmp13:
	.cfi_offset %r12, -40
.Ltmp14:
	.cfi_offset %r14, -32
.Ltmp15:
	.cfi_offset %r15, -24
	xorl	%r14d, %r14d            # i = 0
	movl	$C, %r15d               # r15 = &C[0][0] (32-bit absolute address)
	.align	16, 0x90
.LBB1_1:                                # %for.cond1.preheader
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB1_2 Depth 2
	movq	stdout(%rip), %rax      # rax = stdout
	movq	%r15, %r12              # r12 = &C[i][0]
	xorl	%ebx, %ebx              # j = 0
	.align	16, 0x90
.LBB1_2:                                # %for.body3
                                        #   Parent Loop BB1_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
	vmovss	(%r12), %xmm0           # xmm0 = C[i][j]
	vcvtss2sd	%xmm0, %xmm0, %xmm0     # widen to double for the variadic call
	movq	%rax, %rdi              # arg 1: stream
	movl	$.L.str, %esi           # arg 2: format string
	movb	$1, %al                 # one vector register carries a variadic argument
	callq	fprintf
	# j % 80 via multiply by 0x66666667 and shift right by 37 (j / 80),
	# with the sign bit added so negative quotients round toward zero.
	movslq	%ebx, %rax              # rax = (int)j, sign-extended
	imulq	$1717986919, %rax, %rcx # imm = 0x66666667
	movq	%rcx, %rdx
	shrq	$63, %rdx               # rdx = sign bit of the product
	sarq	$37, %rcx
	addl	%edx, %ecx              # ecx = j / 80
	imull	$80, %ecx, %ecx
	subl	%ecx, %eax              # eax = j % 80
	cmpl	$79, %eax
	jne	.LBB1_4                 # skip the line break unless j % 80 == 79
# BB#3:                                 # %if.then
                                        #   in Loop: Header=BB1_2 Depth=2
	movq	stdout(%rip), %rsi      # arg 2: stream
	movl	$10, %edi               # arg 1: character code 10 (line feed)
	callq	fputc
.LBB1_4:                                # %for.inc
                                        #   in Loop: Header=BB1_2 Depth=2
	addq	$4, %r12                # advance to the next float
	incq	%rbx                    # j = j + 1
	movq	stdout(%rip), %rax      # reload stdout (rax was clobbered by the calls)
	cmpq	$1536, %rbx             # imm = 0x600
	jne	.LBB1_2
# BB#5:                                 # %for.end
                                        #   in Loop: Header=BB1_1 Depth=1
	movl	$10, %edi               # end of row: character code 10
	movq	%rax, %rsi              # stream = stdout value loaded above
	callq	fputc
	addq	$6144, %r15             # imm = 0x1800; next row of C
	incq	%r14                    # i = i + 1
	cmpq	$1536, %r14             # imm = 0x600
	jne	.LBB1_1
# BB#6:                                 # %for.end12
	popq	%rbx
	popq	%r12
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
.Ltmp16:
	.size	print_array, .Ltmp16-print_array
	.cfi_endproc

# Constant pool used by main: the double 0.5 (scale factor).
	.section	.rodata.cst8,"aM",@progbits,8
	.align	8
.LCPI2_0:
	.quad	4602678819172646912     # double 0.5
	.text

# ----------------------------------------------------------------------
# main -- initialise A and B, zero C, then accumulate C += A * B with a
# tiled, 4-wide vectorised loop nest; returns 0 in eax.
#
# Phases, derived from the instructions below:
#   a) A[i][j] = B[i][j] = (float)((1 + (i * j) % 1024) * 0.5) for all
#      i, j below 1536 (rbx = i, rcx = j, xmm0 = 0.5).
#   b) memset(C, 0, 9437184).
#   c) Loop nest, outermost first (tile starts step by 64 up to 1472):
#        i0  row tile of C/A      rsi / -56(%rbp)
#        j0  column tile of C/B   rdx / -80(%rbp)
#        k0  reduction tile       r10
#        i   row inside tile      r11   (i0 .. i0|63)
#        k   reduction index      rsi   (k0 .. k0|63)
#        j   column, 4 per step   rcx   (16 vector steps per tile)
#      Innermost statement: C[i][j..j+3] += A[i][k] * B[k][j..j+3].
#
# Stack slots (relative to rbp):
#   -48  (i0|63) - 1   last i that still continues the i loop
#   -56  i0
#   -64  &C[i0][j0]    C pointer for the current (i0, j0) tile
#   -72  i0|63
#   -80  j0
#   -88  &C[i0][0] + 16 bytes
#
# The column counter starts at r15 = j0 - 4 (one vector before the
# tile); the +16 byte bias in the C and B base addresses compensates, so
# the pointers themselves start at column j0.
#
# Flags note: several loop back-edges do cmp, then lea to increment,
# then a conditional jump. lea does not modify flags, so the jump tests
# the value from before the increment.
#
# Stack:  return address + 6 pushes + 56 bytes = 112, so rsp is 16-byte
#         aligned at the memset call if the caller followed the System V
#         alignment rule (assumption about the caller).
# ----------------------------------------------------------------------
	.globl	main
	.align	16, 0x90
	.type	main,@function
main:                                   # @main
	.cfi_startproc
# BB#0:                                 # %entry
	pushq	%rbp
.Ltmp20:
	.cfi_def_cfa_offset 16
.Ltmp21:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
.Ltmp22:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	subq	$56, %rsp               # room for the spill slots -48 .. -88
.Ltmp23:
	.cfi_offset %rbx, -56
.Ltmp24:
	.cfi_offset %r12, -48
.Ltmp25:
	.cfi_offset %r13, -40
.Ltmp26:
	.cfi_offset %r14, -32
.Ltmp27:
	.cfi_offset %r15, -24

	# ---- phase a: initialise A and B ---------------------------------
	xorl	%ebx, %ebx              # i = 0
	vmovsd	.LCPI2_0(%rip), %xmm0   # xmm0 = 0.5
	.align	16, 0x90
.LBB2_1:                                # %polly.loop_preheader3.i
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB2_2 Depth 2
	xorl	%ecx, %ecx              # j = 0
	.align	16, 0x90
.LBB2_2:                                # %polly.loop_header2.i
                                        #   Parent Loop BB2_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
	movl	%ecx, %edx
	imull	%ebx, %edx              # edx = i * j (32-bit product)
	movl	%edx, %esi
	sarl	$31, %esi               # esi = 0 or -1 (sign of edx)
	shrl	$22, %esi               # esi = 0 or 1023
	addl	%edx, %esi
	andl	$-1024, %esi            # imm = 0xFFFFFC00 (32-bit operand)
	negl	%esi                    # esi = -(product truncated to a multiple of 1024)
	movq	%rbx, %rax
	shlq	$11, %rax               # rax = i * 2048
	leal	1(%rdx,%rsi), %edi      # edi = 1 + (i * j) % 1024
	leaq	(%rax,%rax,2), %rsi     # rsi = i * 6144 = byte offset of row i
	leaq	1(%rcx), %rdx           # rdx = j + 1
	cmpq	$1536, %rdx             # imm = 0x600; flags used by jne below
	vcvtsi2sdl	%edi, %xmm0, %xmm1      # xmm1 = (double)edi
	vmulsd	%xmm0, %xmm1, %xmm1     # xmm1 *= 0.5
	vcvtsd2ss	%xmm1, %xmm1, %xmm1     # narrow to single precision
	vmovss	%xmm1, A(%rsi,%rcx,4)   # A[i][j] = value
	vmovss	%xmm1, B(%rsi,%rcx,4)   # B[i][j] = value
	movq	%rdx, %rcx              # j = j + 1
	jne	.LBB2_2
# BB#3:                                 # %polly.loop_exit4.i
                                        #   in Loop: Header=BB2_1 Depth=1
	incq	%rbx                    # i = i + 1
	cmpq	$1536, %rbx             # imm = 0x600
	jne	.LBB2_1

	# ---- phase b: zero C ---------------------------------------------
# BB#4:                                 # %polly.loop_preheader3.preheader
	movl	$C, %edi                # arg 1: destination
	xorl	%esi, %esi              # arg 2: fill byte 0
	movl	$9437184, %edx          # imm = 0x900000; arg 3: byte count
	callq	memset

	# ---- phase c: tiled multiply-accumulate --------------------------
	xorl	%esi, %esi              # i0 = 0
	movl	$C+16, %eax
	movq	%rax, -88(%rbp)         # 8-byte Spill; &C[i0][0] + 16
	.align	16, 0x90
.LBB2_5:                                # %polly.loop_preheader17
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB2_15 Depth 2
                                        #       Child Loop BB2_8 Depth 3
                                        #         Child Loop BB2_11 Depth 4
                                        #           Child Loop BB2_17 Depth 5
                                        #             Child Loop BB2_18 Depth 6
	movq	%rsi, -56(%rbp)         # 8-byte Spill; i0
	movq	%rsi, %rax
	orq	$63, %rax               # rax = i0|63, last row of the tile
	movq	%rax, -72(%rbp)         # 8-byte Spill
	leaq	-1(%rax), %rax
	movq	%rax, -48(%rbp)         # 8-byte Spill; (i0|63) - 1
	xorl	%edx, %edx              # j0 = 0
	.align	16, 0x90
.LBB2_15:                               # %polly.loop_preheader24
                                        #   Parent Loop BB2_5 Depth=1
                                        # =>  This Loop Header: Depth=2
                                        #       Child Loop BB2_8 Depth 3
                                        #         Child Loop BB2_11 Depth 4
                                        #           Child Loop BB2_17 Depth 5
                                        #             Child Loop BB2_18 Depth 6
	movq	%rdx, -80(%rbp)         # 8-byte Spill; j0
	# r15 = (j0 - 1) rounded to a multiple of 4, using j0 - 4 instead when
	# j0 - 1 is negative. For the j0 values this loop produces (multiples
	# of 64) the result is j0 - 4.
	leaq	-4(%rdx), %rcx
	movq	%rdx, %rax
	decq	%rax
	cmovsq	%rcx, %rax
	movq	%rax, %r15
	sarq	$63, %r15
	shrq	$62, %r15
	addq	%rax, %r15
	andq	$-4, %r15
	movq	%rdx, %r13
	orq	$63, %r13               # r13 = j0|63, last column of the tile
	leaq	-4(%r13), %rdx          # rdx = upper bound tested by the j loop
	xorl	%r10d, %r10d            # k0 = 0
	movq	-88(%rbp), %rax         # 8-byte Reload
	leaq	(%rax,%r15,4), %rax     # rax = &C[i0][r15 + 4]
	movq	%rax, -64(%rbp)         # 8-byte Spill
	leaq	B+16(,%r15,4), %rbx     # rbx = &B[0][r15 + 4]; advanced per k0 tile
	leaq	4(%r15), %r12           # r12 = first column of the tile
	.align	16, 0x90
.LBB2_8:                                # %polly.loop_header23
                                        #   Parent Loop BB2_5 Depth=1
                                        #     Parent Loop BB2_15 Depth=2
                                        # =>    This Loop Header: Depth=3
                                        #         Child Loop BB2_11 Depth 4
                                        #           Child Loop BB2_17 Depth 5
                                        #             Child Loop BB2_18 Depth 6
	cmpq	-72(%rbp), %rsi         # 8-byte Folded Reload
	jg	.LBB2_13                # guard: skip if the i range is empty
# BB#9:                                 # %polly.loop_header30.preheader
                                        #   in Loop: Header=BB2_8 Depth=3
	movq	%r10, %rax
	orq	$63, %rax               # rax = k0|63
	cmpq	%rax, %r10
	jg	.LBB2_13                # guard: skip if the k range is empty
# BB#10:                                #   in Loop: Header=BB2_8 Depth=3
	decq	%rax                    # rax = (k0|63) - 1, bound for the k loop
	movq	-64(%rbp), %r14         # 8-byte Reload; C row pointer for i = i0
	movq	-56(%rbp), %r11         # 8-byte Reload; i = i0
	.align	16, 0x90
.LBB2_11:                               # %polly.loop_header37.preheader
                                        #   Parent Loop BB2_5 Depth=1
                                        #     Parent Loop BB2_15 Depth=2
                                        #       Parent Loop BB2_8 Depth=3
                                        # =>      This Loop Header: Depth=4
                                        #           Child Loop BB2_17 Depth 5
                                        #             Child Loop BB2_18 Depth 6
	cmpq	%r13, %r12              # flags survive the two movq below
	movq	%rbx, %r8               # r8 = B row pointer for k = k0
	movq	%r10, %rsi              # k = k0
	jg	.LBB2_12                # guard: skip if the j range is empty
	.align	16, 0x90
.LBB2_17:                               # %polly.loop_header46.preheader
                                        #   Parent Loop BB2_5 Depth=1
                                        #     Parent Loop BB2_15 Depth=2
                                        #       Parent Loop BB2_8 Depth=3
                                        #         Parent Loop BB2_11 Depth=4
                                        # =>        This Loop Header: Depth=5
                                        #             Child Loop BB2_18 Depth 6
	leaq	(%r11,%r11,2), %rcx
	shlq	$11, %rcx               # rcx = i * 6144 = byte offset of row i in A
	vbroadcastss	A(%rcx,%rsi,4), %xmm0   # all four lanes = A[i][k]
	movq	%r14, %rdi              # rdi walks row i of C
	movq	%r8, %r9                # r9 walks row k of B
	movq	%r15, %rcx              # column counter, one vector behind the pointers
.LBB2_18:                               # %polly.loop_header46
                                        #   Parent Loop BB2_5 Depth=1
                                        #     Parent Loop BB2_15 Depth=2
                                        #       Parent Loop BB2_8 Depth=3
                                        #         Parent Loop BB2_11 Depth=4
                                        #           Parent Loop BB2_17 Depth=5
                                        # =>          This Inner Loop Header: Depth=6
	vmulps	(%r9), %xmm0, %xmm1     # xmm1 = A[i][k] * B[k][j..j+3]
	vaddps	(%rdi), %xmm1, %xmm1    # xmm1 += C[i][j..j+3]
	vmovaps	%xmm1, (%rdi)           # aligned store back to C
	addq	$16, %rdi
	addq	$16, %r9
	addq	$4, %rcx
	cmpq	%rdx, %rcx
	jle	.LBB2_18
# BB#16:                                # %polly.loop_exit48
                                        #   in Loop: Header=BB2_17 Depth=5
	addq	$6144, %r8              # imm = 0x1800; next row of B
	cmpq	%rax, %rsi              # test k before the increment
	leaq	1(%rsi), %rsi           # k = k + 1 (flags preserved)
	jle	.LBB2_17
	.align	16, 0x90
.LBB2_12:                               # %polly.loop_exit39
                                        #   in Loop: Header=BB2_11 Depth=4
	addq	$6144, %r14             # imm = 0x1800; next row of C
	cmpq	-48(%rbp), %r11         # 8-byte Folded Reload; test i before the increment
	leaq	1(%r11), %r11           # i = i + 1 (flags preserved)
	jle	.LBB2_11
	.align	16, 0x90
.LBB2_13:                               # %polly.loop_exit32
                                        #   in Loop: Header=BB2_8 Depth=3
	addq	$393216, %rbx           # imm = 0x60000; skip 64 rows of B
	cmpq	$1472, %r10             # imm = 0x5C0; test k0 before the increment
	leaq	64(%r10), %r10          # k0 = k0 + 64 (flags preserved)
	movq	-56(%rbp), %rsi         # 8-byte Reload; rsi = i0 again (it held k)
	jl	.LBB2_8
# BB#14:                                # %polly.loop_exit25
                                        #   in Loop: Header=BB2_15 Depth=2
	movq	-80(%rbp), %rdx         # 8-byte Reload; j0
	cmpq	$1472, %rdx             # imm = 0x5C0; test j0 before the increment
	leaq	64(%rdx), %rdx          # j0 = j0 + 64 (flags preserved)
	jl	.LBB2_15
# BB#6:                                 # %polly.loop_exit18
                                        #   in Loop: Header=BB2_5 Depth=1
	addq	$393216, -88(%rbp)      # 8-byte Folded Spill
                                        # imm = 0x60000; skip 64 rows of C
	cmpq	$1472, %rsi             # imm = 0x5C0; test i0 before the increment
	leaq	64(%rsi), %rsi          # i0 = i0 + 64 (flags preserved)
	jl	.LBB2_5
# BB#7:                                 # %polly.loop_exit11
	xorl	%eax, %eax              # return value 0
	addq	$56, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	ret
.Ltmp28:
	.size	main, .Ltmp28-main
	.cfi_endproc

# ----------------------------------------------------------------------
# Global data objects.
# A, B and C are common symbols of 9437184 bytes each, requested with
# 16-byte alignment. The code in this file indexes them as rows of 6144
# bytes holding 4-byte float elements.
# ----------------------------------------------------------------------
	.type	A,@object               # @A
	.comm	A,9437184,16
	.type	B,@object               # @B
	.comm	B,9437184,16

# Format string passed to fprintf by print_array (4 characters + NUL).
	.type	.L.str,@object          # @.str
	.section	.rodata.str1.1,"aMS",@progbits,1
.L.str:
	.asciz	"%lf "
	.size	.L.str, 5

	.type	C,@object               # @C
	.comm	C,9437184,16

# Empty marker section; by GNU toolchain convention it tells the linker
# that this object does not need an executable stack.
	.section	".note.GNU-stack","",@progbits