mirror of
https://github.com/libretro/scummvm.git
synced 2025-01-05 09:10:29 +00:00
a1f12c25ca
The first jump to 'thin:' didn't leave r14 (remaining width) in the right state.
311 lines
8.8 KiB
ArmAsm
311 lines
8.8 KiB
ArmAsm
@ ScummVM - Graphic Adventure Engine
|
|
@
|
|
@ ScummVM is the legal property of its developers, whose names
|
|
@ are too numerous to list here. Please refer to the COPYRIGHT
|
|
@ file distributed with this source distribution.
|
|
@
|
|
@ This program is free software; you can redistribute it and/or
|
|
@ modify it under the terms of the GNU General Public License
|
|
@ as published by the Free Software Foundation; either version 2
|
|
@ of the License, or (at your option) any later version.
|
|
@
|
|
@ This program is distributed in the hope that it will be useful,
|
|
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
@ GNU General Public License for more details.
|
|
@
|
|
@ You should have received a copy of the GNU General Public License
|
|
@ along with this program; if not, write to the Free Software
|
|
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
@
|
|
@ @author Robin Watts (robin@wss.co.uk)
|
|
|
|
.text
|
|
|
|
.global Normal2xARM
|
|
.global Normal2xAspectMask
|
|
|
|
|
|
@ Assumes dst is aligned (so did the C)
|
|
@ Assumes 16bit (so did the C)
|
|
Normal2xARM:
|
|
@ r0 = src
|
|
@ r1 = srcPitch
|
|
@ r2 = dst
|
|
@ r3 = dstPitch
|
|
@ r4 = w
|
|
@ r5 = h
|
|
STMFD r13!,{r4-r11,r14}
|
|
LDR r4, [r13,#4*9] @ r4 = w
|
|
LDR r5, [r13,#4*10] @ r5 = h
|
|
ADD r12,r2, r3
|
|
SUB r1, r1, r4, LSL #1
|
|
SUB r6, r3, r4, LSL #2
|
|
ADD r3, r3, r6
|
|
yloop:
|
|
SUBS r14,r4, #4
|
|
ADDLT r14,r14, #4
|
|
BLT thin
|
|
xloop:
|
|
LDRH r6, [r0], #2
|
|
LDRH r7, [r0], #2
|
|
LDRH r8, [r0], #2
|
|
LDRH r9, [r0], #2
|
|
ORR r6, r6, r6, LSL #16
|
|
ORR r7, r7, r7, LSL #16
|
|
ORR r8, r8, r8, LSL #16
|
|
ORR r9, r9, r9, LSL #16
|
|
STMIA r2!, {r6-r9}
|
|
STMIA r12!,{r6-r9}
|
|
SUBS r14,r14,#4
|
|
BGE xloop
|
|
ADDS r14,r14,#4
|
|
BNE thin
|
|
ADD r0, r0, r1
|
|
ADD r2, r2, r3
|
|
ADD r12,r12,r3
|
|
SUBS r5, r5, #1
|
|
BGT yloop
|
|
|
|
LDMFD r13!,{r4-r11,PC}
|
|
thin:
|
|
LDRH r6, [r0], #2
|
|
ORR r6, r6, r6, LSL #16
|
|
STR r6, [r2], #4
|
|
STR r6, [r12],#4
|
|
SUBS r14,r14,#1
|
|
BGT thin
|
|
ADD r0, r0, r1
|
|
ADD r2, r2, r3
|
|
ADD r12,r12,r3
|
|
SUBS r5, r5, #1
|
|
BGT yloop
|
|
|
|
LDMFD r13!,{r4-r11,PC}
|
|
|
|
|
|
@ Horrid filter calculations
|
|
@ AAAAAAAAAAAABBBBBBBBBBBBCCCCCCCCCCCCDDDDDDDDDDDDEEEEEEEEEEEE
|
|
@ <-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+->
|
|
@ Ideal: A,A,(A*2+B*3)/5,B,(B*4+C)/5,C,C,(C+D*4)/5,D,(D*3+E*2)/5,E,E
|
|
@ Actual: A,A,(A*3+B*5)/8,B,(B*7+C)/8,C,C,(C+D*7)/8,D,(D*5+E*3)/8,E,E
|
|
|
|
@ Assumes dst is aligned (so did the C)
|
|
@ Assumes 16bit (so did the C)
|
|
Normal2xAspectMask:
|
|
@ r0 = src
|
|
@ r1 = srcPitch
|
|
@ r2 = dst
|
|
@ r3 = dstPitch
|
|
@ r4 = w
|
|
@ r5 = h
|
|
@ r12= mask
|
|
STMFD r13!,{r4-r11,r14}
|
|
LDR r4, [r13,#4*9] @ r4 = w
|
|
LDR r5, [r13,#4*10] @ r5 = h
|
|
LDR r12,[r13,#4*11] @ r12= mask
|
|
MOV r11,#2
|
|
SUB r11,r11,r1, LSL #2 @ r11= 2-srcPitch*4
|
|
MOV r14,#4
|
|
SUB r14,r14,r3, LSL #3
|
|
SUB r14,r14,r3, LSL #1
|
|
SUB r14,r14,r3 @ r14 = 4-dstPitch*11
|
|
SUBS r5, r5, #4
|
|
BLE last_few_lines
|
|
yloop_aspect:
|
|
xloop_aspect:
|
|
LDRH r6, [r0], r1 @ r6 = A
|
|
LDRH r7, [r0], r1 @ r7 = B
|
|
LDRH r8, [r0], r1 @ r8 = C
|
|
LDRH r9, [r0], r1 @ r9 = D
|
|
LDRH r10,[r0], r11 @ r10= E
|
|
ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1
|
|
ORR r7, r7, r7, LSL #16 @ r7 = output 3
|
|
ORR r8, r8, r8, LSL #16 @ r8 = output 5,6
|
|
ORR r9, r9, r9, LSL #16 @ r9 = output 8
|
|
ORR r10,r10,r10,LSL #16 @ r10= output 10, 11
|
|
STR r6, [r2], r3 @ output 0 (A)
|
|
STR r6, [r2], r3 @ output 1 (A)
|
|
AND r6, r6, r12 @ r6 = A split
|
|
ADD r6, r6, r6, LSL #1 @ r6 = A*3
|
|
STR r7, [r2, r3] @ output 3 (B)
|
|
AND r7, r7, r12 @ r7 = B split
|
|
ADD r6, r6, r7 @ r6 = A*3 + B
|
|
ADD r6, r6, r7, LSL #2 @ r6 = A*3 + B*5
|
|
AND r6, r12,r6, LSR #3 @ r6 = (A*3 + B*5)>>3
|
|
ORR r6, r6, r6, ROR #16 @ r6 = output 2
|
|
STR r6, [r2], r3, LSL #1 @ output 2 (A*3+B*5)>>3
|
|
RSB r7, r7, r7, LSL #3 @ r7 = B*7
|
|
AND r6, r8, r12 @ r6 = C split
|
|
ADD r7, r7, r6 @ r7 = B*7+C
|
|
AND r7, r12,r7, LSR #3 @ r7 = (B*7 + C)>>3
|
|
ORR r7, r7, r7, ROR #16 @ r7 = output 4
|
|
STR r7, [r2], r3 @ output 4 (B*7+C)>>3
|
|
STR r8, [r2], r3 @ output 5 (C)
|
|
STR r8, [r2], r3 @ output 6 (C)
|
|
STR r9, [r2, r3] @ output 8 (D)
|
|
AND r9, r9, r12 @ r9 = D split
|
|
RSB r7, r9, r9, LSL #3 @ r7 = D*7
|
|
ADD r6, r6, r7 @ r6 = C+D*7
|
|
AND r6, r12,r6, LSR #3 @ r6 = (C + D*7)>>3
|
|
ORR r6, r6, r6, ROR #16 @ r6 = output 7
|
|
STR r6, [r2], r3, LSL #1 @ output 7 (C+D*7)>>3
|
|
ADD r9, r9, r9, LSL #2 @ r9 = D*5
|
|
AND r6, r10,r12 @ r6 = E split
|
|
ADD r9, r9, r6 @ r9 = D*5+E
|
|
ADD r9, r9, r6, LSL #1 @ r9 = D*5+E*3
|
|
AND r9, r12,r9, LSR #3 @ r9 = (D*5 + E*3)>>3
|
|
ORR r9, r9, r9, ROR #16 @ r9 = output 9
|
|
STR r9, [r2], r3 @ output 9 (D*5+E*3)>>3
|
|
STR r10,[r2], r3 @ output 10 (E)
|
|
STR r10,[r2], r14 @ output 11 (E)
|
|
SUBS r4, r4, #1
|
|
BGT xloop_aspect
|
|
LDR r4, [r13,#4*9] @ r4 = w
|
|
ADD r0, r0, r1, LSL #2
|
|
ADD r0, r0, r1
|
|
SUB r0, r0, r4, LSL #1
|
|
ADD r2, r2, r3, LSL #3
|
|
ADD r2, r2, r3, LSL #2
|
|
SUB r2, r2, r4, LSL #2
|
|
SUBS r5, r5, #5
|
|
BGT yloop_aspect
|
|
last_few_lines:
|
|
@ r5 = -4 => done, -3 => 1 left, -2 = 2 left, -1 = 3 left, 0 = 4 left
|
|
ADDS r5, r5, #3
|
|
@ r5 = -1 => done, 0 => 1 left, 1 = 2 left, 2 = 3 left, 3 = 4 left
|
|
LDMLTFD r13!,{r4-r11,PC}
|
|
BEQ last1lines
|
|
CMP r5, #2
|
|
BEQ last3lines
|
|
BGT last4lines
|
|
last2lines:
|
|
ADD r11,r11,r1
|
|
ADD r14,r14,r3
|
|
ADD r11,r11,r1, LSL #1
|
|
ADD r14,r14,r3, LSL #3
|
|
xloop_aspect2:
|
|
LDRH r6, [r0], r1 @ r6 = A
|
|
LDRH r7, [r0], r11 @ r7 = B
|
|
SUBS r4, r4, #1
|
|
ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1
|
|
ORR r7, r7, r7, LSL #16 @ r7 = output 3
|
|
STR r6, [r2], r3 @ output 0 (A)
|
|
STR r6, [r2], r3 @ output 1 (A)
|
|
AND r6, r6, r12 @ r6 = A split
|
|
ADD r6, r6, r6, LSL #1 @ r6 = A*3
|
|
STR r7, [r2, r3] @ output 3 (B)
|
|
AND r7, r7, r12 @ r7 = B split
|
|
ADD r6, r6, r7 @ r6 = A*3 + B
|
|
ADD r6, r6, r7, LSL #2 @ r6 = A*3 + B*5
|
|
AND r6, r12,r6, LSR #3 @ r6 = (A*3 + B*5)>>3
|
|
ORR r6, r6, r6, ROR #16 @ r6 = output 2
|
|
STR r6, [r2], r14 @ output 2 (A*3+B*5)>>3
|
|
BGT xloop_aspect2
|
|
|
|
LDMFD r13!,{r4-r11,PC}
|
|
|
|
last1lines:
|
|
xloop_aspect1:
|
|
LDRH r6, [r0], #2 @ r6 = A
|
|
ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1
|
|
STR r6, [r2, r3] @ output 0 (A)
|
|
STR r6, [r2], #4 @ output 1 (A)
|
|
SUBS r4, r4, #1
|
|
BGT xloop_aspect1
|
|
|
|
LDMFD r13!,{r4-r11,PC}
|
|
|
|
last3lines:
|
|
ADD r14,r14,r3, LSL #2
|
|
ADD r11,r11,r1
|
|
xloop_aspect3:
|
|
LDRH r6, [r0], r1 @ r6 = A
|
|
LDRH r7, [r0], r1 @ r7 = B
|
|
LDRH r8, [r0], r1 @ r8 = C
|
|
LDRH r9, [r0], r11 @ r9 = D
|
|
ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1
|
|
ORR r7, r7, r7, LSL #16 @ r7 = output 3
|
|
ORR r8, r8, r8, LSL #16 @ r8 = output 5,6
|
|
ORR r9, r9, r9, LSL #16 @ r9 = output 8
|
|
STR r6, [r2], r3 @ output 0 (A)
|
|
STR r6, [r2], r3 @ output 1 (A)
|
|
AND r6, r6, r12 @ r6 = A split
|
|
ADD r6, r6, r6, LSL #1 @ r6 = A*3
|
|
STR r7, [r2, r3] @ output 3 (B)
|
|
AND r7, r7, r12 @ r7 = B split
|
|
ADD r6, r6, r7 @ r6 = A*3 + B
|
|
ADD r6, r6, r7, LSL #2 @ r6 = A*3 + B*5
|
|
AND r6, r12,r6, LSR #3 @ r6 = (A*3 + B*5)>>3
|
|
ORR r6, r6, r6, ROR #16 @ r6 = output 2
|
|
STR r6, [r2], r3, LSL #1 @ output 2 (A*3+B*5)>>3
|
|
RSB r7, r7, r7, LSL #3 @ r7 = B*7
|
|
AND r6, r8, r12 @ r6 = C split
|
|
ADD r7, r7, r6 @ r7 = B*7+C
|
|
AND r7, r12,r7, LSR #3 @ r7 = (B*7 + C)>>3
|
|
ORR r7, r7, r7, ROR #16 @ r7 = output 4
|
|
STR r7, [r2], r3 @ output 4 (B*7+C)>>3
|
|
STR r8, [r2], r3 @ output 5 (C)
|
|
STR r8, [r2], r3 @ output 6 (C)
|
|
AND r9, r9, r12 @ r9 = D split
|
|
RSB r7, r9, r9, LSL #3 @ r7 = D*7
|
|
ADD r6, r6, r7 @ r6 = C+D*7
|
|
AND r6, r12,r6, LSR #3 @ r6 = (C + D*7)>>3
|
|
ORR r6, r6, r6, ROR #16 @ r6 = output 7
|
|
STR r6, [r2], r14 @ output 7 (C+D*7)>>3
|
|
SUBS r4, r4, #1
|
|
BGT xloop_aspect3
|
|
|
|
LDMFD r13!,{r4-r11,PC}
|
|
|
|
last4lines:
|
|
ADD r14,r14,r3, LSL #1
|
|
xloop_aspect4:
|
|
LDRH r6, [r0], r1 @ r6 = A
|
|
LDRH r7, [r0], r1 @ r7 = B
|
|
LDRH r8, [r0], r1 @ r8 = C
|
|
LDRH r9, [r0], r1 @ r9 = D
|
|
LDRH r10,[r0], r11 @ r10= E
|
|
ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1
|
|
ORR r7, r7, r7, LSL #16 @ r7 = output 3
|
|
ORR r8, r8, r8, LSL #16 @ r8 = output 5,6
|
|
ORR r9, r9, r9, LSL #16 @ r9 = output 8
|
|
ORR r10,r10,r10,LSL #16 @ r10= output 10, 11
|
|
STR r6, [r2], r3 @ output 0 (A)
|
|
STR r6, [r2], r3 @ output 1 (A)
|
|
AND r6, r6, r12 @ r6 = A split
|
|
ADD r6, r6, r6, LSL #1 @ r6 = A*3
|
|
STR r7, [r2, r3] @ output 3 (B)
|
|
AND r7, r7, r12 @ r7 = B split
|
|
ADD r6, r6, r7 @ r6 = A*3 + B
|
|
ADD r6, r6, r7, LSL #2 @ r6 = A*3 + B*5
|
|
AND r6, r12,r6, LSR #3 @ r6 = (A*3 + B*5)>>3
|
|
ORR r6, r6, r6, ROR #16 @ r6 = output 2
|
|
STR r6, [r2], r3, LSL #1 @ output 2 (A*3+B*5)>>3
|
|
RSB r7, r7, r7, LSL #3 @ r7 = B*7
|
|
AND r6, r8, r12 @ r6 = C split
|
|
ADD r7, r7, r6 @ r7 = B*7+C
|
|
AND r7, r12,r7, LSR #3 @ r7 = (B*7 + C)>>3
|
|
ORR r7, r7, r7, ROR #16 @ r7 = output 4
|
|
STR r7, [r2], r3 @ output 4 (B*7+C)>>3
|
|
STR r8, [r2], r3 @ output 5 (C)
|
|
STR r8, [r2], r3 @ output 6 (C)
|
|
STR r9, [r2, r3] @ output 8 (D)
|
|
AND r9, r9, r12 @ r9 = D split
|
|
RSB r7, r9, r9, LSL #3 @ r7 = D*7
|
|
ADD r6, r6, r7 @ r6 = C+D*7
|
|
AND r6, r12,r6, LSR #3 @ r6 = (C + D*7)>>3
|
|
ORR r6, r6, r6, ROR #16 @ r6 = output 7
|
|
STR r6, [r2], r3, LSL #1 @ output 7 (C+D*7)>>3
|
|
ADD r9, r9, r9, LSL #2 @ r9 = D*5
|
|
AND r6, r10,r12 @ r6 = E split
|
|
ADD r9, r9, r6 @ r9 = D*5+E
|
|
ADD r9, r9, r6, LSL #1 @ r9 = D*5+E*3
|
|
AND r9, r12,r9, LSR #3 @ r9 = (D*5 + E*3)>>3
|
|
ORR r9, r9, r9, ROR #16 @ r9 = output 9
|
|
STR r9, [r2], r14 @ output 9 (D*5+E*3)>>3
|
|
SUBS r4, r4, #1
|
|
BGT xloop_aspect4
|
|
|
|
LDMFD r13!,{r4-r11,PC}
|