mirror of
https://github.com/libretro/scummvm.git
synced 2024-12-14 21:59:17 +00:00
186 lines
6.1 KiB
ArmAsm
186 lines
6.1 KiB
ArmAsm
@ ScummVM - Graphic Adventure Engine
|
|
@
|
|
@ ScummVM is the legal property of its developers, whose names
|
|
@ are too numerous to list here. Please refer to the COPYRIGHT
|
|
@ file distributed with this source distribution.
|
|
@
|
|
@ This program is free software; you can redistribute it and/or
|
|
@ modify it under the terms of the GNU General Public License
|
|
@ as published by the Free Software Foundation; either version 2
|
|
@ of the License, or (at your option) any later version.
|
|
@
|
|
@ This program is distributed in the hope that it will be useful,
|
|
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
@ GNU General Public License for more details.
|
|
@
|
|
@ You should have received a copy of the GNU General Public License
|
|
@ along with this program; if not, write to the Free Software
|
|
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
@
|
|
@ @author Robin Watts (robin@wss.co.uk)
|
|
|
|
.text

.global DownscaleAllByHalfARM

@ ARM implementation of DownscaleAllByHalf scaler.
@
@ Scales a width x height block of 16bpp pixels from srcPtr to dstPtr,
@ producing one output pixel for every 2x2 block of source pixels.
@ srcPitch and dstPitch (in bytes) identify how to reach subsequent
@ lines. redblueMask and round allow for one routine to do both
@ 565 and 555 formats.
@
@ Syntax: GNU as, ARM (32-bit) state.
@
@ In:    r0        = srcPtr
@        r1        = srcPitch
@        r2        = dstPtr
@        r3        = dstPitch
@        [sp, #0]  = width   (in source pixels)
@        [sp, #4]  = height  (in source lines)
@        [sp, #8]  = redblueMask
@        [sp, #12] = round
@ Out:   nothing
@ Saved: r4-r11 and r14 are pushed on entry and restored on exit.
@ Clobb: r0-r3, r12, flags
@
@ An odd width is rounded up to the next even value, so the last output
@ pixel of each row also reads the source column just past `width`.
@ A trailing odd source line is ignored.
@
@ NOTE(review): the blend assumes redblueMask leaves at least two zero
@ guard bits above every colour field once the pixel has been duplicated
@ into both halfwords, so that summing four pixels cannot carry into a
@ neighbouring field - confirm against the masks the callers pass.

@ LOAD_QUAD: fetch one 2x2 block of source pixels and step srcPtr on to
@ the next block of the same line pair.
@   In:  r0 = address of the top-left pixel, r1 = srcPitch
@   Out: r8 = A (top-left)     r14 = B (top-right)
@        r9 = C (bottom-left)  r12 = D (bottom-right)
@        r0 advanced by 4 bytes (two source pixels)
.macro LOAD_QUAD
        LDRH    r8, [r0],r1             @ r8 = A;  srcPtr -> pixel below A
        LDRH    r9, [r0],#2             @ r9 = C;  srcPtr -> pixel right of C
        LDRH    r12,[r0],-r1            @ r12= D;  srcPtr -> pixel above D
        LDRH    r14,[r0],#2             @ r14= B;  srcPtr -> next 2x2 block
.endm

@ BLEND_QUAD dst: average the four pixels fetched by LOAD_QUAD.
@   In:  r8, r9, r12, r14 = pixels, r6 = redblueMask, r7 = round
@   Out: \dst = blended pixel in its low 16 bits (upper bits are junk,
@        which is harmless because the result is stored with STRH)
@   Clobbers r8, r9, r12, r14.
.macro BLEND_QUAD dst
        ORR     r8, r8, r8, LSL #16     @ duplicate each pixel into both halfwords
        ORR     r9, r9, r9, LSL #16
        ORR     r12,r12,r12,LSL #16
        ORR     r14,r14,r14,LSL #16
        AND     r8, r8, r6              @ keep only the fields selected by the mask,
        AND     r9, r9, r6              @ leaving gaps for the sums to grow into
        AND     r12,r12,r6
        AND     r14,r14,r6
        ADD     r8, r8, r9
        ADD     r8, r8, r12
        ADD     r8, r8, r14
        ADD     r8, r8, r7              @ r8 = summed pixels + rounding
        AND     r8, r6, r8, LSR #2      @ divide by 4 and drop bits shifted out of place
        ORR     \dst, r8, r8, LSR #16   @ fold the upper halfword's fields back down
.endm

.align 2
DownscaleAllByHalfARM:
        MOV     r12,r13                 @ r12 -> stacked args (sp is about to move)
        STMFD   r13!,{r4-r11,r14}
        LDMIA   r12,{r4-r7}
        @ r4 = width
        @ r5 = height
        @ r6 = redblueMask
        @ r7 = round

        @ Work with the width rounded up to even. The pixel loops already emit
        @ ceil(width/2) pixels per row; the row-end fix-ups below must use the
        @ same count or dstPtr/srcPtr drift (and dstPtr loses its halfword
        @ alignment) whenever width is odd. Even widths are unaffected.
        ADD     r4,r4,#1
        BIC     r4,r4,#1                @ r4 = (width + 1) & ~1

        SUB     r3,r3,r4                @ dstPitch -= bytes written per row
        SUBS    r5,r5,#2                @ while ((height-=2) >= 0)
        BLT     end
height_loop:

        SUBS    r11,r4,#8               @ r11= width_minus_8
        BLT     thin

width_loop:
        @ 8 source pixels -> 4 output pixels per pass. Each store is issued
        @ after the loads for the following block have been started.
        LOAD_QUAD
        BLEND_QUAD r10

        LOAD_QUAD
        STRH    r10,[r2],#2             @ *dstPtr++
        BLEND_QUAD r10

        LOAD_QUAD
        STRH    r10,[r2],#2             @ *dstPtr++
        BLEND_QUAD r10

        LOAD_QUAD
        STRH    r10,[r2],#2             @ *dstPtr++
        BLEND_QUAD r10

        STRH    r10,[r2],#2             @ *dstPtr++

        SUBS    r11,r11,#8              @ width_minus_8 -= 8
        BGE     width_loop              @ (width_minus_8 >= 0) => do 8+ more

thin:
        ADDS    r11,r11,#8              @ r11= source pixels left in this row
        BLE     width_end               @ nothing left (or width <= 0) => bail
thin_lp:
        @ single output pixels done in this bit
        LOAD_QUAD
        BLEND_QUAD r8

        STRH    r8,[r2],#2              @ *dstPtr++

        SUBS    r11,r11,#2
        BGT     thin_lp
width_end:
        ADD     r2,r2,r3                @ dstPtr += dstPitch (less bytes written)
        ADD     r0,r0,r1,LSL #1         @ srcPtr += 2*srcPitch
        SUB     r0,r0,r4,LSL #1         @ srcPtr -= 2*width (bytes consumed)

        SUBS    r5,r5,#2
        BGE     height_loop

end:
        LDMFD   r13!,{r4-r11,PC}        @ restore registers and return