Patch #1721826: ARM asm versions of sound rate conversion/mixing code

svn-id: r27467
This commit is contained in:
Max Horn 2007-06-16 16:46:13 +00:00
parent c7c2cb498c
commit 6498d669d0
4 changed files with 1128 additions and 1 deletions

View File

@ -25,6 +25,8 @@ DISABLE_CRUISE = 1
#DISABLE_HQ_SCALERS = 1
# Select the ARM assembly rate converter objects in sound/module.mk
USE_ARM_SOUND_ASM = 1
CXX = arm-wince-pe-g++
LD = arm-wince-pe-g++
AR = arm-wince-pe-ar cru

View File

@ -16,7 +16,6 @@ MODULE_OBJS := \
mp3.o \
mpu401.o \
null.o \
rate.o \
voc.o \
vorbis.o \
wave.o \
@ -31,5 +30,14 @@ MODULE_OBJS := \
softsynth/fluidsynth.o \
softsynth/mt32.o \
ifndef USE_ARM_SOUND_ASM
# Default build: portable C++ rate converter
MODULE_OBJS += \
rate.o
else
# USE_ARM_SOUND_ASM is set: C++ wrapper plus the ARM assembly routines
MODULE_OBJS += \
rate_arm.o \
rate_arm_asm.o
endif
# Include common rules
include $(srcdir)/rules.mk

428
sound/rate_arm.cpp Normal file
View File

@ -0,0 +1,428 @@
/* ScummVM - Scumm Interpreter
* Copyright (C) 2001-2006 The ScummVM project
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* $URL$
* $Id$
*
*/
/*
* The code in this file, together with the rate_arm_asm.s file offers
* an ARM optimised version of the code in rate.cpp. The operation of this
* code should be identical to that of rate.cpp, but faster. The heavy
* lifting is done in the assembler file.
*
* To be as portable as possible we implement the core routines with C
* linkage in assembly, and implement the C++ routines that call into
* the C here. The C++ symbol mangling varies wildly between compilers,
* so this is the simplest way to ensure that the C/C++ combination should
* work on as many ARM based platforms as possible.
*
* Essentially the algorithm herein is the same as that in rate.cpp, so
* anyone seeking to understand this should attempt to understand that
* first. That code was based in turn on code with Copyright 1998 Fabrice
* Bellard - part of SoX (http://sox.sourceforge.net).
* Max Horn adapted that code to the needs of ScummVM and partially rewrote
* it, in the process removing any use of floating point arithmetic. Various
* other improvements over the original code were made.
*/
#include "common/stdafx.h"
#include "sound/audiostream.h"
#include "sound/rate.h"
#include "sound/mixer.h"
#include "common/util.h"
namespace Audio {
/**
* The precision of the fractional computations used by the rate converter.
* Normally you should never have to modify this value.
*/
#define FRAC_BITS 16
/**
* The size of the intermediate input cache. Bigger values may increase
* performance, but only until some point (depends largely on cache size,
* target processor and various other factors), at which it will decrease
* again.
*/
#define INTERMEDIATE_BUFFER_SIZE 512
/**
* Audio rate converter based on simple resampling. Used when no
* interpolation is required.
*
* Limited to sampling frequency <= 65535 Hz.
*/
/**
* Converter state shared with the ARM_SimpleRate_* assembly routines.
*
* The assembly loads inPtr, inLen, opos and opos_inc with one LDMIA and
* addresses inBuf at byte offset 16, so the order and size of these fields
* must stay in step with rate_arm_asm.s.
* NOTE(review): offset 16 assumes 4-byte pointer, int and long -- confirm
* for every target that enables USE_ARM_SOUND_ASM.
*/
typedef struct {
/** read position inside inBuf; reset to inBuf by the assembly after each refill */
const st_sample_t *inPtr;
/** count of samples left in inBuf, as maintained by the assembly */
int inLen;
/** position of how far output is ahead of input */
/** Holds what would have been opos-ipos */
long opos;
/** integer position increment in the output stream (inrate / outrate) */
long opos_inc;
/** intermediate input cache, refilled through the read callback */
st_sample_t inBuf[INTERMEDIATE_BUFFER_SIZE];
} SimpleRateDetails;
/**
 * Rate converter that resamples by dropping input samples. The actual
 * conversion and mixing is carried out by the ARM_SimpleRate_* assembly
 * routines, which keep their state in 'sr'.
 */
template<bool stereo, bool reverseStereo>
class SimpleRateConverter : public RateConverter {
protected:
	/** State block handed to the assembly routines on every flow() call. */
	SimpleRateDetails sr;

public:
	SimpleRateConverter(st_rate_t inrate, st_rate_t outrate);

	int flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r);

	/** Nothing is buffered on the output side, so there is nothing to drain. */
	int drain(st_sample_t *obuf, st_size_t osamp, st_volume_t vol) {
		return ST_SUCCESS;
	}
};
/**
 * Prepare processing.
 *
 * @param inrate   input sample rate; must differ from and be an exact
 *                 multiple of outrate, and be below 65536
 * @param outrate  output sample rate; must be below 65536
 *
 * Any violated precondition is reported through error().
 */
template<bool stereo, bool reverseStereo>
SimpleRateConverter<stereo, reverseStereo>::SimpleRateConverter(st_rate_t inrate, st_rate_t outrate) {
	if (inrate == outrate) {
		error("Input and Output rates must be different to use rate effect");
	}

	if ((inrate % outrate) != 0) {
		error("Input rate must be a multiple of Output rate to use rate effect");
	}

	if (inrate >= 65536 || outrate >= 65536) {
		error("rate effect can only handle rates < 65536");
	}

	sr.opos = 1;

	/* increment */
	sr.opos_inc = inrate / outrate;

	// The assembly loads inPtr on entry and stores it back on exit, even
	// when it bails out before reading any input. Give it a defined value
	// instead of leaving it indeterminate.
	sr.inPtr = sr.inBuf;

	// An empty cache makes the first flow() call refill inBuf.
	sr.inLen = 0;
}
/**
* Entry points of the simple (sample dropping) rate converter implemented
* in rate_arm_asm.s. They use C linkage so that the assembly does not depend
* on any compiler's C++ name mangling.
*
* _M is called for mono input, _S for stereo input and _R for stereo input
* with reversed channels (see SimpleRateConverter::flow).
*
* 'fn' is called back by the assembly as fn(input, sr->inBuf, 512) whenever
* the input cache runs empty. 'sr' carries the converter state between calls.
*/
extern "C" void ARM_SimpleRate_M(AudioStream &input,
int (*fn)(Audio::AudioStream&,int16*,int),
SimpleRateDetails *sr,
st_sample_t *obuf,
st_size_t osamp,
st_volume_t vol_l,
st_volume_t vol_r);
extern "C" void ARM_SimpleRate_S(AudioStream &input,
int (*fn)(Audio::AudioStream&,int16*,int),
SimpleRateDetails *sr,
st_sample_t *obuf,
st_size_t osamp,
st_volume_t vol_l,
st_volume_t vol_r);
extern "C" void ARM_SimpleRate_R(AudioStream &input,
int (*fn)(Audio::AudioStream&,int16*,int),
SimpleRateDetails *sr,
st_sample_t *obuf,
st_size_t osamp,
st_volume_t vol_l,
st_volume_t vol_r);
/**
* C-linkage trampoline that the assembly routines call to refill their
* input cache. It forwards to the readBuffer() method of 'input', so the
* assembly never has to perform a C++ member call itself.
*
* @param input  stream to read from
* @param a      destination buffer
* @param b      number of samples requested
* @return whatever input.readBuffer(a, b) returns
*/
extern "C" int SimpleRate_readFudge(Audio::AudioStream &input,
int16 *a, int b)
{
return input.readBuffer(a, b);
}
/**
 * Convert samples from 'input' and mix them into 'obuf' by delegating to
 * the assembly routine that matches the template arguments: mono input
 * uses ARM_SimpleRate_M, stereo uses ARM_SimpleRate_S, and stereo with
 * reversed channels uses ARM_SimpleRate_R.
 *
 * @return always ST_SUCCESS
 */
template<bool stereo, bool reverseStereo>
int SimpleRateConverter<stereo, reverseStereo>::flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r) {
#ifdef DEBUG_RATECONV
	fprintf(stderr, "Simple st=%d rev=%d\n", stereo, reverseStereo);
	fflush(stderr);
#endif

	if (stereo) {
		if (reverseStereo)
			ARM_SimpleRate_R(input, &SimpleRate_readFudge, &sr, obuf, osamp, vol_l, vol_r);
		else
			ARM_SimpleRate_S(input, &SimpleRate_readFudge, &sr, obuf, osamp, vol_l, vol_r);
	} else {
		ARM_SimpleRate_M(input, &SimpleRate_readFudge, &sr, obuf, osamp, vol_l, vol_r);
	}

	return ST_SUCCESS;
}
/**
* Audio rate converter based on simple linear Interpolation.
*
* The use of fractional increment allows us to use no buffer. It
* avoids the problems at the end of the buffer we had with the old
* method which stored a possibly big buffer of size
* lcm(in_rate,out_rate).
*
* Limited to sampling frequency <= 65535 Hz.
*/
/**
* Converter state shared with the ARM_LinearRate_* assembly routines.
*
* The assembly addresses the fields by fixed byte offsets (opos_inc at 12,
* icur at 16, ilast at 20, opos_frac at 24, opos_inc_frac at 28, inBuf at
* 32), so the order and size of these fields must stay in step with
* rate_arm_asm.s.
* NOTE(review): these offsets assume 4-byte pointer, int and long and a
* 2-byte st_sample_t -- confirm for every target that enables
* USE_ARM_SOUND_ASM.
*/
typedef struct {
/** read position inside inBuf; reset to inBuf by the assembly after each refill */
const st_sample_t *inPtr;
/** count of samples left in inBuf, as maintained by the assembly */
int inLen;
/** position of how far output is ahead of input */
/** Holds what would have been opos-ipos */
long opos;
/** integer position increment in the output stream */
long opos_inc;
/** current sample(s) in the input stream (left/right channel) */
st_sample_t icur[2];
/** last sample(s) in the input stream (left/right channel) */
st_sample_t ilast[2];
/** fractional position in the output stream */
long opos_frac;
/** fractional position increment in the output stream */
long opos_inc_frac;
/** intermediate input cache, refilled through the read callback */
st_sample_t inBuf[INTERMEDIATE_BUFFER_SIZE];
} LinearRateDetails;
/**
* Entry points of the linear interpolating rate converter implemented in
* rate_arm_asm.s. They use C linkage so that the assembly does not depend
* on any compiler's C++ name mangling.
*
* _M is called for mono input, _S for stereo input and _R for stereo input
* with reversed channels (see LinearRateConverter::flow).
*
* 'fn' is called back by the assembly as fn(input, lr->inBuf, 512) whenever
* the input cache runs empty. 'lr' carries the converter state between calls.
*/
extern "C" void ARM_LinearRate_M(AudioStream &input,
int (*fn)(Audio::AudioStream&,int16*,int),
LinearRateDetails *lr,
st_sample_t *obuf,
st_size_t osamp,
st_volume_t vol_l,
st_volume_t vol_r);
extern "C" void ARM_LinearRate_S(AudioStream &input,
int (*fn)(Audio::AudioStream&,int16*,int),
LinearRateDetails *lr,
st_sample_t *obuf,
st_size_t osamp,
st_volume_t vol_l,
st_volume_t vol_r);
extern "C" void ARM_LinearRate_R(AudioStream &input,
int (*fn)(Audio::AudioStream&,int16*,int),
LinearRateDetails *lr,
st_sample_t *obuf,
st_size_t osamp,
st_volume_t vol_l,
st_volume_t vol_r);
/**
 * Rate converter that linearly interpolates between neighbouring input
 * samples. The actual conversion and mixing is carried out by the
 * ARM_LinearRate_* assembly routines, which keep their state in 'lr'.
 */
template<bool stereo, bool reverseStereo>
class LinearRateConverter : public RateConverter {
protected:
	/** State block handed to the assembly routines on every flow() call. */
	LinearRateDetails lr;

public:
	LinearRateConverter(st_rate_t inrate, st_rate_t outrate);

	int flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r);

	/** Nothing is buffered on the output side, so there is nothing to drain. */
	int drain(st_sample_t *obuf, st_size_t osamp, st_volume_t vol) {
		return ST_SUCCESS;
	}
};
/**
 * Prepare processing.
 *
 * @param inrate   input sample rate; must differ from outrate and be
 *                 below 65536
 * @param outrate  output sample rate; must be below 65536
 *
 * Any violated precondition is reported through error().
 */
template<bool stereo, bool reverseStereo>
LinearRateConverter<stereo, reverseStereo>::LinearRateConverter(st_rate_t inrate, st_rate_t outrate) {
	unsigned long incr;

	if (inrate == outrate) {
		error("Input and Output rates must be different to use rate effect");
	}

	if (inrate >= 65536 || outrate >= 65536) {
		error("rate effect can only handle rates < 65536");
	}

	lr.opos_frac = 0;
	lr.opos = 1;

	/* increment */
	// Widen before shifting so the FRAC_BITS fixed point value is computed
	// in unsigned long regardless of how st_rate_t is declared.
	incr = ((unsigned long)inrate << FRAC_BITS) / outrate;

	// Split the increment into its fractional and integer parts.
	lr.opos_inc_frac = incr & ((1UL << FRAC_BITS) - 1);
	lr.opos_inc = incr >> FRAC_BITS;

	lr.ilast[0] = lr.ilast[1] = 0;
	lr.icur[0] = lr.icur[1] = 0;

	// The assembly loads inPtr on entry and stores it back on exit, even
	// when it bails out before reading any input. Give it a defined value
	// instead of leaving it indeterminate.
	lr.inPtr = lr.inBuf;

	// An empty cache makes the first flow() call refill inBuf.
	lr.inLen = 0;
}
/**
 * Convert samples from 'input' and mix them into 'obuf' by delegating to
 * the assembly routine that matches the template arguments: mono input
 * uses ARM_LinearRate_M, stereo uses ARM_LinearRate_S, and stereo with
 * reversed channels uses ARM_LinearRate_R.
 *
 * @return always ST_SUCCESS
 */
template<bool stereo, bool reverseStereo>
int LinearRateConverter<stereo, reverseStereo>::flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r) {
#ifdef DEBUG_RATECONV
	fprintf(stderr, "Linear st=%d rev=%d\n", stereo, reverseStereo);
	fflush(stderr);
#endif

	if (stereo) {
		if (reverseStereo)
			ARM_LinearRate_R(input, &SimpleRate_readFudge, &lr, obuf, osamp, vol_l, vol_r);
		else
			ARM_LinearRate_S(input, &SimpleRate_readFudge, &lr, obuf, osamp, vol_l, vol_r);
	} else {
		ARM_LinearRate_M(input, &SimpleRate_readFudge, &lr, obuf, osamp, vol_l, vol_r);
	}

	return ST_SUCCESS;
}
#pragma mark -
/**
* Simple audio rate converter for the case that the inrate equals the outrate.
*/
/**
* Entry points of the copy (no rate conversion) mixer implemented in
* rate_arm_asm.s. They use C linkage so that the assembly does not depend
* on any compiler's C++ name mangling.
*
* Each routine mixes 'len' samples from '_buffer' into 'obuf', scaled by
* vol_l and vol_r. _M is called for mono input, _S for stereo input and _R
* for stereo input with reversed channels (see CopyRateConverter::flow).
* The assembly loops test 'len' only after the first iteration, so callers
* must pass len > 0.
*/
extern "C" void ARM_CopyRate_M(st_size_t len,
st_sample_t *obuf,
st_volume_t vol_l,
st_volume_t vol_r,
st_sample_t *_buffer);
extern "C" void ARM_CopyRate_S(st_size_t len,
st_sample_t *obuf,
st_volume_t vol_l,
st_volume_t vol_r,
st_sample_t *_buffer);
extern "C" void ARM_CopyRate_R(st_size_t len,
st_sample_t *obuf,
st_volume_t vol_l,
st_volume_t vol_r,
st_sample_t *_buffer);
/**
 * Converter for the case that the input rate equals the output rate: the
 * input is read into a temporary buffer and mixed into the output by the
 * ARM_CopyRate_* assembly routines.
 */
template<bool stereo, bool reverseStereo>
class CopyRateConverter : public RateConverter {
	/** Temporary buffer the input stream is read into; grown on demand. */
	st_sample_t *_buffer;
	/** Capacity of _buffer, in samples. */
	st_size_t _bufferSize;

public:
	CopyRateConverter() : _buffer(0), _bufferSize(0) {}
	~CopyRateConverter() {
		free(_buffer);
	}

	/**
	 * Read up to 'osamp' frames from 'input' and mix them into 'obuf'.
	 *
	 * @return always ST_SUCCESS; nothing is mixed when the stream delivers
	 *         no samples or the temporary buffer cannot be allocated
	 */
	virtual int flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r) {
		assert(input.isStereo() == stereo);

#ifdef DEBUG_RATECONV
		fprintf(stderr, "Copy st=%d rev=%d\n", stereo, reverseStereo);
		fflush(stderr);
#endif
		st_size_t len;

		// A stereo frame consists of two samples.
		if (stereo)
			osamp *= 2;

		// Reallocate temp buffer, if necessary
		if (osamp > _bufferSize) {
			free(_buffer);
			_buffer = (st_sample_t *)malloc(osamp * sizeof(st_sample_t));
			if (!_buffer) {
				// Keep the recorded capacity in step with the (now missing)
				// buffer so the next call retries the allocation instead
				// of reading into a null pointer.
				_bufferSize = 0;
				return (ST_SUCCESS);
			}
			_bufferSize = osamp;
		}

		// Read up to 'osamp' samples into our temporary buffer
		len = input.readBuffer(_buffer, osamp);
		// The assembly loops always run at least once, so an empty read
		// must not reach them.
		if (len <= 0)
			return (ST_SUCCESS);

		// Mix the data into the output buffer
		if (stereo && reverseStereo)
			ARM_CopyRate_R(len, obuf, vol_l, vol_r, _buffer);
		else if (stereo)
			ARM_CopyRate_S(len, obuf, vol_l, vol_r, _buffer);
		else
			ARM_CopyRate_M(len, obuf, vol_l, vol_r, _buffer);

		return (ST_SUCCESS);
	}

	/** Nothing is buffered on the output side, so there is nothing to drain. */
	virtual int drain(st_sample_t *obuf, st_size_t osamp, st_volume_t vol) {
		return (ST_SUCCESS);
	}
};
#pragma mark -
/**
 * Build the RateConverter suited to the given rates and channel layout.
 *
 * Equal rates yield a CopyRateConverter, an input rate that is an exact
 * multiple of the output rate yields a SimpleRateConverter, and every other
 * combination yields a LinearRateConverter. Mono input always uses the
 * <false, false> instantiation; 'reverseStereo' only matters for stereo.
 *
 * @return a newly allocated converter
 */
RateConverter *makeRateConverter(st_rate_t inrate, st_rate_t outrate, bool stereo, bool reverseStereo) {
	// No conversion needed: plain copy and mix.
	if (inrate == outrate) {
		if (!stereo)
			return new CopyRateConverter<false, false>();
		if (reverseStereo)
			return new CopyRateConverter<true, true>();
		return new CopyRateConverter<true, false>();
	}

	// Integer ratio: dropping samples is sufficient.
	if ((inrate % outrate) == 0) {
		if (!stereo)
			return new SimpleRateConverter<false, false>(inrate, outrate);
		if (reverseStereo)
			return new SimpleRateConverter<true, true>(inrate, outrate);
		return new SimpleRateConverter<true, false>(inrate, outrate);
	}

	// Everything else: linear interpolation.
	if (!stereo)
		return new LinearRateConverter<false, false>(inrate, outrate);
	if (reverseStereo)
		return new LinearRateConverter<true, true>(inrate, outrate);
	return new LinearRateConverter<true, false>(inrate, outrate);
}
} // End of namespace Audio

689
sound/rate_arm_asm.s Normal file
View File

@ -0,0 +1,689 @@
@ ScummVM Scumm Interpreter
@ Copyright (C) 2007 The ScummVM project
@
@ This program is free software; you can redistribute it and/or
@ modify it under the terms of the GNU General Public License
@ as published by the Free Software Foundation; either version 2
@ of the License, or (at your option) any later version.
@
@ This program is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ GNU General Public License for more details.
@
@ You should have received a copy of the GNU General Public License
@ along with this program; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
@
@ $URL: $
@ $Id: $
@
@ @author Robin Watts (robin@wss.co.uk)
@
@ This file, together with rate_arm.cpp, provides an ARM optimised version
@ of rate.cpp. The algorithm is essentially the same as that within rate.cpp
@ so to understand this file you should understand rate.cpp first.
.text
.global ARM_CopyRate_M
.global ARM_CopyRate_S
.global ARM_CopyRate_R
.global ARM_SimpleRate_M
.global ARM_SimpleRate_S
.global ARM_SimpleRate_R
.global ARM_LinearRate_M
.global ARM_LinearRate_S
.global ARM_LinearRate_R
@ ARM_CopyRate_M
@ Mixes len mono samples from ptr into obuf without rate conversion. Each
@ input sample is scaled by vol_l and by vol_r and added, with clamping, to
@ two consecutive output halfwords (obuf[0] and obuf[1]).
@ len is tested only after the first iteration, so one sample is always
@ processed.
ARM_CopyRate_M:
@ r0 = len
@ r1 = obuf
@ r2 = vol_l
@ r3 = vol_r
@ <> = ptr (fifth argument, passed on the stack)
LDR r12,[r13] @ r12= ptr, fetched before anything is pushed
STMFD r13!,{r4-r7,r14}
MOV r14,#0 @ r14= 0 (operand for the clamps below)
@ NOTE(review): the two ORRs assume the volumes fit in 8 bits -- confirm
ORR r2, r2, r2, LSL #8 @ r2 = vol_l as 16 bits
ORR r3, r3, r3, LSL #8 @ r3 = vol_r as 16 bits
CopyRate_M_loop:
LDRSH r5, [r12], #2 @ r5 = tmp0 = tmp1 = *ptr++
LDRSH r6, [r1] @ r6 = obuf[0]
LDRSH r7, [r1, #2] @ r7 = obuf[1]
MUL r4, r2, r5 @ r4 = tmp0*vol_l
MUL r5, r3, r5 @ r5 = tmp1*vol_r
ADDS r6, r4, r6, LSL #16 @ r6 = obuf[0]<<16 + tmp0*vol_l
RSCVS r6, r14,#1<<31 @ Clamp r6 (only executed on signed overflow)
ADDS r7, r5, r7, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r
RSCVS r7, r14,#1<<31 @ Clamp r7 (only executed on signed overflow)
MOV r6, r6, LSR #16 @ Shift back to halfword
MOV r7, r7, LSR #16 @ Shift back to halfword
STRH r6, [r1], #2 @ Store output value
STRH r7, [r1], #2 @ Store output value
SUBS r0,r0,#1 @ len--
BGT CopyRate_M_loop @ and loop
LDMFD r13!,{r4-r7,PC}
@ ARM_CopyRate_S
@ Mixes len stereo samples (len/2 frames) from ptr into obuf without rate
@ conversion. The first sample of each frame is scaled by vol_l and added,
@ with clamping, to obuf[0]; the second is scaled by vol_r and added to
@ obuf[1].
@ len is tested only after the first iteration, so one frame is always
@ processed.
ARM_CopyRate_S:
@ r0 = len
@ r1 = obuf
@ r2 = vol_l
@ r3 = vol_r
@ <> = ptr (fifth argument, passed on the stack)
LDR r12,[r13] @ r12= ptr, fetched before anything is pushed
STMFD r13!,{r4-r7,r14}
MOV r14,#0 @ r14= 0 (operand for the clamps below)
@ NOTE(review): the two ORRs assume the volumes fit in 8 bits -- confirm
ORR r2, r2, r2, LSL #8 @ r2 = vol_l as 16 bits
ORR r3, r3, r3, LSL #8 @ r3 = vol_r as 16 bits
CopyRate_S_loop:
LDRSH r4, [r12],#2 @ r4 = tmp0 = *ptr++
LDRSH r5, [r12],#2 @ r5 = tmp1 = *ptr++
LDRSH r6, [r1] @ r6 = obuf[0]
LDRSH r7, [r1,#2] @ r7 = obuf[1]
MUL r4, r2, r4 @ r4 = tmp0*vol_l
MUL r5, r3, r5 @ r5 = tmp1*vol_r
ADDS r6, r4, r6, LSL #16 @ r6 = obuf[0]<<16 + tmp0*vol_l
RSCVS r6, r14,#1<<31 @ Clamp r6 (only executed on signed overflow)
ADDS r7, r5, r7, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r
RSCVS r7, r14,#1<<31 @ Clamp r7 (only executed on signed overflow)
MOV r6, r6, LSR #16 @ Shift back to halfword
MOV r7, r7, LSR #16 @ Shift back to halfword
STRH r6, [r1],#2 @ Store output value
STRH r7, [r1],#2 @ Store output value
SUBS r0,r0,#2 @ len -= 2
BGT CopyRate_S_loop @ and loop
LDMFD r13!,{r4-r7,PC}
@ ARM_CopyRate_R
@ Mixes len stereo samples (len/2 frames) from ptr into obuf without rate
@ conversion, swapping the input channels: the second sample of each frame
@ is scaled by vol_l and added, with clamping, to obuf[0]; the first is
@ scaled by vol_r and added to obuf[1].
@ len is tested only after the first iteration, so one frame is always
@ processed.
ARM_CopyRate_R:
@ r0 = len
@ r1 = obuf
@ r2 = vol_l
@ r3 = vol_r
@ <> = ptr (fifth argument, passed on the stack)
LDR r12,[r13] @ r12= ptr, fetched before anything is pushed
STMFD r13!,{r4-r7,r14}
MOV r14,#0 @ r14= 0 (operand for the clamps below)
@ NOTE(review): the two ORRs assume the volumes fit in 8 bits -- confirm
ORR r2, r2, r2, LSL #8 @ r2 = vol_l as 16 bits
ORR r3, r3, r3, LSL #8 @ r3 = vol_r as 16 bits
CopyRate_R_loop:
LDRSH r5, [r12],#2 @ r5 = tmp1 = *ptr++ (first sample of the frame)
LDRSH r4, [r12],#2 @ r4 = tmp0 = *ptr++ (second sample of the frame)
LDRSH r6, [r1] @ r6 = obuf[0]
LDRSH r7, [r1,#2] @ r7 = obuf[1]
MUL r4, r2, r4 @ r4 = tmp0*vol_l
MUL r5, r3, r5 @ r5 = tmp1*vol_r
ADDS r6, r4, r6, LSL #16 @ r6 = obuf[0]<<16 + tmp0*vol_l
RSCVS r6, r14,#1<<31 @ Clamp r6 (only executed on signed overflow)
ADDS r7, r5, r7, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r
RSCVS r7, r14,#1<<31 @ Clamp r7 (only executed on signed overflow)
MOV r6, r6, LSR #16 @ Shift back to halfword
MOV r7, r7, LSR #16 @ Shift back to halfword
STRH r6, [r1],#2 @ Store output value
STRH r7, [r1],#2 @ Store output value
SUBS r0,r0,#2 @ len -= 2
BGT CopyRate_R_loop @ and loop
LDMFD r13!,{r4-r7,PC}
@ ARM_SimpleRate_M
@ Mono rate conversion by dropping input samples. For every output frame
@ the loop skips input samples while opos stays >= 0, then scales the next
@ sample by vol_l and vol_r, adds it with clamping to obuf[0] and obuf[1],
@ and adds opos_inc to opos.
@ The input cache (inBuf) is refilled through the callback in r1 whenever
@ inLen runs out; the routine returns early if the callback delivers no
@ samples. inPtr, inLen and opos are written back to the state struct on
@ every exit path.
ARM_SimpleRate_M:
@ r0 = AudioStream &input
@ r1 = callback used to read samples from input
@ r2 = pointer to the SimpleRateDetails state (sr)
@ r3 = obuf
@ <> = osamp
@ <> = vol_l
@ <> = vol_r
MOV r12,r13 @ r12= address of the stacked arguments
STMFD r13!,{r0-r2,r4-r8,r10-r11,r14}
LDMFD r12,{r11,r12,r14} @ r11= osamp
@ r12= vol_l
@ r14= vol_r
LDMIA r2,{r0,r1,r2,r8} @ r0 = inPtr
@ r1 = inLen
@ r2 = opos
@ r8 = opos_inc
CMP r11,#0 @ if (osamp <= 0)
BLE SimpleRate_M_end @ bale
MOV r10,#0 @ r10= 0 (operand for the clamps below)
@ NOTE(review): the two ORRs assume the volumes fit in 8 bits -- confirm
ORR r12,r12,r12,LSL #8 @ r12= vol_l as 16 bits
ORR r14,r14,r14,LSL #8 @ r14= vol_r as 16 bits
SimpleRate_M_loop:
SUBS r1, r1, #1 @ r1 = inLen -= 1
BLT SimpleRate_M_read @ cache empty: refill it
SUBS r2, r2, #1 @ r2 = opos--
ADDGE r0, r0, #2 @ if (r2 >= 0) { sr.inPtr++
BGE SimpleRate_M_loop @ and loop }
SimpleRate_M_read_return:
LDRSH r5, [r0],#2 @ r5 = tmp0 = tmp1 = *inPtr++
LDRSH r6, [r3] @ r6 = obuf[0]
LDRSH r7, [r3,#2] @ r7 = obuf[1]
ADD r2, r2, r8 @ r2 = opos += opos_inc
MUL r4, r12,r5 @ r4 = tmp0*vol_l
MUL r5, r14,r5 @ r5 = tmp1*vol_r
ADDS r6, r4, r6, LSL #16 @ r6 = obuf[0]<<16 + tmp0*vol_l
RSCVS r6, r10,#1<<31 @ Clamp r6 (only executed on signed overflow)
ADDS r7, r5, r7, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r
RSCVS r7, r10,#1<<31 @ Clamp r7 (only executed on signed overflow)
MOV r6, r6, LSR #16 @ Shift back to halfword
MOV r7, r7, LSR #16 @ Shift back to halfword
STRH r6, [r3],#2 @ Store output value
STRH r7, [r3],#2 @ Store output value
SUBS r11,r11,#1 @ osamp--
BGT SimpleRate_M_loop @ and loop
SimpleRate_M_end:
LDR r14,[r13,#8] @ r14 = sr
ADD r13,r13,#12 @ Skip over r0-r2 on stack
STMIA r14,{r0,r1,r2} @ Store back inPtr, inLen and opos
LDMFD r13!,{r4-r8,r10-r11,PC}
SimpleRate_M_read:
LDR r0, [r13,#4*2] @ r0 = sr
ADD r0, r0, #16 @ r0 = inPtr = inBuf
STMFD r13!,{r0,r2-r3,r12,r14}
MOV r1, r0 @ r1 = inBuf
LDR r0, [r13,#4*5] @ r0 = AudioStream & input
MOV r2, #512 @ r2 = ARRAYSIZE(inBuf)
@ Calling back into C++ here. WinCE is fairly easy about such things
@ but other OS are more awkward. r9 is preserved for Symbian, and
@ we have 3+8+5 = 16 things on the stack (an even number).
MOV r14,PC @ r14= return address (the SUBS below)
LDR PC,[r13,#4*6] @ inLen = callback(input,inBuf,512)
SUBS r1, r0, #1 @ r1 = inLen-1
LDMFD r13!,{r0,r2-r3,r12,r14}
BLT SimpleRate_M_end @ nothing was read: give up
SUBS r2, r2, #1 @ r2 = opos--
ADDGE r0, r0, #2 @ if (r2 >= 0) { sr.inPtr++
BGE SimpleRate_M_loop @ and loop }
B SimpleRate_M_read_return
@ ARM_SimpleRate_S
@ Stereo rate conversion by dropping input frames. For every output frame
@ the loop skips input frames while opos stays >= 0, then scales the first
@ sample of the next frame by vol_l and the second by vol_r, adds them with
@ clamping to obuf[0] and obuf[1], and adds opos_inc to opos.
@ The input cache (inBuf) is refilled through the callback in r1 whenever
@ inLen runs out; the routine returns early if the callback delivers less
@ than one frame. inPtr, inLen and opos are written back to the state
@ struct on every exit path.
ARM_SimpleRate_S:
@ r0 = AudioStream &input
@ r1 = callback used to read samples from input
@ r2 = pointer to the SimpleRateDetails state (sr)
@ r3 = obuf
@ <> = osamp
@ <> = vol_l
@ <> = vol_r
MOV r12,r13 @ r12= address of the stacked arguments
STMFD r13!,{r0-r2,r4-r8,r10-r11,r14}
LDMFD r12,{r11,r12,r14} @ r11= osamp
@ r12= vol_l
@ r14= vol_r
LDMIA r2,{r0,r1,r2,r8} @ r0 = inPtr
@ r1 = inLen
@ r2 = opos
@ r8 = opos_inc
CMP r11,#0 @ if (osamp <= 0)
BLE SimpleRate_S_end @ bale
MOV r10,#0 @ r10= 0 (operand for the clamps below)
@ NOTE(review): the two ORRs assume the volumes fit in 8 bits -- confirm
ORR r12,r12,r12,LSL #8 @ r12= vol_l as 16 bits
ORR r14,r14,r14,LSL #8 @ r14= vol_r as 16 bits
SimpleRate_S_loop:
SUBS r1, r1, #2 @ r1 = inLen -= 2
BLT SimpleRate_S_read @ cache empty: refill it
SUBS r2, r2, #1 @ r2 = opos--
ADDGE r0, r0, #4 @ if (r2 >= 0) { sr.inPtr += 2
BGE SimpleRate_S_loop @ and loop }
SimpleRate_S_read_return:
LDRSH r4, [r0],#2 @ r4 = tmp0 = *inPtr++
LDRSH r5, [r0],#2 @ r5 = tmp1 = *inPtr++
LDRSH r6, [r3] @ r6 = obuf[0]
LDRSH r7, [r3,#2] @ r7 = obuf[1]
ADD r2, r2, r8 @ r2 = opos += opos_inc
MUL r4, r12,r4 @ r4 = tmp0*vol_l
MUL r5, r14,r5 @ r5 = tmp1*vol_r
ADDS r6, r4, r6, LSL #16 @ r6 = obuf[0]<<16 + tmp0*vol_l
RSCVS r6, r10,#1<<31 @ Clamp r6 (only executed on signed overflow)
ADDS r7, r5, r7, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r
RSCVS r7, r10,#1<<31 @ Clamp r7 (only executed on signed overflow)
MOV r6, r6, LSR #16 @ Shift back to halfword
MOV r7, r7, LSR #16 @ Shift back to halfword
STRH r6, [r3],#2 @ Store output value
STRH r7, [r3],#2 @ Store output value
SUBS r11,r11,#1 @ osamp--
BGT SimpleRate_S_loop @ and loop
SimpleRate_S_end:
LDR r14,[r13,#8] @ r14 = sr
ADD r13,r13,#12 @ skip over r0-r2 on stack
STMIA r14,{r0,r1,r2} @ store back inPtr, inLen and opos
LDMFD r13!,{r4-r8,r10-r11,PC}
SimpleRate_S_read:
LDR r0, [r13,#4*2] @ r0 = sr
ADD r0, r0, #16 @ r0 = inPtr = inBuf
STMFD r13!,{r0,r2-r3,r12,r14}
MOV r1, r0 @ r1 = inBuf
LDR r0, [r13,#4*5] @ r0 = AudioStream & input
MOV r2, #512 @ r2 = ARRAYSIZE(inBuf)
@ Calling back into C++ here. WinCE is fairly easy about such things
@ but other OS are more awkward. r9 is preserved for Symbian, and
@ we have 3+8+5 = 16 things on the stack (an even number).
MOV r14,PC @ r14= return address (the SUBS below)
LDR PC,[r13,#4*6] @ inLen = callback(input,inBuf,512)
SUBS r1, r0, #2 @ r1 = inLen-2
LDMFD r13!,{r0,r2-r3,r12,r14}
BLT SimpleRate_S_end @ less than one frame was read: give up
SUBS r2, r2, #1 @ r2 = opos--
ADDGE r0, r0, #4 @ if (r2 >= 0) { sr.inPtr += 2
BGE SimpleRate_S_loop @ and loop }
B SimpleRate_S_read_return
@ ARM_SimpleRate_R
@ Stereo rate conversion by dropping input frames, with the input channels
@ swapped. For every output frame the loop skips input frames while opos
@ stays >= 0, then scales the second sample of the next frame by vol_l and
@ the first by vol_r, adds them with clamping to obuf[0] and obuf[1], and
@ adds opos_inc to opos.
@ The input cache (inBuf) is refilled through the callback in r1 whenever
@ inLen runs out; the routine returns early if the callback delivers less
@ than one frame. inPtr, inLen and opos are written back to the state
@ struct on every exit path.
ARM_SimpleRate_R:
@ r0 = AudioStream &input
@ r1 = callback used to read samples from input
@ r2 = pointer to the SimpleRateDetails state (sr)
@ r3 = obuf
@ <> = osamp
@ <> = vol_l
@ <> = vol_r
MOV r12,r13 @ r12= address of the stacked arguments
STMFD r13!,{r0-r2,r4-r8,r10-r11,r14}
LDMFD r12,{r11,r12,r14} @ r11= osamp
@ r12= vol_l
@ r14= vol_r
LDMIA r2,{r0,r1,r2,r8} @ r0 = inPtr
@ r1 = inLen
@ r2 = opos
@ r8 = opos_inc
CMP r11,#0 @ if (osamp <= 0)
BLE SimpleRate_R_end @ bale
MOV r10,#0 @ r10= 0 (operand for the clamps below)
@ NOTE(review): the two ORRs assume the volumes fit in 8 bits -- confirm
ORR r12,r12,r12,LSL #8 @ r12= vol_l as 16 bits
ORR r14,r14,r14,LSL #8 @ r14= vol_r as 16 bits
SimpleRate_R_loop:
SUBS r1, r1, #2 @ r1 = inLen -= 2
BLT SimpleRate_R_read @ cache empty: refill it
SUBS r2, r2, #1 @ r2 = opos--
ADDGE r0, r0, #4 @ if (r2 >= 0) { sr.inPtr += 2
BGE SimpleRate_R_loop @ and loop }
SimpleRate_R_read_return:
LDRSH r5, [r0],#2 @ r5 = tmp1 = *inPtr++ (first sample of the frame)
LDRSH r4, [r0],#2 @ r4 = tmp0 = *inPtr++ (second sample of the frame)
LDRSH r6, [r3] @ r6 = obuf[0]
LDRSH r7, [r3,#2] @ r7 = obuf[1]
ADD r2, r2, r8 @ r2 = opos += opos_inc
MUL r4, r12,r4 @ r4 = tmp0*vol_l
MUL r5, r14,r5 @ r5 = tmp1*vol_r
ADDS r6, r4, r6, LSL #16 @ r6 = obuf[0]<<16 + tmp0*vol_l
RSCVS r6, r10,#1<<31 @ Clamp r6 (only executed on signed overflow)
ADDS r7, r5, r7, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r
RSCVS r7, r10,#1<<31 @ Clamp r7 (only executed on signed overflow)
MOV r6, r6, LSR #16 @ Shift back to halfword
MOV r7, r7, LSR #16 @ Shift back to halfword
STRH r6, [r3],#2 @ Store output value
STRH r7, [r3],#2 @ Store output value
SUBS r11,r11,#1 @ osamp--
BGT SimpleRate_R_loop @ and loop
SimpleRate_R_end:
LDR r14,[r13,#8] @ r14 = sr
ADD r13,r13,#12 @ Skip over r0-r2 on stack
STMIA r14,{r0,r1,r2} @ Store back inPtr, inLen and opos
LDMFD r13!,{r4-r8,r10-r11,PC}
SimpleRate_R_read:
LDR r0, [r13,#4*2] @ r0 = sr
ADD r0, r0, #16 @ r0 = inPtr = inBuf
STMFD r13!,{r0,r2-r3,r12,r14}
MOV r1, r0 @ r1 = inBuf
LDR r0, [r13,#4*5] @ r0 = AudioStream & input
MOV r2, #512 @ r2 = ARRAYSIZE(inBuf)
@ Calling back into C++ here. WinCE is fairly easy about such things
@ but other OS are more awkward. r9 is preserved for Symbian, and
@ we have 3+8+5 = 16 things on the stack (an even number).
MOV r14,PC @ r14= return address (the SUBS below)
LDR PC,[r13,#4*6] @ inLen = callback(input,inBuf,512)
SUBS r1, r0, #2 @ r1 = inLen-2
LDMFD r13!,{r0,r2-r3,r12,r14}
BLT SimpleRate_R_end @ less than one frame was read: give up
SUBS r2, r2, #1 @ r2 = opos--
ADDGE r0, r0, #4 @ if (r2 >= 0) { sr.inPtr += 2
BGE SimpleRate_R_loop @ and loop }
B SimpleRate_R_read_return
@ ARM_LinearRate_M
@ Mono rate conversion with linear interpolation between the previous
@ (ilast) and the current (icur) input sample.
@ part1 pulls input samples, shifting icur into ilast, until opos goes
@ negative. part2 interpolates one value using opos_frac, scales it by
@ vol_l and vol_r, adds it with clamping to obuf[0] and obuf[1], and then
@ advances opos_frac/opos by the fractional and integer increments.
@ The input cache (inBuf) is refilled through the callback in r1 whenever
@ inLen runs out; the routine returns early if the callback delivers no
@ samples. inPtr, inLen and opos are written back to the state struct on
@ every exit path. r2 holds the state pointer for the whole routine.
ARM_LinearRate_M:
@ r0 = AudioStream &input
@ r1 = callback used to read samples from input
@ r2 = pointer to the LinearRateDetails state
@ r3 = obuf
@ <> = osamp
@ <> = vol_l
@ <> = vol_r
MOV r12,r13 @ r12= address of the stacked arguments
STMFD r13!,{r0-r1,r4-r11,r14}
LDMFD r12,{r11,r12,r14} @ r11= osamp
@ r12= vol_l
@ r14= vol_r
LDMIA r2,{r0,r1,r8} @ r0 = inPtr
@ r1 = inLen
@ r8 = opos
CMP r11,#0 @ if (osamp <= 0)
BLE LinearRate_M_end @ bale
@ NOTE(review): the two ORRs assume the volumes fit in 8 bits -- confirm
ORR r12,r12,r12,LSL #8 @ r12= vol_l as 16 bits
ORR r14,r14,r14,LSL #8 @ r14= vol_r as 16 bits
CMP r1,#0
@ NOTE(review): resuming in part2 relies on opos already being negative
@ when samples are left over from the previous call -- confirm
BGT LinearRate_M_part2
@ part1 - read input samples
LinearRate_M_loop:
SUBS r1, r1, #1 @ r1 = inLen -= 1
BLT LinearRate_M_read @ cache empty: refill it
LinearRate_M_read_return:
LDR r10,[r2, #16] @ r10= icur[0,1]
LDRSH r5, [r0],#2 @ r5 = tmp1 = *inPtr++
SUBS r8, r8, #1 @ r8 = opos--
STR r10,[r2,#20] @ ilast[0,1] = icur[0,1]
STRH r5, [r2,#16] @ icur[0] = tmp1
BGE LinearRate_M_loop @ flags still come from the SUBS above
@ part2 - form output samples
LinearRate_M_part2:
@ We are guaranteed that opos < 0 here
LDRSH r6, [r2,#20] @ r6 = ilast[0]
LDRSH r5, [r2,#16] @ r5 = icur[0]
LDRH r4, [r2,#24] @ r4 = opos_frac
LDR r10,[r2,#28] @ r10= opos_inc_frac
MOV r6, r6, LSL #16 @ r6 = ilast[0]<<16
SUB r5, r5, r6, ASR #16 @ r5 = icur[0] - ilast[0]
ADD r6, r6, #1<<15 @ r6 = (ilast[0]<<16) + (1<<(FRAC_BITS-1))
MLA r6, r4, r5, r6 @ r6 = (icur[0]-ilast[0])*opos_frac + r6
ADD r4, r4, r10 @ r4 = tmp = opos_frac+opos_inc_frac
STRH r4,[r2,#24] @ opos_frac = tmp & 65535 (low halfword only)
ADD r8, r8, r4, LSR #16 @ opos += (tmp>>FRAC_BITS)
LDRSH r4, [r3] @ r4 = obuf[0]
LDRSH r5, [r3,#2] @ r5 = obuf[1]
MOV r6, r6, ASR #16 @ r6 = tmp0 = tmp1 >>= 16
MUL r7, r12,r6 @ r7 = tmp0*vol_l
MUL r6, r14,r6 @ r6 = tmp1*vol_r
ADDS r7, r7, r4, LSL #16 @ r7 = obuf[0]<<16 + tmp0*vol_l
MOV r4, #0 @ r4 = 0 (operand for the clamps; flags untouched)
RSCVS r7, r4, #1<<31 @ Clamp r7 (only executed on signed overflow)
ADDS r6, r6, r5, LSL #16 @ r6 = obuf[1]<<16 + tmp1*vol_r
RSCVS r6, r4, #1<<31 @ Clamp r6 (only executed on signed overflow)
MOV r7, r7, LSR #16 @ Shift back to halfword
MOV r6, r6, LSR #16 @ Shift back to halfword
LDR r5, [r2,#12] @ r5 = opos_inc
STRH r7, [r3],#2 @ Store output value
STRH r6, [r3],#2 @ Store output value
SUBS r11, r11,#1 @ osamp--
BLE LinearRate_M_end @ end if needed
ADDS r8, r8, r5 @ r8 = opos += opos_inc
BLT LinearRate_M_part2 @ still behind: emit another output sample
B LinearRate_M_loop @ otherwise fetch more input
LinearRate_M_end:
ADD r13,r13,#8 @ Skip over r0-r1 on stack
STMIA r2,{r0,r1,r8} @ Store back inPtr, inLen and opos
LDMFD r13!,{r4-r11,PC}
LinearRate_M_read:
ADD r0, r2, #32 @ r0 = inPtr = inBuf
STMFD r13!,{r0,r2-r3,r12,r14}
MOV r1, r0 @ r1 = inBuf
LDR r0, [r13,#4*5] @ r0 = AudioStream & input
MOV r2, #512 @ r2 = ARRAYSIZE(inBuf)
@ Calling back into C++ here. WinCE is fairly easy about such things
@ but other OS are more awkward. r9 is preserved for Symbian, and
@ we have 2+9+5 = 16 things on the stack (an even number).
MOV r14,PC @ r14= return address (the SUBS below)
LDR PC,[r13,#4*6] @ inLen = callback(input,inBuf,512)
SUBS r1, r0, #1 @ r1 = inLen-1
LDMFD r13!,{r0,r2-r3,r12,r14}
BLT LinearRate_M_end @ nothing was read: give up
B LinearRate_M_read_return
@ ARM_LinearRate_S
@ Stereo rate conversion with linear interpolation between the previous
@ (ilast) and the current (icur) input frame.
@ part1 pulls input frames, shifting icur into ilast, until opos goes
@ negative. part2 interpolates both channels using opos_frac, scales
@ channel 0 by vol_l and channel 1 by vol_r, adds them with clamping to
@ obuf[0] and obuf[1], and then advances opos_frac/opos by the fractional
@ and integer increments.
@ The input cache (inBuf) is refilled through the callback in r1 whenever
@ inLen runs out; the routine returns early if the callback delivers less
@ than one frame. inPtr, inLen and opos are written back to the state
@ struct on every exit path. r2 holds the state pointer for the whole
@ routine.
ARM_LinearRate_S:
@ r0 = AudioStream &input
@ r1 = callback used to read samples from input
@ r2 = pointer to the LinearRateDetails state
@ r3 = obuf
@ <> = osamp
@ <> = vol_l
@ <> = vol_r
MOV r12,r13 @ r12= address of the stacked arguments
STMFD r13!,{r0-r1,r4-r11,r14}
LDMFD r12,{r11,r12,r14} @ r11= osamp
@ r12= vol_l
@ r14= vol_r
LDMIA r2,{r0,r1,r8} @ r0 = inPtr
@ r1 = inLen
@ r8 = opos
CMP r11,#0 @ if (osamp <= 0)
BLE LinearRate_S_end @ bale
@ NOTE(review): the two ORRs assume the volumes fit in 8 bits -- confirm
ORR r12,r12,r12,LSL #8 @ r12= vol_l as 16 bits
ORR r14,r14,r14,LSL #8 @ r14= vol_r as 16 bits
CMP r1,#0
@ NOTE(review): resuming in part2 relies on opos already being negative
@ when samples are left over from the previous call -- confirm
BGT LinearRate_S_part2
@ part1 - read input samples
LinearRate_S_loop:
SUBS r1, r1, #2 @ r1 = inLen -= 2
BLT LinearRate_S_read @ cache empty: refill it
LinearRate_S_read_return:
LDR r10,[r2, #16] @ r10= icur[0,1]
LDRSH r5, [r0],#2 @ r5 = tmp0 = *inPtr++
LDRSH r6, [r0],#2 @ r6 = tmp1 = *inPtr++
SUBS r8, r8, #1 @ r8 = opos--
STR r10,[r2,#20] @ ilast[0,1] = icur[0,1]
STRH r5, [r2,#16] @ icur[0] = tmp0
STRH r6, [r2,#18] @ icur[1] = tmp1 (offset 18, not 16: must not overwrite icur[0])
BGE LinearRate_S_loop @ flags still come from the SUBS above
@ part2 - form output samples
LinearRate_S_part2:
@ We are guaranteed that opos < 0 here
LDRSH r6, [r2,#20] @ r6 = ilast[0]
LDRSH r5, [r2,#16] @ r5 = icur[0]
LDRH r4, [r2,#24] @ r4 = opos_frac
MOV r6, r6, LSL #16 @ r6 = ilast[0]<<16
SUB r5, r5, r6, ASR #16 @ r5 = icur[0] - ilast[0]
ADD r6, r6, #1<<15 @ r6 = (ilast[0]<<16) + (1<<(FRAC_BITS-1))
MLA r6, r4, r5, r6 @ r6 = (icur[0]-ilast[0])*opos_frac + r6
LDRSH r7, [r2,#22] @ r7 = ilast[1]
LDRSH r5, [r2,#18] @ r5 = icur[1]
LDR r10,[r2,#28] @ r10= opos_inc_frac
MOV r7, r7, LSL #16 @ r7 = ilast[1]<<16
SUB r5, r5, r7, ASR #16 @ r5 = icur[1] - ilast[1]
ADD r7, r7, #1<<15 @ r7 = (ilast[1]<<16) + (1<<(FRAC_BITS-1))
MLA r7, r4, r5, r7 @ r7 = (icur[1]-ilast[1])*opos_frac + r7
ADD r4, r4, r10 @ r4 = tmp = opos_frac+opos_inc_frac
STRH r4,[r2,#24] @ opos_frac = tmp & 65535 (low halfword only)
ADD r8, r8, r4, LSR #16 @ opos += (tmp>>FRAC_BITS)
LDRSH r4, [r3] @ r4 = obuf[0]
LDRSH r5, [r3,#2] @ r5 = obuf[1]
MOV r6, r6, ASR #16 @ r6 = tmp0 >>= 16 (channel 0)
MOV r7, r7, ASR #16 @ r7 = tmp1 >>= 16 (channel 1)
@ Channel 0 pairs with vol_l/obuf[0] and channel 1 with vol_r/obuf[1],
@ matching ARM_CopyRate_S and ARM_SimpleRate_S.
MUL r6, r12,r6 @ r6 = tmp0*vol_l
MUL r7, r14,r7 @ r7 = tmp1*vol_r
ADDS r6, r6, r4, LSL #16 @ r6 = obuf[0]<<16 + tmp0*vol_l
MOV r4, #0 @ r4 = 0 (operand for the clamps; flags untouched)
RSCVS r6, r4, #1<<31 @ Clamp r6 (only executed on signed overflow)
ADDS r7, r7, r5, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r
RSCVS r7, r4, #1<<31 @ Clamp r7 (only executed on signed overflow)
MOV r6, r6, LSR #16 @ Shift back to halfword
MOV r7, r7, LSR #16 @ Shift back to halfword
LDR r5, [r2,#12] @ r5 = opos_inc
STRH r6, [r3],#2 @ Store obuf[0]
STRH r7, [r3],#2 @ Store obuf[1]
SUBS r11, r11,#1 @ osamp--
BLE LinearRate_S_end @ end if needed
ADDS r8, r8, r5 @ r8 = opos += opos_inc
BLT LinearRate_S_part2 @ still behind: emit another output frame
B LinearRate_S_loop @ otherwise fetch more input
LinearRate_S_end:
ADD r13,r13,#8 @ Skip over r0-r1 on stack
STMIA r2,{r0,r1,r8} @ Store back inPtr, inLen and opos
LDMFD r13!,{r4-r11,PC}
LinearRate_S_read:
ADD r0, r2, #32 @ r0 = inPtr = inBuf
STMFD r13!,{r0,r2-r3,r12,r14}
MOV r1, r0 @ r1 = inBuf
LDR r0, [r13,#4*5] @ r0 = AudioStream & input
MOV r2, #512 @ r2 = ARRAYSIZE(inBuf)
@ Calling back into C++ here. WinCE is fairly easy about such things
@ but other OS are more awkward. r9 is preserved for Symbian, and
@ we have 2+9+5 = 16 things on the stack (an even number).
MOV r14,PC @ r14= return address (the SUBS below)
LDR PC,[r13,#4*6] @ inLen = callback(input,inBuf,512)
SUBS r1, r0, #2 @ r1 = inLen-2
LDMFD r13!,{r0,r2-r3,r12,r14}
BLT LinearRate_S_end @ less than one frame was read: give up
B LinearRate_S_read_return
@ ARM_LinearRate_R
@ Stereo rate conversion with linear interpolation between the previous
@ (ilast) and the current (icur) input frame, with the input channels
@ swapped.
@ part1 pulls input frames, shifting icur into ilast, until opos goes
@ negative. part2 interpolates both channels using opos_frac, scales
@ channel 1 by vol_l and channel 0 by vol_r, adds them with clamping to
@ obuf[0] and obuf[1], and then advances opos_frac/opos by the fractional
@ and integer increments.
@ The input cache (inBuf) is refilled through the callback in r1 whenever
@ inLen runs out; the routine returns early if the callback delivers less
@ than one frame. inPtr, inLen and opos are written back to the state
@ struct on every exit path. r2 holds the state pointer for the whole
@ routine.
ARM_LinearRate_R:
@ r0 = AudioStream &input
@ r1 = callback used to read samples from input
@ r2 = pointer to the LinearRateDetails state
@ r3 = obuf
@ <> = osamp
@ <> = vol_l
@ <> = vol_r
MOV r12,r13 @ r12= address of the stacked arguments
STMFD r13!,{r0-r1,r4-r11,r14}
LDMFD r12,{r11,r12,r14} @ r11= osamp
@ r12= vol_l
@ r14= vol_r
LDMIA r2,{r0,r1,r8} @ r0 = inPtr
@ r1 = inLen
@ r8 = opos
CMP r11,#0 @ if (osamp <= 0)
BLE LinearRate_R_end @ bale
@ NOTE(review): the two ORRs assume the volumes fit in 8 bits -- confirm
ORR r12,r12,r12,LSL #8 @ r12= vol_l as 16 bits
ORR r14,r14,r14,LSL #8 @ r14= vol_r as 16 bits
CMP r1,#0
@ NOTE(review): resuming in part2 relies on opos already being negative
@ when samples are left over from the previous call -- confirm
BGT LinearRate_R_part2
@ part1 - read input samples
LinearRate_R_loop:
SUBS r1, r1, #2 @ r1 = inLen -= 2
BLT LinearRate_R_read @ cache empty: refill it
LinearRate_R_read_return:
LDR r10,[r2, #16] @ r10= icur[0,1]
LDRSH r5, [r0],#2 @ r5 = tmp0 = *inPtr++
LDRSH r6, [r0],#2 @ r6 = tmp1 = *inPtr++
SUBS r8, r8, #1 @ r8 = opos--
STR r10,[r2,#20] @ ilast[0,1] = icur[0,1]
STRH r5, [r2,#16] @ icur[0] = tmp0
STRH r6, [r2,#18] @ icur[1] = tmp1 (offset 18, not 16: must not overwrite icur[0])
BGE LinearRate_R_loop @ flags still come from the SUBS above
@ part2 - form output samples
LinearRate_R_part2:
@ We are guaranteed that opos < 0 here
LDRSH r6, [r2,#20] @ r6 = ilast[0]
LDRSH r5, [r2,#16] @ r5 = icur[0]
LDRH r4, [r2,#24] @ r4 = opos_frac
MOV r6, r6, LSL #16 @ r6 = ilast[0]<<16
SUB r5, r5, r6, ASR #16 @ r5 = icur[0] - ilast[0]
ADD r6, r6, #1<<15 @ r6 = (ilast[0]<<16) + (1<<(FRAC_BITS-1))
MLA r6, r4, r5, r6 @ r6 = (icur[0]-ilast[0])*opos_frac + r6
LDRSH r7, [r2,#22] @ r7 = ilast[1]
LDRSH r5, [r2,#18] @ r5 = icur[1]
LDR r10,[r2,#28] @ r10= opos_inc_frac
MOV r7, r7, LSL #16 @ r7 = ilast[1]<<16
SUB r5, r5, r7, ASR #16 @ r5 = icur[1] - ilast[1]
ADD r7, r7, #1<<15 @ r7 = (ilast[1]<<16) + (1<<(FRAC_BITS-1))
MLA r7, r4, r5, r7 @ r7 = (icur[1]-ilast[1])*opos_frac + r7
ADD r4, r4, r10 @ r4 = tmp = opos_frac+opos_inc_frac
STRH r4,[r2,#24] @ opos_frac = tmp & 65535 (low halfword only)
ADD r8, r8, r4, LSR #16 @ opos += (tmp>>FRAC_BITS)
LDRSH r4, [r3] @ r4 = obuf[0]
LDRSH r5, [r3,#2] @ r5 = obuf[1]
MOV r7, r7, ASR #16 @ r7 = tmp0 >>= 16 (input channel 1)
MOV r6, r6, ASR #16 @ r6 = tmp1 >>= 16 (input channel 0)
@ Reversed stereo: input channel 1 pairs with vol_l/obuf[0] and input
@ channel 0 with vol_r/obuf[1], matching ARM_CopyRate_R and
@ ARM_SimpleRate_R.
MUL r7, r12,r7 @ r7 = tmp0*vol_l
MUL r6, r14,r6 @ r6 = tmp1*vol_r
ADDS r7, r7, r4, LSL #16 @ r7 = obuf[0]<<16 + tmp0*vol_l
MOV r4, #0 @ r4 = 0 (operand for the clamps; flags untouched)
RSCVS r7, r4, #1<<31 @ Clamp r7 (only executed on signed overflow)
ADDS r6, r6, r5, LSL #16 @ r6 = obuf[1]<<16 + tmp1*vol_r
RSCVS r6, r4, #1<<31 @ Clamp r6 (only executed on signed overflow)
MOV r7, r7, LSR #16 @ Shift back to halfword
MOV r6, r6, LSR #16 @ Shift back to halfword
LDR r5, [r2,#12] @ r5 = opos_inc
@ r7 was accumulated on top of obuf[0] and r6 on top of obuf[1], so they
@ must be stored in that order; storing r6 first would swap what is
@ already mixed into the output buffer.
STRH r7, [r3],#2 @ Store obuf[0]
STRH r6, [r3],#2 @ Store obuf[1]
SUBS r11, r11,#1 @ osamp--
BLE LinearRate_R_end @ end if needed
ADDS r8, r8, r5 @ r8 = opos += opos_inc
BLT LinearRate_R_part2 @ still behind: emit another output frame
B LinearRate_R_loop @ otherwise fetch more input
LinearRate_R_end:
ADD r13,r13,#8 @ Skip over r0-r1 on stack
STMIA r2,{r0,r1,r8} @ Store back inPtr, inLen and opos
LDMFD r13!,{r4-r11,PC}
LinearRate_R_read:
ADD r0, r2, #32 @ r0 = inPtr = inBuf
STMFD r13!,{r0,r2-r3,r12,r14}
MOV r1, r0 @ r1 = inBuf
LDR r0, [r13,#4*5] @ r0 = AudioStream & input
MOV r2, #512 @ r2 = ARRAYSIZE(inBuf)
@ Calling back into C++ here. WinCE is fairly easy about such things
@ but other OS are more awkward. r9 is preserved for Symbian, and
@ we have 2+9+5 = 16 things on the stack (an even number).
MOV r14,PC @ r14= return address (the SUBS below)
LDR PC,[r13,#4*6] @ inLen = callback(input,inBuf,512)
SUBS r1, r0, #2 @ r1 = inLen-2
LDMFD r13!,{r0,r2-r3,r12,r14}
BLT LinearRate_R_end @ less than one frame was read: give up
B LinearRate_R_read_return