Patch #1721826: ARM asm versions of sound rate conversion/mixing code

svn-id: r27467
2024-12-02 23:26:44 +00:00 · 2007-06-16 16:46:13 +00:00 · 2007-06-16 16:46:13 +00:00 · 6498d669d0
commit 6498d669d0
parent c7c2cb498c
4 changed files with 1128 additions and 1 deletions
--- a/backends/platform/wince/Makefile
+++ b/backends/platform/wince/Makefile
@ -25,6 +25,8 @@ DISABLE_CRUISE = 1

 #DISABLE_HQ_SCALERS = 1

+# Select the ARM assembler sound rate conversion/mixing code: sound/module.mk
+# builds rate_arm.o and rate_arm_asm.o instead of rate.o when this is defined.
+USE_ARM_SOUND_ASM = 1
+
 CXX    = arm-wince-pe-g++
 LD     = arm-wince-pe-g++
 AR     = arm-wince-pe-ar cru
--- a/sound/module.mk
+++ b/sound/module.mk
@ -16,7 +16,6 @@ MODULE_OBJS := \
 	mp3.o \
 	mpu401.o \
 	null.o \
-	rate.o \
 	voc.o \
 	vorbis.o \
 	wave.o \
@ -31,5 +30,14 @@ MODULE_OBJS := \
 	softsynth/fluidsynth.o \
 	softsynth/mt32.o \

+# Exactly one rate converter implementation is built: the portable rate.o by
+# default, or rate_arm.o plus its assembler core rate_arm_asm.o when
+# USE_ARM_SOUND_ASM is defined.
+ifndef USE_ARM_SOUND_ASM
+MODULE_OBJS += \
+	rate.o
+else
+MODULE_OBJS += \
+	rate_arm.o \
+	rate_arm_asm.o
+endif
+
 # Include common rules
 include $(srcdir)/rules.mk
--- a/sound/rate_arm.cpp
+++ b/sound/rate_arm.cpp
@ -0,0 +1,428 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2001-2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *
+ */
+
+/*
+ * The code in this file, together with the rate_arm_asm.s file offers
+ * an ARM optimised version of the code in rate.cpp. The operation of this
+ * code should be identical to that of rate.cpp, but faster. The heavy
+ * lifting is done in the assembler file.
+ *
+ * To be as portable as possible we implement the core routines with C
+ * linkage in assembly, and implement the C++ routines that call into
+ * the C here. The C++ symbol mangling varies wildly between compilers,
+ * so this is the simplest way to ensure that the C/C++ combination should
+ * work on as many ARM based platforms as possible.
+ *
+ * Essentially the algorithm herein is the same as that in rate.cpp, so
+ * anyone seeking to understand this should attempt to understand that
+ * first. That code was based in turn on code with Copyright 1998 Fabrice
+ * Bellard - part of SoX (http://sox.sourceforge.net).
+ * Max Horn adapted that code to the needs of ScummVM and partially rewrote
+ * it, in the process removing any use of floating point arithmetic. Various
+ * other improvements over the original code were made.
+ */
+
+#include "common/stdafx.h"
+#include "sound/audiostream.h"
+#include "sound/rate.h"
+#include "sound/mixer.h"
+#include "common/util.h"
+
+namespace Audio {
+
+/**
+ * The precision of the fractional computations used by the rate converter.
+ * Normally you should never have to modify this value.
+ *
+ * NOTE: rate_arm_asm.s does not see this macro. Its linear converter code
+ * hard-codes 16-bit fractions (halfword accesses to opos_frac and shifts
+ * by #16), so changing this value requires matching changes there.
+ */
+#define FRAC_BITS 16
+
+/**
+ * The size of the intermediate input cache. Bigger values may increase
+ * performance, but only until some point (depends largely on cache size,
+ * target processor and various other factors), at which it will decrease
+ * again.
+ *
+ * NOTE: rate_arm_asm.s does not see this macro. It passes a literal #512 as
+ * the buffer length when it refills inBuf, so the two must be kept in sync.
+ */
+#define INTERMEDIATE_BUFFER_SIZE 512
+
+
+/**
+ * Audio rate converter based on simple resampling. Used when no
+ * interpolation is required.
+ *
+ * Limited to sampling frequency <= 65535 Hz.
+ *
+ * SimpleRateDetails is the converter state shared with the ARM_SimpleRate_*
+ * routines in rate_arm_asm.s. The assembler loads the first four members as
+ * four consecutive 32-bit words and expects inBuf 16 bytes into the struct,
+ * so the member order and sizes must not change without updating it.
+ */
+typedef struct {
+	const st_sample_t *inPtr; // read position; the assembler resets it to inBuf on each refill
+	int inLen; // count of cached samples left, maintained by the assembler
+
+	/** position of how far output is ahead of input */
+	/** Holds what would have been opos-ipos */
+	long opos;
+
+	/** integer position increment: input frames advanced per output frame */
+	long opos_inc;
+
+	st_sample_t inBuf[INTERMEDIATE_BUFFER_SIZE]; // input cache filled through the read callback
+} SimpleRateDetails;
+
+/**
+ * RateConverter front end for the ARM_SimpleRate_* assembler routines.
+ *
+ * The template arguments pick the routine used by flow(): mono (stereo ==
+ * false), stereo, or stereo with the two channels swapped (reverseStereo).
+ */
+template<bool stereo, bool reverseStereo>
+class SimpleRateConverter : public RateConverter {
+protected:
+	SimpleRateDetails  sr; // state block handed to the assembler on every flow() call
+public:
+	SimpleRateConverter(st_rate_t inrate, st_rate_t outrate);
+	int flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r);
+	int drain(st_sample_t *obuf, st_size_t osamp, st_volume_t vol) {
+		// Nothing to flush: this implementation does no work here.
+		return (ST_SUCCESS);
+	}
+};
+
+
+/*
+ * Validate the rate pair and reset the resampler state.
+ *
+ * Reports an error when the rates are equal, when inrate is not a whole
+ * multiple of outrate, or when either rate is 65536 or above.
+ */
+template<bool stereo, bool reverseStereo>
+SimpleRateConverter<stereo, reverseStereo>::SimpleRateConverter(st_rate_t inrate, st_rate_t outrate) {
+	// Equal rates are not handled by this converter.
+	if (inrate == outrate)
+		error("Input and Output rates must be different to use rate effect");
+
+	// Only whole-number ratios are supported here.
+	if (inrate % outrate)
+		error("Input rate must be a multiple of Output rate to use rate effect");
+
+	if (outrate >= 65536 || inrate >= 65536)
+		error("rate effect can only handle rates < 65536");
+
+	// Empty input cache: the first flow() call has to fetch samples.
+	sr.inLen = 0;
+
+	// Input frames advanced per output frame.
+	sr.opos_inc = inrate / outrate;
+	sr.opos = 1;
+}
+
+/*
+ * Assembler entry points for the simple resampler, implemented in
+ * rate_arm_asm.s: _M handles mono input, _S stereo input and _R stereo
+ * input with the two channels swapped.
+ *
+ * fn is called back by the assembler as fn(input, sr->inBuf, 512) whenever
+ * the input cache runs empty. The updated inPtr, inLen and opos are written
+ * back into *sr before the routine returns.
+ */
+extern "C" void ARM_SimpleRate_M(AudioStream       &input,
+                                 int (*fn)(Audio::AudioStream&,int16*,int),
+                                 SimpleRateDetails *sr,
+                                 st_sample_t       *obuf,
+                                 st_size_t          osamp,
+                                 st_volume_t        vol_l,
+                                 st_volume_t        vol_r);
+
+extern "C" void ARM_SimpleRate_S(AudioStream       &input,
+                                 int (*fn)(Audio::AudioStream&,int16*,int),
+                                 SimpleRateDetails *sr,
+                                 st_sample_t       *obuf,
+                                 st_size_t          osamp,
+                                 st_volume_t        vol_l,
+                                 st_volume_t        vol_r);
+
+extern "C" void ARM_SimpleRate_R(AudioStream       &input,
+                                 int (*fn)(Audio::AudioStream&,int16*,int),
+                                 SimpleRateDetails *sr,
+                                 st_sample_t       *obuf,
+                                 st_size_t          osamp,
+                                 st_volume_t        vol_l,
+                                 st_volume_t        vol_r);
+
+/*
+ * C-linkage wrapper around AudioStream::readBuffer. Its address is passed
+ * to the ARM_*Rate_* routines so that the assembler can refill its input
+ * cache without having to know how C++ member functions are called.
+ *
+ * a is the destination buffer, b the number of samples requested; the
+ * return value is whatever input.readBuffer returns.
+ */
+extern "C" int SimpleRate_readFudge(Audio::AudioStream &input,
+                                    int16 *a, int b)
+{
+  return input.readBuffer(a, b);
+}
+
+/*
+ * Resample input and mix the result into obuf by handing the work to the
+ * ARM_SimpleRate_* assembler routine that matches the template arguments.
+ * Always returns ST_SUCCESS.
+ */
+template<bool stereo, bool reverseStereo>
+int SimpleRateConverter<stereo, reverseStereo>::flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r) {
+#ifdef DEBUG_RATECONV
+	fprintf(stderr, "Simple st=%d rev=%d\n", stereo, reverseStereo);
+	fflush(stderr);
+#endif
+
+	// The template constants select the channel layout variant.
+	if (stereo) {
+		if (reverseStereo)
+			ARM_SimpleRate_R(input, &SimpleRate_readFudge, &sr, obuf, osamp, vol_l, vol_r);
+		else
+			ARM_SimpleRate_S(input, &SimpleRate_readFudge, &sr, obuf, osamp, vol_l, vol_r);
+	} else {
+		ARM_SimpleRate_M(input, &SimpleRate_readFudge, &sr, obuf, osamp, vol_l, vol_r);
+	}
+
+	return ST_SUCCESS;
+}
+
+/**
+ * Audio rate converter based on simple linear Interpolation.
+ *
+ * The use of fractional increment allows us to use no buffer. It
+ * avoids the problems at the end of the buffer we had with the old
+ * method which stored a possibly big buffer of size
+ * lcm(in_rate,out_rate).
+ *
+ * Limited to sampling frequency <= 65535 Hz.
+ *
+ * LinearRateDetails is the converter state shared with the ARM_LinearRate_*
+ * routines in rate_arm_asm.s, which address members by fixed byte offsets
+ * (icur at 16, ilast at 20, opos_frac at 24, opos_inc_frac at 28). Member
+ * order and sizes must not change without updating the assembler.
+ */
+
+typedef struct {
+	const st_sample_t *inPtr; // read position inside the input cache
+	int inLen; // count of cached samples left, maintained by the assembler
+
+	/** position of how far output is ahead of input */
+	/** Holds what would have been opos-ipos */
+	long opos;
+
+	/** integer position increment in the output stream */
+	long opos_inc;
+
+	/** current sample(s) in the input stream (left/right channel) */
+	st_sample_t icur[2];
+	/** last sample(s) in the input stream (left/right channel) */
+	st_sample_t ilast[2];
+
+	/** fractional position in the output stream */
+	long opos_frac;
+
+	/** fractional position increment in the output stream */
+	long opos_inc_frac;
+
+	st_sample_t inBuf[INTERMEDIATE_BUFFER_SIZE]; // input cache filled through the read callback
+} LinearRateDetails;
+
+/*
+ * Assembler entry points for the linear-interpolating resampler,
+ * implemented in rate_arm_asm.s: _M handles mono input, _S stereo input
+ * and _R stereo input with the two channels swapped.
+ *
+ * The parameters mirror the ARM_SimpleRate_* routines, with the state
+ * block being a LinearRateDetails instead.
+ */
+extern "C" void ARM_LinearRate_M(AudioStream       &input,
+                                 int (*fn)(Audio::AudioStream&,int16*,int),
+                                 LinearRateDetails *lr,
+                                 st_sample_t       *obuf,
+                                 st_size_t          osamp,
+                                 st_volume_t        vol_l,
+                                 st_volume_t        vol_r);
+
+extern "C" void ARM_LinearRate_S(AudioStream       &input,
+                                 int (*fn)(Audio::AudioStream&,int16*,int),
+                                 LinearRateDetails *lr,
+                                 st_sample_t       *obuf,
+                                 st_size_t          osamp,
+                                 st_volume_t        vol_l,
+                                 st_volume_t        vol_r);
+
+extern "C" void ARM_LinearRate_R(AudioStream       &input,
+                                 int (*fn)(Audio::AudioStream&,int16*,int),
+                                 LinearRateDetails *lr,
+                                 st_sample_t       *obuf,
+                                 st_size_t          osamp,
+                                 st_volume_t        vol_l,
+                                 st_volume_t        vol_r);
+
+/**
+ * RateConverter front end for the ARM_LinearRate_* assembler routines.
+ *
+ * The template arguments pick the routine used by flow(): mono (stereo ==
+ * false), stereo, or stereo with the two channels swapped (reverseStereo).
+ */
+template<bool stereo, bool reverseStereo>
+class LinearRateConverter : public RateConverter {
+protected:
+	LinearRateDetails lr; // state block handed to the assembler on every flow() call
+
+public:
+	LinearRateConverter(st_rate_t inrate, st_rate_t outrate);
+	int flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r);
+	int drain(st_sample_t *obuf, st_size_t osamp, st_volume_t vol) {
+		// Nothing to flush: this implementation does no work here.
+		return (ST_SUCCESS);
+	}
+};
+
+
+/*
+ * Validate the rate pair and reset the interpolator state.
+ *
+ * Reports an error when the rates are equal or when either rate is 65536
+ * or above. The input/output ratio is stored as an integer part (opos_inc)
+ * and a FRAC_BITS-wide fractional part (opos_inc_frac).
+ */
+template<bool stereo, bool reverseStereo>
+LinearRateConverter<stereo, reverseStereo>::LinearRateConverter(st_rate_t inrate, st_rate_t outrate) {
+	if (inrate == outrate)
+		error("Input and Output rates must be different to use rate effect");
+
+	if (outrate >= 65536 || inrate >= 65536)
+		error("rate effect can only handle rates < 65536");
+
+	// Rate ratio as a fixed-point number with FRAC_BITS fractional bits.
+	const unsigned long step = (inrate << FRAC_BITS) / outrate;
+	const unsigned long fracMask = (1UL << FRAC_BITS) - 1;
+
+	lr.opos_inc = step >> FRAC_BITS;
+	lr.opos_inc_frac = step & fracMask;
+
+	lr.opos = 1;
+	lr.opos_frac = 0;
+
+	// Interpolation starts from silence on both channels.
+	for (int ch = 0; ch < 2; ++ch) {
+		lr.icur[ch] = 0;
+		lr.ilast[ch] = 0;
+	}
+
+	// Empty input cache: the first flow() call has to fetch samples.
+	lr.inLen = 0;
+}
+
+/*
+ * Resample input and mix the result into obuf by handing the work to the
+ * ARM_LinearRate_* assembler routine that matches the template arguments.
+ * Always returns ST_SUCCESS.
+ */
+template<bool stereo, bool reverseStereo>
+int LinearRateConverter<stereo, reverseStereo>::flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r) {
+#ifdef DEBUG_RATECONV
+	fprintf(stderr, "Linear st=%d rev=%d\n", stereo, reverseStereo);
+	fflush(stderr);
+#endif
+
+	// The template constants select the channel layout variant.
+	if (stereo) {
+		if (reverseStereo)
+			ARM_LinearRate_R(input, &SimpleRate_readFudge, &lr, obuf, osamp, vol_l, vol_r);
+		else
+			ARM_LinearRate_S(input, &SimpleRate_readFudge, &lr, obuf, osamp, vol_l, vol_r);
+	} else {
+		ARM_LinearRate_M(input, &SimpleRate_readFudge, &lr, obuf, osamp, vol_l, vol_r);
+	}
+
+	return ST_SUCCESS;
+}
+
+
+#pragma mark -
+
+
+/**
+ * Simple audio rate converter for the case that the inrate equals the outrate.
+ *
+ * The ARM_CopyRate_* routines live in rate_arm_asm.s. They scale len samples
+ * from _buffer by vol_l/vol_r and add them, with saturation, to the
+ * interleaved left/right pairs in obuf: _M duplicates each mono sample into
+ * both channels, _S takes left/right pairs, _R takes pairs with the two
+ * channels swapped. Their loops test len only after the first pass, so
+ * callers must pass len > 0.
+ */
+extern "C" void ARM_CopyRate_M(st_size_t    len,
+                               st_sample_t *obuf,
+                               st_volume_t  vol_l,
+                               st_volume_t  vol_r,
+                               st_sample_t *_buffer);
+
+extern "C" void ARM_CopyRate_S(st_size_t    len,
+                               st_sample_t *obuf,
+                               st_volume_t  vol_l,
+                               st_volume_t  vol_r,
+                               st_sample_t *_buffer);
+
+extern "C" void ARM_CopyRate_R(st_size_t    len,
+                               st_sample_t *obuf,
+                               st_volume_t  vol_l,
+                               st_volume_t  vol_r,
+                               st_sample_t *_buffer);
+
+
+/**
+ * RateConverter used when no resampling is needed: samples are read into a
+ * temporary buffer and mixed into the output by the ARM_CopyRate_* routines.
+ *
+ * The template arguments pick the routine: mono (stereo == false), stereo,
+ * or stereo with the two channels swapped (reverseStereo).
+ */
+template<bool stereo, bool reverseStereo>
+class CopyRateConverter : public RateConverter {
+	st_sample_t *_buffer;	// temporary read buffer, grown on demand
+	st_size_t _bufferSize;	// capacity of _buffer in samples
+public:
+	CopyRateConverter() : _buffer(0), _bufferSize(0) {}
+	~CopyRateConverter() {
+		free(_buffer);
+	}
+
+	/**
+	 * Read up to osamp frames from input and mix them into obuf.
+	 *
+	 * @param input  stream to read from; its isStereo() must match the
+	 *               stereo template argument
+	 * @param obuf   output buffer the samples are added to
+	 * @param osamp  number of output frames wanted (doubled internally for
+	 *               stereo input to get a sample count)
+	 * @param vol_l  volume applied to the left output channel
+	 * @param vol_r  volume applied to the right output channel
+	 * @return always ST_SUCCESS
+	 */
+	virtual int flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r) {
+		assert(input.isStereo() == stereo);
+
+#ifdef DEBUG_RATECONV
+		fprintf(stderr, "Copy st=%d rev=%d\n", stereo, reverseStereo);
+		fflush(stderr);
+#endif
+
+		if (stereo)
+			osamp *= 2;
+
+		// Reallocate temp buffer, if necessary
+		if (osamp > _bufferSize) {
+			free(_buffer);
+			_buffer = (st_sample_t *)malloc(osamp * sizeof(st_sample_t));
+			if (!_buffer) {
+				// Keep the bookkeeping consistent with the (now absent) buffer
+				// so that a later call retries the allocation.
+				_bufferSize = 0;
+				error("CopyRateConverter: cannot allocate memory for temp buffer");
+				// Only reached if error() returns: nothing can be mixed.
+				return (ST_SUCCESS);
+			}
+			_bufferSize = osamp;
+		}
+
+		// Read up to 'osamp' samples into our temporary buffer. The count is
+		// kept as a signed int so that the check below also catches a
+		// negative result, whatever the signedness of st_size_t.
+		const int len = input.readBuffer(_buffer, osamp);
+		if (len <= 0)
+			return (ST_SUCCESS);
+
+		// Mix the data into the output buffer
+		if (stereo && reverseStereo)
+			ARM_CopyRate_R(len, obuf, vol_l, vol_r, _buffer);
+		else if (stereo)
+			ARM_CopyRate_S(len, obuf, vol_l, vol_r, _buffer);
+		else
+			ARM_CopyRate_M(len, obuf, vol_l, vol_r, _buffer);
+
+		return (ST_SUCCESS);
+	}
+
+	/** Nothing is held back between calls, so there is nothing to flush. */
+	virtual int drain(st_sample_t *obuf, st_size_t osamp, st_volume_t vol) {
+		return (ST_SUCCESS);
+	}
+};
+
+
+#pragma mark -
+
+
+/**
+ * Build the RateConverter suited to the given rate pair and channel layout.
+ *
+ * Equal rates give a CopyRateConverter, an inrate that is a whole multiple
+ * of outrate gives a SimpleRateConverter, and anything else gives a
+ * LinearRateConverter. reverseStereo is only honoured for stereo input.
+ * The returned object is allocated with new.
+ */
+RateConverter *makeRateConverter(st_rate_t inrate, st_rate_t outrate, bool stereo, bool reverseStereo) {
+	// Same rate on both sides: plain copy with volume applied.
+	if (inrate == outrate) {
+		if (!stereo)
+			return new CopyRateConverter<false, false>();
+		if (reverseStereo)
+			return new CopyRateConverter<true, true>();
+		return new CopyRateConverter<true, false>();
+	}
+
+	// Whole-number ratio: no interpolation required.
+	if ((inrate % outrate) == 0) {
+		if (!stereo)
+			return new SimpleRateConverter<false, false>(inrate, outrate);
+		if (reverseStereo)
+			return new SimpleRateConverter<true, true>(inrate, outrate);
+		return new SimpleRateConverter<true, false>(inrate, outrate);
+	}
+
+	// Everything else goes through linear interpolation.
+	if (!stereo)
+		return new LinearRateConverter<false, false>(inrate, outrate);
+	if (reverseStereo)
+		return new LinearRateConverter<true, true>(inrate, outrate);
+	return new LinearRateConverter<true, false>(inrate, outrate);
+}
+
+} // End of namespace Audio
--- a/sound/rate_arm_asm.s
+++ b/sound/rate_arm_asm.s
@ -0,0 +1,689 @@
+@ ScummVM Scumm Interpreter
+@ Copyright (C) 2007 The ScummVM project
+@
+@ This program is free software@ you can redistribute it and/or
+@ modify it under the terms of the GNU General Public License
+@ as published by the Free Software Foundation@ either version 2
+@ of the License, or (at your option) any later version.
+@
+@ This program is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY@ without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+@ GNU General Public License for more details.
+@
+@ You should have received a copy of the GNU General Public License
+@ along with this program@ if not, write to the Free Software
+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+@
+@ $URL: $
+@ $Id:  $
+@
+@ @author Robin Watts (robin@wss.co.uk)
+@
+@ This file, together with rate_arm.cpp, provides an ARM optimised version
+@ of rate.cpp. The algorithm is essentially the same as that within rate.cpp
+@ so to understand this file you should understand rate.cpp first.
+
+        .text
+
+        .global ARM_CopyRate_M
+        .global ARM_CopyRate_S
+        .global ARM_CopyRate_R
+        .global ARM_SimpleRate_M
+        .global ARM_SimpleRate_S
+        .global ARM_SimpleRate_R
+        .global ARM_LinearRate_M
+        .global ARM_LinearRate_S
+        .global ARM_LinearRate_R
+
+ARM_CopyRate_M:
+        @ Mono copy/mix: each input sample is scaled by vol_l and vol_r and
+        @ added, with saturation, to the obuf[0]/obuf[1] pair.
+        @ The loop tests len at the bottom, so it always runs at least once.
+        @
+        @ r0 = len
+        @ r1 = obuf
+        @ r2 = vol_l
+        @ r3 = vol_r
+        @ <> = ptr (fifth argument, on the stack)
+        LDR     r12,[r13]               @ r12= ptr, fetched before the push moves r13
+        STMFD   r13!,{r4-r7,r14}
+
+        MOV     r14,#0                  @ r14= 0
+        ORR     r2, r2, r2, LSL #8      @ r2 = vol_l as 16 bits
+        ORR     r3, r3, r3, LSL #8      @ r3 = vol_r as 16 bits
+CopyRate_M_loop:
+        LDRSH   r5, [r12], #2           @ r5 = tmp0 = tmp1 = *ptr++
+        LDRSH   r6, [r1]                @ r6 = obuf[0]
+        LDRSH   r7, [r1, #2]            @ r7 = obuf[1]
+        MUL     r4, r2, r5              @ r4 = tmp0*vol_l
+        MUL     r5, r3, r5              @ r5 = tmp1*vol_r
+
+        @ On signed overflow (V set) RSC replaces the sum with 0x80000000-!C:
+        @ 0x7FFFFFFF after a positive overflow, 0x80000000 after a negative one.
+        ADDS    r6, r4, r6, LSL #16     @ r6 = obuf[0]<<16 + tmp0*vol_l
+        RSCVS   r6, r14,#1<<31          @ Clamp r6
+        ADDS    r7, r5, r7, LSL #16     @ r7 = obuf[1]<<16 + tmp1*vol_r
+        RSCVS   r7, r14,#1<<31          @ Clamp r7
+
+        MOV     r6, r6, LSR #16         @ Shift back to halfword
+        MOV     r7, r7, LSR #16         @ Shift back to halfword
+
+        STRH    r6, [r1], #2            @ Store output value
+        STRH    r7, [r1], #2            @ Store output value
+
+        SUBS    r0,r0,#1                @ len--
+        BGT     CopyRate_M_loop         @ and loop
+
+        LDMFD   r13!,{r4-r7,PC}         @ restore registers and return
+
+ARM_CopyRate_S:
+        @ Stereo copy/mix: each input pair is scaled (first sample by vol_l,
+        @ second by vol_r) and added, with saturation, to obuf[0]/obuf[1].
+        @ len counts samples, so it drops by 2 per pair. The loop tests len
+        @ at the bottom, so it always runs at least once.
+        @
+        @ r0 = len
+        @ r1 = obuf
+        @ r2 = vol_l
+        @ r3 = vol_r
+        @ <> = ptr (fifth argument, on the stack)
+        LDR     r12,[r13]               @ r12= ptr, fetched before the push moves r13
+        STMFD   r13!,{r4-r7,r14}
+
+        MOV     r14,#0                  @ r14= 0
+        ORR     r2, r2, r2, LSL #8      @ r2 = vol_l as 16 bits
+        ORR     r3, r3, r3, LSL #8      @ r3 = vol_r as 16 bits
+CopyRate_S_loop:
+        LDRSH   r4, [r12],#2            @ r4 = tmp0 = *ptr++
+        LDRSH   r5, [r12],#2            @ r5 = tmp1 = *ptr++
+        LDRSH   r6, [r1]                @ r6 = obuf[0]
+        LDRSH   r7, [r1,#2]             @ r7 = obuf[1]
+        MUL     r4, r2, r4              @ r4 = tmp0*vol_l
+        MUL     r5, r3, r5              @ r5 = tmp1*vol_r
+
+        @ On signed overflow (V set) RSC replaces the sum with 0x80000000-!C:
+        @ 0x7FFFFFFF after a positive overflow, 0x80000000 after a negative one.
+        ADDS    r6, r4, r6, LSL #16     @ r6 = obuf[0]<<16 + tmp0*vol_l
+        RSCVS   r6, r14,#1<<31          @ Clamp r6
+        ADDS    r7, r5, r7, LSL #16     @ r7 = obuf[1]<<16 + tmp1*vol_r
+        RSCVS   r7, r14,#1<<31          @ Clamp r7
+
+        MOV     r6, r6, LSR #16         @ Shift back to halfword
+        MOV     r7, r7, LSR #16         @ Shift back to halfword
+
+        STRH    r6, [r1],#2             @ Store output value
+        STRH    r7, [r1],#2             @ Store output value
+
+        SUBS    r0,r0,#2                @ len -= 2
+        BGT     CopyRate_S_loop         @ and loop
+
+        LDMFD   r13!,{r4-r7,PC}         @ restore registers and return
+
+ARM_CopyRate_R:
+        @ Reversed stereo copy/mix: as ARM_CopyRate_S, but the first sample
+        @ of each input pair goes to obuf[1] (scaled by vol_r) and the second
+        @ to obuf[0] (scaled by vol_l). The loop tests len at the bottom, so
+        @ it always runs at least once.
+        @
+        @ r0 = len
+        @ r1 = obuf
+        @ r2 = vol_l
+        @ r3 = vol_r
+        @ <> = ptr (fifth argument, on the stack)
+        LDR     r12,[r13]               @ r12= ptr, fetched before the push moves r13
+        STMFD   r13!,{r4-r7,r14}
+
+        MOV     r14,#0                  @ r14= 0
+        ORR     r2, r2, r2, LSL #8      @ r2 = vol_l as 16 bits
+        ORR     r3, r3, r3, LSL #8      @ r3 = vol_r as 16 bits
+CopyRate_R_loop:
+        LDRSH   r5, [r12],#2            @ r5 = tmp1 = *ptr++
+        LDRSH   r4, [r12],#2            @ r4 = tmp0 = *ptr++
+        LDRSH   r6, [r1]                @ r6 = obuf[0]
+        LDRSH   r7, [r1,#2]             @ r7 = obuf[1]
+        MUL     r4, r2, r4              @ r4 = tmp0*vol_l
+        MUL     r5, r3, r5              @ r5 = tmp1*vol_r
+
+        @ On signed overflow (V set) RSC replaces the sum with 0x80000000-!C:
+        @ 0x7FFFFFFF after a positive overflow, 0x80000000 after a negative one.
+        ADDS    r6, r4, r6, LSL #16     @ r6 = obuf[0]<<16 + tmp0*vol_l
+        RSCVS   r6, r14,#1<<31          @ Clamp r6
+        ADDS    r7, r5, r7, LSL #16     @ r7 = obuf[1]<<16 + tmp1*vol_r
+        RSCVS   r7, r14,#1<<31          @ Clamp r7
+
+        MOV     r6, r6, LSR #16         @ Shift back to halfword
+        MOV     r7, r7, LSR #16         @ Shift back to halfword
+
+        STRH    r6, [r1],#2             @ Store output value
+        STRH    r7, [r1],#2             @ Store output value
+
+        SUBS    r0,r0,#2                @ len -= 2
+        BGT     CopyRate_R_loop         @ and loop
+
+        LDMFD   r13!,{r4-r7,PC}         @ restore registers and return
+
+ARM_SimpleRate_M:
+        @ Mono, integer-ratio resampling. For every output frame, input
+        @ samples are skipped while opos stays >= 0; the next sample is then
+        @ scaled by vol_l/vol_r and added, with saturation, to
+        @ obuf[0]/obuf[1], and opos is advanced by opos_inc.
+        @
+        @ r0 = AudioStream &input
+        @ r1 = fn, the C callback used to refill sr->inBuf
+        @ r2 = sr (SimpleRateDetails *)
+        @ r3 = obuf
+        @ <> = osamp
+        @ <> = vol_l
+        @ <> = vol_r
+        MOV     r12,r13                 @ r12= address of the stacked arguments
+        STMFD   r13!,{r0-r2,r4-r8,r10-r11,r14} @ [r13]=input [r13,#4]=fn [r13,#8]=sr
+        LDMFD   r12,{r11,r12,r14}       @ r11= osamp
+                                        @ r12= vol_l
+                                        @ r14= vol_r
+        LDMIA   r2,{r0,r1,r2,r8}        @ r0 = inPtr
+                                        @ r1 = inLen
+                                        @ r2 = opos
+                                        @ r8 = opos_inc
+        CMP     r11,#0                  @ if (osamp <= 0)
+        BLE     SimpleRate_M_end        @   bail out
+        MOV     r10,#0                  @ r10= 0, operand for the clamps below
+        ORR     r12,r12,r12,LSL #8      @ r12= vol_l as 16 bits
+        ORR     r14,r14,r14,LSL #8      @ r14= vol_r as 16 bits
+SimpleRate_M_loop:
+        SUBS    r1, r1, #1              @ r1 = inLen -= 1
+        BLT     SimpleRate_M_read       @ cache empty: refill it
+        SUBS    r2, r2, #1              @ r2 = opos--
+        ADDGE   r0, r0, #2              @ if (r2 >= 0) { sr.inPtr++
+        BGE     SimpleRate_M_loop       @                and loop }
+SimpleRate_M_read_return:
+        LDRSH   r5, [r0],#2             @ r5 = tmp0 = tmp1 = *inPtr++
+        LDRSH   r6, [r3]                @ r6 = obuf[0]
+        LDRSH   r7, [r3,#2]             @ r7 = obuf[1]
+        ADD     r2, r2, r8              @ r2 = opos += opos_inc
+        MUL     r4, r12,r5              @ r4 = tmp0*vol_l
+        MUL     r5, r14,r5              @ r5 = tmp1*vol_r
+
+        @ On signed overflow (V set) RSC replaces the sum with 0x80000000-!C:
+        @ 0x7FFFFFFF after a positive overflow, 0x80000000 after a negative one.
+        ADDS    r6, r4, r6, LSL #16     @ r6 = obuf[0]<<16 + tmp0*vol_l
+        RSCVS   r6, r10,#1<<31          @ Clamp r6
+        ADDS    r7, r5, r7, LSL #16     @ r7 = obuf[1]<<16 + tmp1*vol_r
+        RSCVS   r7, r10,#1<<31          @ Clamp r7
+
+        MOV     r6, r6, LSR #16         @ Shift back to halfword
+        MOV     r7, r7, LSR #16         @ Shift back to halfword
+
+        STRH    r6, [r3],#2             @ Store output value
+        STRH    r7, [r3],#2             @ Store output value
+
+        SUBS    r11,r11,#1              @ osamp--
+        BGT     SimpleRate_M_loop       @ and loop
+SimpleRate_M_end:
+        LDR     r14,[r13,#8]            @ r14 = sr
+        ADD     r13,r13,#12             @ Skip over r0-r2 on stack
+        STMIA   r14,{r0,r1,r2}          @ Store back inPtr, inLen and opos
+        LDMFD   r13!,{r4-r8,r10-r11,PC}
+SimpleRate_M_read:
+        LDR     r0, [r13,#4*2]          @ r0 = sr
+        ADD     r0, r0, #16             @ r0 = inPtr = inBuf
+        STMFD   r13!,{r0,r2-r3,r12,r14} @ save what the callback may clobber
+
+        MOV     r1, r0                  @ r1 = inBuf
+        LDR     r0, [r13,#4*5]          @ r0 = AudioStream & input
+        MOV     r2, #512                @ r2 = ARRAYSIZE(inBuf)
+
+        @ Calling back into C++ here. WinCE is fairly easy about such things
+        @ but other OS are more awkward. r9 is preserved for Symbian, and
+        @ we have 3+8+5 = 16 things on the stack (an even number).
+        MOV     r14,PC                  @ return address = instruction after the LDR
+        LDR     PC,[r13,#4*6]           @ inLen = fn(input,inBuf,512)
+        SUBS    r1, r0, #1              @ r1 = inLen-1
+        LDMFD   r13!,{r0,r2-r3,r12,r14} @ flags from the SUBS are left intact
+        BLT     SimpleRate_M_end        @ nothing was read: give up
+        SUBS    r2, r2, #1              @ r2 = opos--
+        ADDGE   r0, r0, #2              @ if (r2 >= 0) { sr.inPtr++
+        BGE     SimpleRate_M_loop       @                and loop }
+        B       SimpleRate_M_read_return
+
+
+ARM_SimpleRate_S:
+        @ Stereo, integer-ratio resampling. For every output frame, input
+        @ pairs are skipped while opos stays >= 0; the next pair is then
+        @ scaled (first sample by vol_l, second by vol_r) and added, with
+        @ saturation, to obuf[0]/obuf[1], and opos is advanced by opos_inc.
+        @
+        @ r0 = AudioStream &input
+        @ r1 = fn, the C callback used to refill sr->inBuf
+        @ r2 = sr (SimpleRateDetails *)
+        @ r3 = obuf
+        @ <> = osamp
+        @ <> = vol_l
+        @ <> = vol_r
+        MOV     r12,r13                 @ r12= address of the stacked arguments
+        STMFD   r13!,{r0-r2,r4-r8,r10-r11,r14} @ [r13]=input [r13,#4]=fn [r13,#8]=sr
+        LDMFD   r12,{r11,r12,r14}       @ r11= osamp
+                                        @ r12= vol_l
+                                        @ r14= vol_r
+        LDMIA   r2,{r0,r1,r2,r8}        @ r0 = inPtr
+                                        @ r1 = inLen
+                                        @ r2 = opos
+                                        @ r8 = opos_inc
+        CMP     r11,#0                  @ if (osamp <= 0)
+        BLE     SimpleRate_S_end        @   bail out
+        MOV     r10,#0                  @ r10= 0, operand for the clamps below
+        ORR     r12,r12,r12,LSL #8      @ r12= vol_l as 16 bits
+        ORR     r14,r14,r14,LSL #8      @ r14= vol_r as 16 bits
+SimpleRate_S_loop:
+        SUBS    r1, r1, #2              @ r1 = inLen -= 2
+        BLT     SimpleRate_S_read       @ cache empty: refill it
+        SUBS    r2, r2, #1              @ r2 = opos--
+        ADDGE   r0, r0, #4              @ if (r2 >= 0) { sr.inPtr += 2
+        BGE     SimpleRate_S_loop       @                and loop }
+SimpleRate_S_read_return:
+        LDRSH   r4, [r0],#2             @ r4 = tmp0 = *inPtr++
+        LDRSH   r5, [r0],#2             @ r5 = tmp1 = *inPtr++
+        LDRSH   r6, [r3]                @ r6 = obuf[0]
+        LDRSH   r7, [r3,#2]             @ r7 = obuf[1]
+        ADD     r2, r2, r8              @ r2 = opos += opos_inc
+        MUL     r4, r12,r4              @ r4 = tmp0*vol_l
+        MUL     r5, r14,r5              @ r5 = tmp1*vol_r
+
+        @ On signed overflow (V set) RSC replaces the sum with 0x80000000-!C:
+        @ 0x7FFFFFFF after a positive overflow, 0x80000000 after a negative one.
+        ADDS    r6, r4, r6, LSL #16     @ r6 = obuf[0]<<16 + tmp0*vol_l
+        RSCVS   r6, r10,#1<<31          @ Clamp r6
+        ADDS    r7, r5, r7, LSL #16     @ r7 = obuf[1]<<16 + tmp1*vol_r
+        RSCVS   r7, r10,#1<<31          @ Clamp r7
+
+        MOV     r6, r6, LSR #16         @ Shift back to halfword
+        MOV     r7, r7, LSR #16         @ Shift back to halfword
+
+        STRH    r6, [r3],#2             @ Store output value
+        STRH    r7, [r3],#2             @ Store output value
+
+        SUBS    r11,r11,#1              @ osamp--
+        BGT     SimpleRate_S_loop       @ and loop
+SimpleRate_S_end:
+        LDR     r14,[r13,#8]            @ r14 = sr
+        ADD     r13,r13,#12             @ skip over r0-r2 on stack
+        STMIA   r14,{r0,r1,r2}          @ store back inPtr, inLen and opos
+        LDMFD   r13!,{r4-r8,r10-r11,PC}
+SimpleRate_S_read:
+        LDR     r0, [r13,#4*2]          @ r0 = sr
+        ADD     r0, r0, #16             @ r0 = inPtr = inBuf
+        STMFD   r13!,{r0,r2-r3,r12,r14} @ save what the callback may clobber
+
+        MOV     r1, r0                  @ r1 = inBuf
+        LDR     r0, [r13,#4*5]          @ r0 = AudioStream & input
+        MOV     r2, #512                @ r2 = ARRAYSIZE(inBuf)
+
+        @ Calling back into C++ here. WinCE is fairly easy about such things
+        @ but other OS are more awkward. r9 is preserved for Symbian, and
+        @ we have 3+8+5 = 16 things on the stack (an even number).
+        MOV     r14,PC                  @ return address = instruction after the LDR
+        LDR     PC,[r13,#4*6]           @ inLen = fn(input,inBuf,512)
+        SUBS    r1, r0, #2              @ r1 = inLen-2
+        LDMFD   r13!,{r0,r2-r3,r12,r14} @ flags from the SUBS are left intact
+        BLT     SimpleRate_S_end        @ less than one pair was read: give up
+        SUBS    r2, r2, #1              @ r2 = opos--
+        ADDGE   r0, r0, #4              @ if (r2 >= 0) { sr.inPtr += 2
+        BGE     SimpleRate_S_loop       @                and loop }
+        B       SimpleRate_S_read_return
+
+
+
+ARM_SimpleRate_R:
+        @ Reversed stereo, integer-ratio resampling: as ARM_SimpleRate_S, but
+        @ the first sample of each input pair goes to obuf[1] (scaled by
+        @ vol_r) and the second to obuf[0] (scaled by vol_l).
+        @
+        @ r0 = AudioStream &input
+        @ r1 = fn, the C callback used to refill sr->inBuf
+        @ r2 = sr (SimpleRateDetails *)
+        @ r3 = obuf
+        @ <> = osamp
+        @ <> = vol_l
+        @ <> = vol_r
+        MOV     r12,r13                 @ r12= address of the stacked arguments
+        STMFD   r13!,{r0-r2,r4-r8,r10-r11,r14} @ [r13]=input [r13,#4]=fn [r13,#8]=sr
+        LDMFD   r12,{r11,r12,r14}       @ r11= osamp
+                                        @ r12= vol_l
+                                        @ r14= vol_r
+        LDMIA   r2,{r0,r1,r2,r8}        @ r0 = inPtr
+                                        @ r1 = inLen
+                                        @ r2 = opos
+                                        @ r8 = opos_inc
+        CMP     r11,#0                  @ if (osamp <= 0)
+        BLE     SimpleRate_R_end        @   bail out
+        MOV     r10,#0                  @ r10= 0, operand for the clamps below
+        ORR     r12,r12,r12,LSL #8      @ r12= vol_l as 16 bits
+        ORR     r14,r14,r14,LSL #8      @ r14= vol_r as 16 bits
+SimpleRate_R_loop:
+        SUBS    r1, r1, #2              @ r1 = inLen -= 2
+        BLT     SimpleRate_R_read       @ cache empty: refill it
+        SUBS    r2, r2, #1              @ r2 = opos--
+        ADDGE   r0, r0, #4              @ if (r2 >= 0) { sr.inPtr += 2
+        BGE     SimpleRate_R_loop       @                and loop }
+SimpleRate_R_read_return:
+        LDRSH   r5, [r0],#2             @ r5 = tmp1 = *inPtr++
+        LDRSH   r4, [r0],#2             @ r4 = tmp0 = *inPtr++
+        LDRSH   r6, [r3]                @ r6 = obuf[0]
+        LDRSH   r7, [r3,#2]             @ r7 = obuf[1]
+        ADD     r2, r2, r8              @ r2 = opos += opos_inc
+        MUL     r4, r12,r4              @ r4 = tmp0*vol_l
+        MUL     r5, r14,r5              @ r5 = tmp1*vol_r
+
+        @ On signed overflow (V set) RSC replaces the sum with 0x80000000-!C:
+        @ 0x7FFFFFFF after a positive overflow, 0x80000000 after a negative one.
+        ADDS    r6, r4, r6, LSL #16     @ r6 = obuf[0]<<16 + tmp0*vol_l
+        RSCVS   r6, r10,#1<<31          @ Clamp r6
+        ADDS    r7, r5, r7, LSL #16     @ r7 = obuf[1]<<16 + tmp1*vol_r
+        RSCVS   r7, r10,#1<<31          @ Clamp r7
+
+        MOV     r6, r6, LSR #16         @ Shift back to halfword
+        MOV     r7, r7, LSR #16         @ Shift back to halfword
+
+        STRH    r6, [r3],#2             @ Store output value
+        STRH    r7, [r3],#2             @ Store output value
+
+        SUBS    r11,r11,#1              @ osamp--
+        BGT     SimpleRate_R_loop       @ and loop
+SimpleRate_R_end:
+        LDR     r14,[r13,#8]            @ r14 = sr
+        ADD     r13,r13,#12             @ Skip over r0-r2 on stack
+        STMIA   r14,{r0,r1,r2}          @ Store back inPtr, inLen and opos
+        LDMFD   r13!,{r4-r8,r10-r11,PC}
+SimpleRate_R_read:
+        LDR     r0, [r13,#4*2]          @ r0 = sr
+        ADD     r0, r0, #16             @ r0 = inPtr = inBuf
+        STMFD   r13!,{r0,r2-r3,r12,r14} @ save what the callback may clobber
+
+        MOV     r1, r0                  @ r1 = inBuf
+        LDR     r0, [r13,#4*5]          @ r0 = AudioStream & input
+        MOV     r2, #512                @ r2 = ARRAYSIZE(inBuf)
+
+        @ Calling back into C++ here. WinCE is fairly easy about such things
+        @ but other OS are more awkward. r9 is preserved for Symbian, and
+        @ we have 3+8+5 = 16 things on the stack (an even number).
+        MOV     r14,PC                  @ return address = instruction after the LDR
+        LDR     PC,[r13,#4*6]           @ inLen = fn(input,inBuf,512)
+        SUBS    r1, r0, #2              @ r1 = inLen-2
+        LDMFD   r13!,{r0,r2-r3,r12,r14} @ flags from the SUBS are left intact
+        BLT     SimpleRate_R_end        @ less than one pair was read: give up
+        SUBS    r2, r2, #1              @ r2 = opos--
+        ADDGE   r0, r0, #4              @ if (r2 >= 0) { sr.inPtr += 2
+        BGE     SimpleRate_R_loop       @                and loop }
+        B       SimpleRate_R_read_return
+
+
+ARM_LinearRate_M:
+        @ Linear-interpolating rate converter for a mono input stream.
+        @ Each output sample is interpolated between the previous (ilast)
+        @ and current (icur) input sample, scaled by vol_l / vol_r and
+        @ added with saturation to the two halfwords at obuf.
+        @
+        @ r0 = AudioStream &input
+        @ r1 = input.readBuffer
+        @ r2 = pointer to the rate details (sr)
+        @ r3 = obuf
+        @ <> = osamp
+        @ <> = vol_l
+        @ <> = vol_r
+        @
+        @ Layout of the details block as accessed by this routine:
+        @   #0  inPtr      #4  inLen       #8  opos      #12 opos_inc
+        @   #16 icur[0]    #18 icur[1]     #20 ilast[0]  #22 ilast[1]
+        @   #24 opos_frac  #28 opos_frac_inc             #32 inBuf
+        @
+        @ On exit inPtr, inLen and opos are written back to the block.
+        @ NOTE(review): when the routine exits from part2 because osamp ran
+        @ out, opos is saved before opos_inc has been added, and a later
+        @ call with inLen > 0 resumes directly in part2 - confirm this
+        @ matches the behaviour of rate.cpp across call boundaries.
+        MOV     r12,r13                 @ r12= address of the stacked args
+        STMFD   r13!,{r0-r1,r4-r11,r14}
+        LDMFD   r12,{r11,r12,r14}       @ r11= osamp
+                                        @ r12= vol_l
+                                        @ r14= vol_r
+        LDMIA   r2,{r0,r1,r8}           @ r0 = inPtr
+                                        @ r1 = inLen
+                                        @ r8 = opos
+        CMP     r11,#0                  @ if (osamp <= 0)
+        BLE     LinearRate_M_end        @   bail out
+        ORR     r12,r12,r12,LSL #8      @ r12= vol_l | (vol_l<<8)
+        ORR     r14,r14,r14,LSL #8      @ r14= vol_r | (vol_r<<8)
+        CMP     r1,#0                   @ if input is still buffered,
+        BGT     LinearRate_M_part2      @   resume producing output
+
+        @ part1 - read input samples until opos < 0
+LinearRate_M_loop:
+        SUBS    r1, r1, #1              @ r1 = inLen -= 1
+        BLT     LinearRate_M_read       @ buffer exhausted: refill it
+LinearRate_M_read_return:
+        LDR     r10,[r2, #16]           @ r10= icur[0,1]
+        LDRSH   r5, [r0],#2             @ r5 = tmp1 = *inPtr++
+        SUBS    r8, r8, #1              @ r8 = opos-- (flags used by BGE)
+        STR     r10,[r2,#20]            @      ilast[0,1] = icur[0,1]
+        STRH    r5, [r2,#16]            @      icur[0] = tmp1
+        BGE     LinearRate_M_loop       @ keep reading while opos >= 0
+
+        @ part2 - form output samples
+LinearRate_M_part2:
+        @ We are guaranteed that opos < 0 here
+        LDRSH   r6, [r2,#20]            @ r6 = ilast[0]
+        LDRSH   r5, [r2,#16]            @ r5 = icur[0]
+        LDRH    r4, [r2,#24]            @ r4 = opos_frac
+        LDR     r10,[r2,#28]            @ r10= opos_frac_inc
+        MOV     r6, r6, LSL #16         @ r6 = ilast[0]<<16
+        SUB     r5, r5, r6, ASR #16     @ r5 = icur[0] - ilast[0]
+        ADD     r6, r6, #1<<15          @ r6 = (ilast[0]<<16) + (1<<15)
+        MLA     r6, r4, r5, r6  @ r6 += (icur[0]-ilast[0])*opos_frac
+
+        ADD     r4, r4, r10             @ r4 = tmp = opos_frac+opos_frac_inc
+        STRH    r4,[r2,#24]             @ opos_frac = tmp & 65535
+        ADD     r8, r8, r4, LSR #16     @ opos += (tmp>>16), the carry
+
+        LDRSH   r4, [r3]                @ r4 = obuf[0]
+        LDRSH   r5, [r3,#2]             @ r5 = obuf[1]
+        MOV     r6, r6, ASR #16         @ r6 = interpolated sample
+        MUL     r7, r12,r6              @ r7 = sample*vol_l
+        MUL     r6, r14,r6              @ r6 = sample*vol_r
+
+        @ Saturating add in the top halfword: if ADDS overflows (V set),
+        @ RSC yields 0x80000000 - 0 - !C, i.e. 0x7FFFFFFF after a positive
+        @ overflow (C clear) or 0x80000000 after a negative one (C set).
+        ADDS    r7, r7, r4, LSL #16     @ r7 = obuf[0]<<16 + sample*vol_l
+        MOV     r4, #0
+        RSCVS   r7, r4, #1<<31          @ Clamp r7
+        ADDS    r6, r6, r5, LSL #16     @ r6 = obuf[1]<<16 + sample*vol_r
+        RSCVS   r6, r4, #1<<31          @ Clamp r6
+
+        MOV     r7, r7, LSR #16         @ Shift back to halfword
+        MOV     r6, r6, LSR #16         @ Shift back to halfword
+
+        LDR     r5, [r2,#12]            @ r5 = opos_inc
+        STRH    r7, [r3],#2             @ Store obuf[0], advance obuf
+        STRH    r6, [r3],#2             @ Store obuf[1], advance obuf
+        SUBS    r11, r11,#1             @ osamp--
+        BLE     LinearRate_M_end        @ end if no output space is left
+
+        ADDS    r8, r8, r5              @ r8 = opos += opos_inc
+        BLT     LinearRate_M_part2      @ still behind: another output
+        B       LinearRate_M_loop       @ otherwise read more input
+LinearRate_M_end:
+        ADD     r13,r13,#8              @ Skip over r0-r1 on stack
+        STMIA   r2,{r0,r1,r8}           @ Store back inPtr, inLen, opos
+        LDMFD   r13!,{r4-r11,PC}
+LinearRate_M_read:
+        ADD     r0, r2, #32             @ r0 = inPtr = inBuf
+        STMFD   r13!,{r0,r2-r3,r12,r14}
+
+        MOV     r1, r0                  @ r1 = inBuf
+        LDR     r0, [r13,#4*5]          @ r0 = AudioStream & input
+        MOV     r2, #512                @ r2 = ARRAYSIZE(inBuf)
+
+        @ Calling back into C++ here. WinCE is fairly easy about such things
+        @ but other OS are more awkward. r9 is preserved for Symbian, and
+        @ we have 2+9+5 = 16 things on the stack (an even number).
+        MOV     r14,PC                  @ return address = insn after LDR PC
+        LDR     PC,[r13,#4*6]           @ inLen = input.readBuffer(inBuf,512)
+        SUBS    r1, r0, #1              @ r1 = inLen-1
+        LDMFD   r13!,{r0,r2-r3,r12,r14} @ restore regs (flags untouched)
+        BLT     LinearRate_M_end        @ nothing was read: give up
+        B       LinearRate_M_read_return
+
+ARM_LinearRate_S:
+        @ Linear-interpolating rate converter for an interleaved stereo
+        @ input stream. Each channel is interpolated between its previous
+        @ (ilast) and current (icur) input sample; channel 0 is scaled by
+        @ vol_l and mixed into obuf[0], channel 1 is scaled by vol_r and
+        @ mixed into obuf[1], both with saturation.
+        @
+        @ r0 = AudioStream &input
+        @ r1 = input.readBuffer
+        @ r2 = pointer to the rate details (sr)
+        @ r3 = obuf
+        @ <> = osamp
+        @ <> = vol_l
+        @ <> = vol_r
+        @
+        @ Layout of the details block as accessed by this routine:
+        @   #0  inPtr      #4  inLen       #8  opos      #12 opos_inc
+        @   #16 icur[0]    #18 icur[1]     #20 ilast[0]  #22 ilast[1]
+        @   #24 opos_frac  #28 opos_frac_inc             #32 inBuf
+        @
+        @ On exit inPtr, inLen and opos are written back to the block.
+        MOV     r12,r13                 @ r12= address of the stacked args
+        STMFD   r13!,{r0-r1,r4-r11,r14}
+        LDMFD   r12,{r11,r12,r14}       @ r11= osamp
+                                        @ r12= vol_l
+                                        @ r14= vol_r
+        LDMIA   r2,{r0,r1,r8}           @ r0 = inPtr
+                                        @ r1 = inLen
+                                        @ r8 = opos
+        CMP     r11,#0                  @ if (osamp <= 0)
+        BLE     LinearRate_S_end        @   bail out
+        ORR     r12,r12,r12,LSL #8      @ r12= vol_l | (vol_l<<8)
+        ORR     r14,r14,r14,LSL #8      @ r14= vol_r | (vol_r<<8)
+        CMP     r1,#0                   @ if input is still buffered,
+        BGT     LinearRate_S_part2      @   resume producing output
+
+        @ part1 - read input sample pairs until opos < 0
+LinearRate_S_loop:
+        SUBS    r1, r1, #2              @ r1 = inLen -= 2
+        BLT     LinearRate_S_read       @ buffer exhausted: refill it
+LinearRate_S_read_return:
+        LDR     r10,[r2, #16]           @ r10= icur[0,1]
+        LDRSH   r5, [r0],#2             @ r5 = tmp0 = *inPtr++
+        LDRSH   r6, [r0],#2             @ r6 = tmp1 = *inPtr++
+        SUBS    r8, r8, #1              @ r8 = opos-- (flags used by BGE)
+        STR     r10,[r2,#20]            @      ilast[0,1] = icur[0,1]
+        STRH    r5, [r2,#16]            @      icur[0] = tmp0
+        STRH    r6, [r2,#18]            @      icur[1] = tmp1 (offset 18;
+                                        @      16 would clobber icur[0])
+        BGE     LinearRate_S_loop       @ keep reading while opos >= 0
+
+        @ part2 - form output samples
+LinearRate_S_part2:
+        @ We are guaranteed that opos < 0 here
+        LDRSH   r6, [r2,#20]            @ r6 = ilast[0]
+        LDRSH   r5, [r2,#16]            @ r5 = icur[0]
+        LDRH    r4, [r2,#24]            @ r4 = opos_frac
+        MOV     r6, r6, LSL #16         @ r6 = ilast[0]<<16
+        SUB     r5, r5, r6, ASR #16     @ r5 = icur[0] - ilast[0]
+        ADD     r6, r6, #1<<15          @ r6 = (ilast[0]<<16) + (1<<15)
+        MLA     r6, r4, r5, r6  @ r6 += (icur[0]-ilast[0])*opos_frac
+
+        LDRSH   r7, [r2,#22]            @ r7 = ilast[1]
+        LDRSH   r5, [r2,#18]            @ r5 = icur[1]
+        LDR     r10,[r2,#28]            @ r10= opos_frac_inc
+        MOV     r7, r7, LSL #16         @ r7 = ilast[1]<<16
+        SUB     r5, r5, r7, ASR #16     @ r5 = icur[1] - ilast[1]
+        ADD     r7, r7, #1<<15          @ r7 = (ilast[1]<<16) + (1<<15)
+        MLA     r7, r4, r5, r7  @ r7 += (icur[1]-ilast[1])*opos_frac
+
+        ADD     r4, r4, r10             @ r4 = tmp = opos_frac+opos_frac_inc
+        STRH    r4,[r2,#24]             @ opos_frac = tmp & 65535
+        ADD     r8, r8, r4, LSR #16     @ opos += (tmp>>16), the carry
+
+        LDRSH   r4, [r3]                @ r4 = obuf[0]
+        LDRSH   r5, [r3,#2]             @ r5 = obuf[1]
+        MOV     r6, r6, ASR #16         @ r6 = out0 (channel 0 sample)
+        MOV     r7, r7, ASR #16         @ r7 = out1 (channel 1 sample)
+        MUL     r6, r12,r6              @ r6 = out0*vol_l
+        MUL     r7, r14,r7              @ r7 = out1*vol_r
+
+        @ Saturating add in the top halfword: if ADDS overflows (V set),
+        @ RSC yields 0x80000000 - 0 - !C, i.e. 0x7FFFFFFF after a positive
+        @ overflow (C clear) or 0x80000000 after a negative one (C set).
+        ADDS    r6, r6, r4, LSL #16     @ r6 = obuf[0]<<16 + out0*vol_l
+        MOV     r4, #0
+        RSCVS   r6, r4, #1<<31          @ Clamp r6
+        ADDS    r7, r7, r5, LSL #16     @ r7 = obuf[1]<<16 + out1*vol_r
+        RSCVS   r7, r4, #1<<31          @ Clamp r7
+
+        MOV     r6, r6, LSR #16         @ Shift back to halfword
+        MOV     r7, r7, LSR #16         @ Shift back to halfword
+
+        LDR     r5, [r2,#12]            @ r5 = opos_inc
+        STRH    r6, [r3],#2             @ Store obuf[0], advance obuf
+        STRH    r7, [r3],#2             @ Store obuf[1], advance obuf
+        SUBS    r11, r11,#1             @ osamp--
+        BLE     LinearRate_S_end        @ end if no output space is left
+
+        ADDS    r8, r8, r5              @ r8 = opos += opos_inc
+        BLT     LinearRate_S_part2      @ still behind: another output
+        B       LinearRate_S_loop       @ otherwise read more input
+LinearRate_S_end:
+        ADD     r13,r13,#8              @ Skip over r0-r1 on stack
+        STMIA   r2,{r0,r1,r8}           @ Store back inPtr, inLen, opos
+        LDMFD   r13!,{r4-r11,PC}
+LinearRate_S_read:
+        ADD     r0, r2, #32             @ r0 = inPtr = inBuf
+        STMFD   r13!,{r0,r2-r3,r12,r14}
+
+        MOV     r1, r0                  @ r1 = inBuf
+        LDR     r0, [r13,#4*5]          @ r0 = AudioStream & input
+        MOV     r2, #512                @ r2 = ARRAYSIZE(inBuf)
+
+        @ Calling back into C++ here. WinCE is fairly easy about such things
+        @ but other OS are more awkward. r9 is preserved for Symbian, and
+        @ we have 2+9+5 = 16 things on the stack (an even number).
+        MOV     r14,PC                  @ return address = insn after LDR PC
+        LDR     PC,[r13,#4*6]           @ inLen = input.readBuffer(inBuf,512)
+        SUBS    r1, r0, #2              @ r1 = inLen-2
+        LDMFD   r13!,{r0,r2-r3,r12,r14} @ restore regs (flags untouched)
+        BLT     LinearRate_S_end        @ no complete pair read: give up
+        B       LinearRate_S_read_return
+
+ARM_LinearRate_R:
+        @ Linear-interpolating rate converter for an interleaved stereo
+        @ input stream, with the output channels reversed. Each channel is
+        @ interpolated between its previous (ilast) and current (icur)
+        @ input sample; channel 0 is scaled by vol_l and mixed into
+        @ obuf[1], channel 1 is scaled by vol_r and mixed into obuf[0],
+        @ both with saturation.
+        @
+        @ r0 = AudioStream &input
+        @ r1 = input.readBuffer
+        @ r2 = pointer to the rate details (sr)
+        @ r3 = obuf
+        @ <> = osamp
+        @ <> = vol_l
+        @ <> = vol_r
+        @
+        @ Layout of the details block as accessed by this routine:
+        @   #0  inPtr      #4  inLen       #8  opos      #12 opos_inc
+        @   #16 icur[0]    #18 icur[1]     #20 ilast[0]  #22 ilast[1]
+        @   #24 opos_frac  #28 opos_frac_inc             #32 inBuf
+        @
+        @ On exit inPtr, inLen and opos are written back to the block.
+        MOV     r12,r13                 @ r12= address of the stacked args
+        STMFD   r13!,{r0-r1,r4-r11,r14}
+        LDMFD   r12,{r11,r12,r14}       @ r11= osamp
+                                        @ r12= vol_l
+                                        @ r14= vol_r
+        LDMIA   r2,{r0,r1,r8}           @ r0 = inPtr
+                                        @ r1 = inLen
+                                        @ r8 = opos
+        CMP     r11,#0                  @ if (osamp <= 0)
+        BLE     LinearRate_R_end        @   bail out
+        ORR     r12,r12,r12,LSL #8      @ r12= vol_l | (vol_l<<8)
+        ORR     r14,r14,r14,LSL #8      @ r14= vol_r | (vol_r<<8)
+        CMP     r1,#0                   @ if input is still buffered,
+        BGT     LinearRate_R_part2      @   resume producing output
+
+        @ part1 - read input sample pairs until opos < 0
+LinearRate_R_loop:
+        SUBS    r1, r1, #2              @ r1 = inLen -= 2
+        BLT     LinearRate_R_read       @ buffer exhausted: refill it
+LinearRate_R_read_return:
+        LDR     r10,[r2, #16]           @ r10= icur[0,1]
+        LDRSH   r5, [r0],#2             @ r5 = tmp0 = *inPtr++
+        LDRSH   r6, [r0],#2             @ r6 = tmp1 = *inPtr++
+        SUBS    r8, r8, #1              @ r8 = opos-- (flags used by BGE)
+        STR     r10,[r2,#20]            @ ilast[0,1] = icur[0,1]
+        STRH    r5, [r2,#16]            @ icur[0] = tmp0
+        STRH    r6, [r2,#18]            @ icur[1] = tmp1 (offset 18;
+                                        @ 16 would clobber icur[0])
+        BGE     LinearRate_R_loop       @ keep reading while opos >= 0
+
+        @ part2 - form output samples
+LinearRate_R_part2:
+        @ We are guaranteed that opos < 0 here
+        LDRSH   r6, [r2,#20]            @ r6 = ilast[0]
+        LDRSH   r5, [r2,#16]            @ r5 = icur[0]
+        LDRH    r4, [r2,#24]            @ r4 = opos_frac
+        MOV     r6, r6, LSL #16         @ r6 = ilast[0]<<16
+        SUB     r5, r5, r6, ASR #16     @ r5 = icur[0] - ilast[0]
+        ADD     r6, r6, #1<<15          @ r6 = (ilast[0]<<16) + (1<<15)
+        MLA     r6, r4, r5, r6  @ r6 += (icur[0]-ilast[0])*opos_frac
+
+        LDRSH   r7, [r2,#22]            @ r7 = ilast[1]
+        LDRSH   r5, [r2,#18]            @ r5 = icur[1]
+        LDR     r10,[r2,#28]            @ r10= opos_frac_inc
+        MOV     r7, r7, LSL #16         @ r7 = ilast[1]<<16
+        SUB     r5, r5, r7, ASR #16     @ r5 = icur[1] - ilast[1]
+        ADD     r7, r7, #1<<15          @ r7 = (ilast[1]<<16) + (1<<15)
+        MLA     r7, r4, r5, r7  @ r7 += (icur[1]-ilast[1])*opos_frac
+
+        ADD     r4, r4, r10             @ r4 = tmp = opos_frac+opos_frac_inc
+        STRH    r4,[r2,#24]             @ opos_frac = tmp & 65535
+        ADD     r8, r8, r4, LSR #16     @ opos += (tmp>>16), the carry
+
+        LDRSH   r4, [r3]                @ r4 = obuf[0]
+        LDRSH   r5, [r3,#2]             @ r5 = obuf[1]
+        MOV     r6, r6, ASR #16         @ r6 = out0 (channel 0 sample)
+        MOV     r7, r7, ASR #16         @ r7 = out1 (channel 1 sample)
+        MUL     r6, r12,r6              @ r6 = out0*vol_l
+        MUL     r7, r14,r7              @ r7 = out1*vol_r
+
+        @ Saturating add in the top halfword: if ADDS overflows (V set),
+        @ RSC yields 0x80000000 - 0 - !C, i.e. 0x7FFFFFFF after a positive
+        @ overflow (C clear) or 0x80000000 after a negative one (C set).
+        @ Each sum must be stored back to the slot it was accumulated
+        @ from, otherwise the existing mix in obuf gets swapped.
+        ADDS    r7, r7, r4, LSL #16     @ r7 = obuf[0]<<16 + out1*vol_r
+        MOV     r4, #0
+        RSCVS   r7, r4, #1<<31          @ Clamp r7
+        ADDS    r6, r6, r5, LSL #16     @ r6 = obuf[1]<<16 + out0*vol_l
+        RSCVS   r6, r4, #1<<31          @ Clamp r6
+
+        MOV     r7, r7, LSR #16         @ Shift back to halfword
+        MOV     r6, r6, LSR #16         @ Shift back to halfword
+
+        LDR     r5, [r2,#12]            @ r5 = opos_inc
+        STRH    r7, [r3],#2             @ Store obuf[0], advance obuf
+        STRH    r6, [r3],#2             @ Store obuf[1], advance obuf
+        SUBS    r11, r11,#1             @ osamp--
+        BLE     LinearRate_R_end        @ end if no output space is left
+
+        ADDS    r8, r8, r5              @ r8 = opos += opos_inc
+        BLT     LinearRate_R_part2      @ still behind: another output
+        B       LinearRate_R_loop       @ otherwise read more input
+LinearRate_R_end:
+        ADD     r13,r13,#8              @ Skip over r0-r1 on stack
+        STMIA   r2,{r0,r1,r8}           @ Store back inPtr, inLen, opos
+        LDMFD   r13!,{r4-r11,PC}
+LinearRate_R_read:
+        ADD     r0, r2, #32             @ r0 = inPtr = inBuf
+        STMFD   r13!,{r0,r2-r3,r12,r14}
+
+        MOV     r1, r0                  @ r1 = inBuf
+        LDR     r0, [r13,#4*5]          @ r0 = AudioStream & input
+        MOV     r2, #512                @ r2 = ARRAYSIZE(inBuf)
+
+        @ Calling back into C++ here. WinCE is fairly easy about such things
+        @ but other OS are more awkward. r9 is preserved for Symbian, and
+        @ we have 2+9+5 = 16 things on the stack (an even number).
+        MOV     r14,PC                  @ return address = insn after LDR PC
+        LDR     PC,[r13,#4*6]           @ inLen = input.readBuffer(inBuf,512)
+        SUBS    r1, r0, #2              @ r1 = inLen-2
+        LDMFD   r13!,{r0,r2-r3,r12,r14} @ restore regs (flags untouched)
+        BLT     LinearRate_R_end        @ no complete pair read: give up
+        B       LinearRate_R_read_return