mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-02-08 16:03:21 +00:00
Bug 926838 - [Part 1] Add new files, and update license file. r=gerv,ehsan
This commit is contained in:
parent
98d8b74fc0
commit
6c2a2d1f4e
39
media/openmax_dl/LICENSE
Normal file
39
media/openmax_dl/LICENSE
Normal file
@ -0,0 +1,39 @@
|
||||
Use of this source code is governed by a BSD-style license that can be
|
||||
found in the LICENSE file in the root of the source tree. All
|
||||
contributing project authors may be found in the AUTHORS file in the
|
||||
root of the source tree.
|
||||
|
||||
The files were originally licensed by ARM Limited.
|
||||
|
||||
The following files:
|
||||
|
||||
* dl/api/omxtypes.h
|
||||
* dl/sp/api/omxSP.h
|
||||
|
||||
are licensed by Khronos:
|
||||
|
||||
Copyright (c) 2005-2008,2015 The Khronos Group Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and/or associated documentation files (the
|
||||
"Materials"), to deal in the Materials without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
permit persons to whom the Materials are furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Materials.
|
||||
|
||||
MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
https://www.khronos.org/registry/
|
||||
|
||||
THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
3
media/openmax_dl/OWNERS
Normal file
3
media/openmax_dl/OWNERS
Normal file
@ -0,0 +1,3 @@
|
||||
ajm@google.com
|
||||
kma@google.com
|
||||
rtoy@google.com
|
19
media/openmax_dl/README.chromium
Normal file
19
media/openmax_dl/README.chromium
Normal file
@ -0,0 +1,19 @@
|
||||
Name: OpenMAX DL
|
||||
Short Name: OpenMax DL
|
||||
URL: https://silver.arm.com/download/Software/Graphics/OX000-BU-00010-r1p0-00bet0/OX000-BU-00010-r1p0-00bet0.tgz
|
||||
Version: 1.0.2
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
Security Critical: yes
|
||||
|
||||
Description:
|
||||
Implementation of OpenMAX DL spec from ARM. This is used to support
|
||||
WebAudio for Chromium on Android.
|
||||
|
||||
Local Modifications:
|
||||
Only the FFT routines from the OpenMAX DL package are included. The
|
||||
code was modified to work with gcc and a new implementation for a
|
||||
floating-point FFT was added.
|
||||
|
||||
The original ARM license is unclear, but Google has obtained
|
||||
permission to relicense this code under a BSD license.
|
409
media/openmax_dl/dl/api/armCOMM_s.h
Normal file
409
media/openmax_dl/dl/api/armCOMM_s.h
Normal file
@ -0,0 +1,409 @@
|
||||
@// -*- Mode: asm; -*-
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: armCOMM_s.h
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 13871
|
||||
@// Last Modified Date: Fri, 09 May 2008
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// ARM optimized OpenMAX common header file
|
||||
@//
|
||||
|
||||
@// Assembly-time state shared by the function header/footer macros.
        .set    _RRegList, 0    @ R saved register list (last register number)
        .set    _DRegList, 0    @ D saved register list (last register number)

        .set    _SBytes, 0      @ Number of scratch bytes on stack
        .set    _Workspace, 0   @ Stack offset of scratch workspace
|
||||
|
||||
@// Translate the |rreg| argument into _RRegList, the number of the last
@// core register to save.  gas doesn't support setting a variable to a
@// string, so the register list is encoded as that register number and is
@// decoded again by _M_PUSH_RREG / _M_POP_RREG.
@//
@//   rreg = ""            _RRegList = 0   (nothing to save)
@//          "lr", "r4"    _RRegList = 4
@//          "r5", "r6"    _RRegList = 6
@//          "r7", "r8"    _RRegList = 8
@//          "r9", "r10"   _RRegList = 10
@//          "r11", "r12"  _RRegList = 12
@//
@// Any other value only produces a warning and leaves _RRegList unchanged.
        .macro  _M_GETRREGLIST rreg
        .ifeqs  "\rreg", ""
        @ Nothing needs to be saved.  Reset the limit explicitly so that a
        @ value left behind by an earlier function is not reused here.
        .set    _RRegList, 0
        .exitm
        .endif

        @ If rreg is lr or r4, save lr and r4
        .ifeqs  "\rreg", "lr"
        .set    _RRegList, 4
        .exitm
        .endif

        .ifeqs  "\rreg", "r4"
        .set    _RRegList, 4
        .exitm
        .endif

        @ If rreg = r5 or r6, save up to register r6
        .ifeqs  "\rreg", "r5"
        .set    _RRegList, 6
        .exitm
        .endif
        .ifeqs  "\rreg", "r6"
        .set    _RRegList, 6
        .exitm
        .endif

        @ If rreg = r7 or r8, save up to register r8
        .ifeqs  "\rreg", "r7"
        .set    _RRegList, 8
        .exitm
        .endif
        .ifeqs  "\rreg", "r8"
        .set    _RRegList, 8
        .exitm
        .endif

        @ If rreg = r9 or r10, save up to register r10
        .ifeqs  "\rreg", "r9"
        .set    _RRegList, 10
        .exitm
        .endif
        .ifeqs  "\rreg", "r10"
        .set    _RRegList, 10
        .exitm
        .endif

        @ If rreg = r11 or r12, save up to register r12
        .ifeqs  "\rreg", "r11"
        .set    _RRegList, 12
        .exitm
        .endif
        .ifeqs  "\rreg", "r12"
        .set    _RRegList, 12
        .exitm
        .endif

        .warning "Unrecognized saved r register limit: \rreg"
        .endm
|
||||
|
||||
@ Translate the |dreg| argument into _DRegList, the number of the last
@ D register to save, in the same way _M_GETRREGLIST does for R registers.
@
@   dreg = ""           _DRegList = 0   (nothing to save)
@          "d8".."d15"  _DRegList = 8..15
@
@ Any other value only produces a warning and leaves _DRegList unchanged.
        .macro  _M_GETDREGLIST dreg
        .ifeqs  "\dreg", ""
        .set    _DRegList, 0
        .exitm
        .endif

        .ifeqs  "\dreg", "d8"
        .set    _DRegList, 8
        .exitm
        .endif

        .ifeqs  "\dreg", "d9"
        .set    _DRegList, 9
        .exitm
        .endif

        .ifeqs  "\dreg", "d10"
        .set    _DRegList, 10
        .exitm
        .endif

        .ifeqs  "\dreg", "d11"
        .set    _DRegList, 11
        .exitm
        .endif

        .ifeqs  "\dreg", "d12"
        .set    _DRegList, 12
        .exitm
        .endif

        .ifeqs  "\dreg", "d13"
        .set    _DRegList, 13
        .exitm
        .endif

        .ifeqs  "\dreg", "d14"
        .set    _DRegList, 14
        .exitm
        .endif

        .ifeqs  "\dreg", "d15"
        .set    _DRegList, 15
        .exitm
        .endif

        @ Report the offending argument; this macro's parameter is dreg.
        .warning "Unrecognized saved d register limit: \dreg"
        .endm
|
||||
|
||||
@//////////////////////////////////////////////////////////
|
||||
@// Function header and footer macros
|
||||
@//////////////////////////////////////////////////////////
|
||||
|
||||
@ Function Header Macro
@ Emits the function prologue.
@ Functions should all be "stack-moves-once": the function header and
@ footer macros should be the only places where the stack moves.
@
@   name = function name
@   rreg = ""    no core registers requested
@          "lr"  stack registers "r4, lr"
@          "rN"  stack registers "r4-rN, lr" (an odd N is treated as N+1)
@   dreg = ""    don't stack any D registers
@          "dN"  stack registers "d8-dN"
@
@ Note: the ARM Architecture Procedure Call Standard (AAPCS) states that
@       r4-r11, sp and d8-d15 must be preserved by a compliant function.
        .macro  M_START name, rreg, dreg
        .set    _Workspace, 0

        @ Make the function external and place it in its own text section.
        .global \name
        .func   \name
        .section .text.\name,"ax",%progbits
        .align  2
\name :
        .fnstart

        @ Core registers are saved first ...
        _M_GETRREGLIST \rreg
        _M_PUSH_RREG

        @ ... followed by the requested D registers.
        _M_GETDREGLIST \dreg
        _M_PUSH_DREG

        @ Round the scratch size up to a multiple of 8 bytes, then claim it.
        .set    _SBytes, (_SBytes + 7) & ~7
        .if     _SBytes != 0
        sub     sp, sp, #_SBytes
        .endif
        .endm
|
||||
|
||||
@ Function Footer Macro
@ Emits the function epilogue: releases the scratch area, restores the
@ saved registers and returns, then closes the function.
        .macro  M_END
        @ Put the stack pointer back where it was before the scratch area
        @ was claimed.
        .if     _SBytes != 0
        add     sp, sp, #_SBytes
        .endif

        @ Restore any saved D and R registers and return.
        _M_RET
        .fnend
        .endfunc

        @ The next function starts again with no scratch bytes allocated.
        .set    _SBytes, 0
        .endm
|
||||
|
||||
@// Push d8 up to the D register numbered by _DRegList onto the stack.
@// Values of _DRegList outside 8..15 emit nothing.
        .macro  _M_PUSH_DREG
        .if     _DRegList == 8
        vpush   {d8}
        .elseif _DRegList == 9
        vpush   {d8-d9}
        .elseif _DRegList == 10
        vpush   {d8-d10}
        .elseif _DRegList == 11
        vpush   {d8-d11}
        .elseif _DRegList == 12
        vpush   {d8-d12}
        .elseif _DRegList == 13
        vpush   {d8-d13}
        .elseif _DRegList == 14
        vpush   {d8-d14}
        .elseif _DRegList == 15
        vpush   {d8-d15}
        .endif
        .endm
|
||||
|
||||
@// Push r4 up to the core register numbered by _RRegList, together with
@// lr, onto the stack.  Values of _RRegList other than 4, 6, 8, 10 and 12
@// emit nothing.
        .macro  _M_PUSH_RREG
        .if     _RRegList == 4
        stmfd   sp!, {r4, lr}
        .elseif _RRegList == 6
        stmfd   sp!, {r4-r6, lr}
        .elseif _RRegList == 8
        stmfd   sp!, {r4-r8, lr}
        .elseif _RRegList == 10
        stmfd   sp!, {r4-r10, lr}
        .elseif _RRegList == 12
        stmfd   sp!, {r4-r12, lr}
        .endif
        .endm
|
||||
|
||||
@// The opposite of _M_PUSH_DREG: pop d8 up to the D register numbered by
@// _DRegList.  Values of _DRegList outside 8..15 emit nothing.
        .macro  _M_POP_DREG
        .if     _DRegList == 8
        vpop    {d8}
        .elseif _DRegList == 9
        vpop    {d8-d9}
        .elseif _DRegList == 10
        vpop    {d8-d10}
        .elseif _DRegList == 11
        vpop    {d8-d11}
        .elseif _DRegList == 12
        vpop    {d8-d12}
        .elseif _DRegList == 13
        vpop    {d8-d13}
        .elseif _DRegList == 14
        vpop    {d8-d14}
        .elseif _DRegList == 15
        vpop    {d8-d15}
        .endif
        .endm
|
||||
|
||||
@// The opposite of _M_PUSH_RREG, combined with the function return.
@//
@//   cc = optional condition code applied to the return instruction
@//
@// With _RRegList == 0 the return is a plain "bx lr".  Otherwise the saved
@// registers are reloaded, with pc taking the place of lr in the list.
@// Values of _RRegList other than 0, 4, 6, 8, 10 and 12 emit nothing.
        .macro  _M_POP_RREG cc
        .if     _RRegList == 0
        bx\cc   lr
        .elseif _RRegList == 4
        ldm\cc\()fd sp!, {r4, pc}
        .elseif _RRegList == 6
        ldm\cc\()fd sp!, {r4-r6, pc}
        .elseif _RRegList == 8
        ldm\cc\()fd sp!, {r4-r8, pc}
        .elseif _RRegList == 10
        ldm\cc\()fd sp!, {r4-r10, pc}
        .elseif _RRegList == 12
        ldm\cc\()fd sp!, {r4-r12, pc}
        .endif
        .endm
|
||||
|
||||
@ Produce function return instructions
@
@   cc = optional condition code for the return
@
@ The D registers are restored first, then the R registers are restored
@ and the function returns.  _M_POP_DREG takes no arguments and its pops
@ are unconditional, so a conditional return is only accepted when no
@ D registers were saved (_DRegList == 0); otherwise assembly stops with
@ an explicit error.
        .macro  _M_RET cc
        .ifeqs  "\cc", ""
        _M_POP_DREG
        .else
        .if     _DRegList != 0
        .error  "_M_RET: conditional return is not supported when D registers are saved"
        .endif
        .endif
        _M_POP_RREG \cc
        .endm
|
||||
|
||||
@// Reserve a 4-byte aligned scratch area called |name| that is |size|
@// bytes long.  The offset of the area is recorded in the symbol
@// |name|_F and _SBytes grows by |size| (plus any alignment padding).
        .macro  M_ALLOC4 name, size
        @ Round the running total up to the next multiple of 4.
        .set    _SBytes, (_SBytes + 3) & ~3
        .set    \name\()_F, _SBytes
        .set    _SBytes, _SBytes + \size
        .endm
|
||||
|
||||
@ Load a word from the stack.
@   r      = destination register
@   a0..a3 = addressing arguments, forwarded unchanged to _M_DATA
        .macro  M_LDR r, a0, a1, a2, a3
        _M_DATA "ldr", 4, \r, \a0, \a1, \a2, \a3
        .endm
|
||||
|
||||
@ Store a word to the stack.
@   r      = source register
@   a0..a3 = addressing arguments, forwarded unchanged to _M_DATA
        .macro  M_STR r, a0, a1, a2, a3
        _M_DATA "str", 4, \r, \a0, \a1, \a2, \a3
        .endm
|
||||
|
||||
@ Macro to perform a data access operation, such as LDR or STR.
@ The addressing mode is intended to be modified such that
@   1. If no address is given then the name is taken as a stack offset
@   2. If the addressing mode is not available for the state being
@      assembled for (eg Thumb) then a suitable addressing mode is
@      substituted.
@
@ On Entry:
@   i  = Instruction to perform (eg "ldr")
@   a  = Required byte alignment (accepted, not used by this macro)
@   r  = Register to transfer (eg "r1")
@   a0, a1, a2, a3 = Addressing mode and condition. One of:
@       label {,cc}
@       [base] {,,,cc}
@       [base, offset]{!} {,,cc}
@       [base, offset, shift]{!} {,cc}
@       [base], offset {,,cc}
@       [base], offset, shift {,cc}
@
@ WARNING: Most of the above are not supported, except the first case.
@ Only that form is implemented: |a0| names a stack slot whose offset is
@ the symbol |a0|_F, |a1| is an optional condition code appended to the
@ instruction, and |a2| / |a3| are ignored.
        .macro  _M_DATA i, a, r, a0, a1, a2, a3
        @ Slot offset relative to sp, including the workspace offset.
        .set    _Offset, _Workspace + \a0\()_F
        \i\a1   \r, [sp, #_Offset]
        .endm
|
289
media/openmax_dl/dl/api/armOMX.h
Normal file
289
media/openmax_dl/dl/api/armOMX.h
Normal file
@ -0,0 +1,289 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This file was originally licensed as follows. It has been
|
||||
* relicensed with permission from the copyright holders.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* File Name: armOMX_ReleaseVersion.h
|
||||
* OpenMAX DL: v1.0.2
|
||||
* Last Modified Revision: 15322
|
||||
* Last Modified Date: Wed, 15 Oct 2008
|
||||
*
|
||||
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
*
|
||||
*
|
||||
*
|
||||
* This file allows a version of the OMX DL libraries to be built where some or
|
||||
* all of the function names can be given a user specified suffix.
|
||||
*
|
||||
* You might want to use it where:
|
||||
*
|
||||
* - you want to rename a function "out of the way" so that you could replace
|
||||
* a function with a different version (the original version would still be
|
||||
* in the library just with a different name - so you could debug the new
|
||||
* version by comparing it to the output of the old)
|
||||
*
|
||||
* - you want to rename all the functions to versions with a suffix so that
|
||||
* you can include two versions of the library and choose between functions
|
||||
* at runtime.
|
||||
*
|
||||
* e.g. omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _armOMX_H_
|
||||
#define _armOMX_H_
|
||||
|
||||
#define ARMOMX_ENABLE_RENAMING 0
|
||||
#if ARMOMX_ENABLE_RENAMING
|
||||
|
||||
/* We need to define these two macros in order to expand and concatenate the names */
|
||||
#define OMXCAT2BAR(A, B) omx ## A ## B
|
||||
#define OMXCATBAR(A, B) OMXCAT2BAR(A, B)
|
||||
|
||||
/* Define the suffix to add to all functions - the default is no suffix */
|
||||
#define BARE_SUFFIX
|
||||
|
||||
|
||||
|
||||
/* Define what happens to the bare suffix-less functions, down to the sub-domain accuracy */
|
||||
#define OMXACAAC_SUFFIX BARE_SUFFIX
|
||||
#define OMXACMP3_SUFFIX BARE_SUFFIX
|
||||
#define OMXICJP_SUFFIX BARE_SUFFIX
|
||||
#define OMXIPBM_SUFFIX BARE_SUFFIX
|
||||
#define OMXIPCS_SUFFIX BARE_SUFFIX
|
||||
#define OMXIPPP_SUFFIX BARE_SUFFIX
|
||||
#define OMXSP_SUFFIX BARE_SUFFIX
|
||||
#define OMXVCCOMM_SUFFIX BARE_SUFFIX
|
||||
#define OMXVCM4P10_SUFFIX BARE_SUFFIX
|
||||
#define OMXVCM4P2_SUFFIX BARE_SUFFIX
|
||||
|
||||
|
||||
|
||||
|
||||
/* Define what each bare, un-suffixed OpenMAX API function name is to be renamed to */
|
||||
#define omxACAAC_DecodeChanPairElt OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_DecodeDatStrElt OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_DecodeFillElt OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_DecodeIsStereo_S32 OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_DecodeMsPNS_S32_I OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_DecodeMsStereo_S32_I OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_DecodePrgCfgElt OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_DecodeTNS_S32_I OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_DeinterleaveSpectrum_S32 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_EncodeTNS_S32_I OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_LongTermPredict_S32 OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_LongTermReconstruct_S32_I OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_MDCTFwd_S32 OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_MDCTInv_S32_S16 OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_NoiselessDecode OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_QuantInv_S32_I OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_UnpackADIFHeader OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX)
|
||||
#define omxACAAC_UnpackADTSFrameHeader OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX)
|
||||
|
||||
|
||||
#define omxACMP3_HuffmanDecode_S32 OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX)
|
||||
#define omxACMP3_HuffmanDecodeSfb_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX)
|
||||
#define omxACMP3_HuffmanDecodeSfbMbp_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX)
|
||||
#define omxACMP3_MDCTInv_S32 OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX)
|
||||
#define omxACMP3_ReQuantize_S32_I OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX)
|
||||
#define omxACMP3_ReQuantizeSfb_S32_I OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX)
|
||||
#define omxACMP3_SynthPQMF_S32_S16 OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX)
|
||||
#define omxACMP3_UnpackFrameHeader OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX)
|
||||
#define omxACMP3_UnpackScaleFactors_S8 OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX)
|
||||
#define omxACMP3_UnpackSideInfo OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX)
|
||||
|
||||
#define omxICJP_CopyExpand_U8_C3 OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DCTFwd_S16 OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DCTFwd_S16_I OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DCTInv_S16 OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DCTInv_S16_I OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DCTQuantFwd_Multiple_S16 OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DCTQuantFwd_S16 OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DCTQuantFwd_S16_I OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DCTQuantFwdTableInit OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DCTQuantInv_Multiple_S16 OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DCTQuantInv_S16 OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DCTQuantInv_S16_I OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DCTQuantInvTableInit OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DecodeHuffman8x8_Direct_S16_C1 OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DecodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
|
||||
#define omxICJP_DecodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
|
||||
#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1 OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX)
|
||||
#define omxICJP_EncodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
|
||||
#define omxICJP_EncodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
|
||||
|
||||
#define omxIPBM_AddC_U8_C1R_Sfs OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
|
||||
#define omxIPBM_Copy_U8_C1R OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX)
|
||||
#define omxIPBM_Copy_U8_C3R OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX)
|
||||
#define omxIPBM_Mirror_U8_C1R OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX)
|
||||
#define omxIPBM_MulC_U8_C1R_Sfs OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
|
||||
|
||||
#define omxIPCS_ColorTwistQ14_U8_C3R OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr420RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr422RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
|
||||
#define omxIPCS_YCbCr444ToBGR888_U8_C3R OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX)
|
||||
|
||||
#define omxIPPP_Deblock_HorEdge_U8_I OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX)
|
||||
#define omxIPPP_Deblock_VerEdge_U8_I OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX)
|
||||
#define omxIPPP_FilterFIR_U8_C1R OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX)
|
||||
#define omxIPPP_FilterMedian_U8_C1R OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX)
|
||||
#define omxIPPP_GetCentralMoment_S64 OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX)
|
||||
#define omxIPPP_GetSpatialMoment_S64 OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX)
|
||||
#define omxIPPP_MomentGetStateSize OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX)
|
||||
#define omxIPPP_MomentInit OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX)
|
||||
#define omxIPPP_Moments_U8_C1R OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX)
|
||||
#define omxIPPP_Moments_U8_C3R OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX)
|
||||
|
||||
#define omxSP_BlockExp_S16 OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX)
|
||||
#define omxSP_BlockExp_S32 OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX)
|
||||
#define omxSP_Copy_S16 OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX)
|
||||
#define omxSP_DotProd_S16 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX)
|
||||
#define omxSP_DotProd_S16_Sfs OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTFwd_CToC_SC16_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTFwd_CToC_SC32_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTFwd_RToCCS_S16S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTFwd_RToCCS_S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTGetBufSize_C_SC16 OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTGetBufSize_C_SC32 OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTGetBufSize_R_S16S32 OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTGetBufSize_R_S32 OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTInit_C_SC16 OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTInit_C_SC32 OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTInit_R_S16S32 OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTInit_R_S32 OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTInv_CCSToR_S32_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTInv_CCSToR_S32S16_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTInv_CToC_SC16_Sfs OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX)
|
||||
#define omxSP_FFTInv_CToC_SC32_Sfs OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX)
|
||||
#define omxSP_FilterMedian_S32 OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX)
|
||||
#define omxSP_FilterMedian_S32_I OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX)
|
||||
#define omxSP_FIR_Direct_S16 OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX)
|
||||
#define omxSP_FIR_Direct_S16_I OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX)
|
||||
#define omxSP_FIR_Direct_S16_ISfs OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX)
|
||||
#define omxSP_FIR_Direct_S16_Sfs OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX)
|
||||
#define omxSP_FIROne_Direct_S16 OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX)
|
||||
#define omxSP_FIROne_Direct_S16_I OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX)
|
||||
#define omxSP_FIROne_Direct_S16_ISfs OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX)
|
||||
#define omxSP_FIROne_Direct_S16_Sfs OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX)
|
||||
#define omxSP_IIR_BiQuadDirect_S16 OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX)
|
||||
#define omxSP_IIR_BiQuadDirect_S16_I OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX)
|
||||
#define omxSP_IIR_Direct_S16 OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX)
|
||||
#define omxSP_IIR_Direct_S16_I OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX)
|
||||
#define omxSP_IIROne_BiQuadDirect_S16 OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX)
|
||||
#define omxSP_IIROne_BiQuadDirect_S16_I OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX)
|
||||
#define omxSP_IIROne_Direct_S16 OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX)
|
||||
#define omxSP_IIROne_Direct_S16_I OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX)
|
||||
|
||||
#define omxVCCOMM_Average_16x OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX)
|
||||
#define omxVCCOMM_Average_8x OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX)
|
||||
#define omxVCCOMM_ComputeTextureErrorBlock OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX)
|
||||
#define omxVCCOMM_ComputeTextureErrorBlock_SAD OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX)
|
||||
#define omxVCCOMM_Copy16x16 OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX)
|
||||
#define omxVCCOMM_Copy8x8 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX)
|
||||
#define omxVCCOMM_ExpandFrame_I OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX)
|
||||
#define omxVCCOMM_LimitMVToRect OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX)
|
||||
#define omxVCCOMM_SAD_16x OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX)
|
||||
#define omxVCCOMM_SAD_8x OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX)
|
||||
|
||||
#define omxVCM4P10_Average_4x OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_BlockMatch_Half OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_BlockMatch_Integer OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_BlockMatch_Quarter OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_DeblockChroma_I OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_DeblockLuma_I OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_DecodeCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_GetVLCInfo OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_InterpolateChroma OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_InterpolateHalfHor_Luma OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_InterpolateHalfVer_Luma OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_InterpolateLuma OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_InvTransformDequant_ChromaDC OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_InvTransformDequant_LumaDC OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_InvTransformResidualAndAdd OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_MEGetBufSize OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_MEInit OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_MotionEstimationMB OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_PredictIntra_16x16 OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_PredictIntra_4x4 OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_PredictIntraChroma_8x8 OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_SAD_4x OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_SADQuar_16x OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_SADQuar_4x OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_SADQuar_8x OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_SATD_4x4 OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_SubAndTransformQDQResidual OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_TransformDequantChromaDCFromPair OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_TransformDequantLumaDCFromPair OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_TransformQuant_ChromaDC OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX)
|
||||
#define omxVCM4P10_TransformQuant_LumaDC OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX)
|
||||
|
||||
#define omxVCM4P2_BlockMatch_Half_16x16 OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_BlockMatch_Half_8x8 OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_BlockMatch_Integer_16x16 OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_BlockMatch_Integer_8x8 OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_DCT8x8blk OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_DecodeBlockCoef_Inter OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_DecodeBlockCoef_Intra OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_DecodePadMV_PVOP OMXCATBAR(VCM4P2_DecodePadMV_PVOP, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_DecodeVLCZigzag_Inter OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_EncodeMV OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_EncodeVLCZigzag_Inter OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_FindMVpred OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_IDCT8x8blk OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_MCReconBlock OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_MEGetBufSize OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_MEInit OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_MotionEstimationMB OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_PredictReconCoefIntra OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_QuantInter_I OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_QuantIntra_I OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_QuantInvInter_I OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_QuantInvIntra_I OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_TransRecBlockCoef_inter OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX)
|
||||
#define omxVCM4P2_TransRecBlockCoef_intra OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX)
|
||||
|
||||
#endif /* endif ARMOMX_ENABLE_RENAMING */
|
||||
#endif /* _armOMX_h_ */
|
254
media/openmax_dl/dl/api/omxtypes.h
Normal file
254
media/openmax_dl/dl/api/omxtypes.h
Normal file
@ -0,0 +1,254 @@
|
||||
/**
|
||||
* File: omxtypes.h
|
||||
* Brief: Defines basic Data types used in OpenMAX v1.0.2 header files.
|
||||
*
|
||||
* Copyright (c) 2005-2008,2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _OMXTYPES_H_
|
||||
#define _OMXTYPES_H_
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#define OMX_IN
|
||||
#define OMX_OUT
|
||||
#define OMX_INOUT
|
||||
|
||||
|
||||
typedef enum {
|
||||
|
||||
/* Mandatory return codes - use cases are explicitly described for each function */
|
||||
OMX_Sts_NoErr = 0, /* No error, the function completed successfully */
|
||||
OMX_Sts_Err = -2, /* Unknown/unspecified error */
|
||||
OMX_Sts_InvalidBitstreamValErr = -182, /* Invalid value detected during bitstream processing */
|
||||
OMX_Sts_MemAllocErr = -9, /* Not enough memory allocated for the operation */
|
||||
OMX_StsACAAC_GainCtrErr = -159, /* AAC: Unsupported gain control data detected */
|
||||
OMX_StsACAAC_PrgNumErr = -167, /* AAC: Invalid number of elements for one program */
|
||||
OMX_StsACAAC_CoefValErr = -163, /* AAC: Invalid quantized coefficient value */
|
||||
OMX_StsACAAC_MaxSfbErr = -162, /* AAC: Invalid maxSfb value in relation to numSwb */
|
||||
OMX_StsACAAC_PlsDataErr = -160, /* AAC: pulse escape sequence data error */
|
||||
|
||||
/* Optional return codes - use cases are explicitly described for each function*/
|
||||
OMX_Sts_BadArgErr = -5, /* Bad Arguments */
|
||||
|
||||
OMX_StsACAAC_TnsNumFiltErr = -157, /* AAC: Invalid number of TNS filters */
|
||||
OMX_StsACAAC_TnsLenErr = -156, /* AAC: Invalid TNS region length */
|
||||
OMX_StsACAAC_TnsOrderErr = -155, /* AAC: Invalid order of TNS filter */
|
||||
OMX_StsACAAC_TnsCoefResErr = -154, /* AAC: Invalid bit-resolution for TNS filter coefficients */
|
||||
OMX_StsACAAC_TnsCoefErr = -153, /* AAC: Invalid TNS filter coefficients */
|
||||
OMX_StsACAAC_TnsDirectErr = -152, /* AAC: Invalid TNS filter direction */
|
||||
|
||||
OMX_StsICJP_JPEGMarkerErr = -183, /* JPEG marker encountered within an entropy-coded block; */
|
||||
/* Huffman decoding operation terminated early. */
|
||||
OMX_StsICJP_JPEGMarker = -181, /* JPEG marker encountered; Huffman decoding */
|
||||
/* operation terminated early. */
|
||||
OMX_StsIPPP_ContextMatchErr = -17, /* Context parameter doesn't match to the operation */
|
||||
|
||||
OMX_StsSP_EvenMedianMaskSizeErr = -180, /* Even size of the Median Filter mask was replaced by the odd one */
|
||||
|
||||
OMX_Sts_MaximumEnumeration = INT_MAX /*Placeholder, forces enum of size OMX_INT*/
|
||||
|
||||
} OMXResult; /** Return value or error value returned from a function. Identical to OMX_INT */
|
||||
|
||||
|
||||
/* OMX_U8 */
|
||||
#if UCHAR_MAX == 0xff
|
||||
typedef unsigned char OMX_U8;
|
||||
#elif USHRT_MAX == 0xff
|
||||
typedef unsigned short int OMX_U8;
|
||||
#else
|
||||
#error OMX_U8 undefined
|
||||
#endif
|
||||
|
||||
|
||||
/* OMX_S8 */
|
||||
#if SCHAR_MAX == 0x7f
|
||||
typedef signed char OMX_S8;
|
||||
#elif SHRT_MAX == 0x7f
|
||||
typedef signed short int OMX_S8;
|
||||
#else
|
||||
#error OMX_S8 undefined
|
||||
#endif
|
||||
|
||||
|
||||
/* OMX_U16 */
|
||||
#if USHRT_MAX == 0xffff
|
||||
typedef unsigned short int OMX_U16;
|
||||
#elif UINT_MAX == 0xffff
|
||||
typedef unsigned int OMX_U16;
|
||||
#else
|
||||
#error OMX_U16 undefined
|
||||
#endif
|
||||
|
||||
|
||||
/* OMX_S16 */
|
||||
#if SHRT_MAX == 0x7fff
|
||||
typedef signed short int OMX_S16;
|
||||
#elif INT_MAX == 0x7fff
|
||||
typedef signed int OMX_S16;
|
||||
#else
|
||||
#error OMX_S16 undefined
|
||||
#endif
|
||||
|
||||
|
||||
/* OMX_U32: unsigned 32-bit integer.
 *
 * Pick the first unsigned type whose maximum value is exactly 2^32-1.
 * The fallback has to be checked against ULONG_MAX, the limit of the type it
 * actually selects (unsigned long).  The previous test used LONG_MAX, which is
 * 0x7fffffff for a 32-bit long, so on a target with 16-bit int and 32-bit long
 * the fallback never matched and compilation stopped at the #error below. */
#if UINT_MAX == 0xffffffff
typedef unsigned int OMX_U32;
#elif ULONG_MAX == 0xffffffff
typedef unsigned long int OMX_U32;
#else
#error OMX_U32 undefined
#endif
|
||||
|
||||
|
||||
/* OMX_S32 */
|
||||
#if INT_MAX == 0x7fffffff
|
||||
typedef signed int OMX_S32;
|
||||
#elif LONG_MAX == 0x7fffffff
|
||||
typedef long signed int OMX_S32;
|
||||
#else
|
||||
#error OMX_S32 undefined
|
||||
#endif
|
||||
|
||||
|
||||
/* OMX_U64 & OMX_S64: 64-bit integer types and their limits.
 *
 * Notes on the limit macros:
 *  - OMX_MIN_S64 is written as (-MAX - 1).  The literal 0x8000000000000000
 *    does not fit in a signed 64-bit type, so with a signed suffix it is
 *    typed as an unsigned 64-bit value and comparisons such as
 *    (OMX_MIN_S64 < 0) evaluate to false.
 *  - The unsigned limits carry an unsigned suffix (ULL / ui64) so the
 *    constants have the same signedness as OMX_U64.
 */
#if defined( _WIN32 ) || defined ( _WIN64 )
typedef __int64 OMX_S64; /** Signed 64-bit integer */
typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */
#define OMX_MIN_S64 (-0x7FFFFFFFFFFFFFFFi64 - 1)
#define OMX_MIN_U64 (0x0000000000000000ui64)
#define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFi64)
#define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFui64)
#else
typedef long long OMX_S64; /** Signed 64-bit integer */
typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */
#define OMX_MIN_S64 (-0x7FFFFFFFFFFFFFFFLL - 1)
#define OMX_MIN_U64 (0x0000000000000000ULL)
#define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFLL)
#define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFULL)
#endif
|
||||
|
||||
|
||||
/* OMX_SC8 */
|
||||
typedef struct
|
||||
{
|
||||
OMX_S8 Re; /** Real part */
|
||||
OMX_S8 Im; /** Imaginary part */
|
||||
|
||||
} OMX_SC8; /** Signed 8-bit complex number */
|
||||
|
||||
|
||||
/* OMX_SC16 */
|
||||
typedef struct
|
||||
{
|
||||
OMX_S16 Re; /** Real part */
|
||||
OMX_S16 Im; /** Imaginary part */
|
||||
|
||||
} OMX_SC16; /** Signed 16-bit complex number */
|
||||
|
||||
|
||||
/* OMX_SC32 */
|
||||
typedef struct
|
||||
{
|
||||
OMX_S32 Re; /** Real part */
|
||||
OMX_S32 Im; /** Imaginary part */
|
||||
|
||||
} OMX_SC32; /** Signed 32-bit complex number */
|
||||
|
||||
|
||||
/* OMX_SC64 */
|
||||
typedef struct
|
||||
{
|
||||
OMX_S64 Re; /** Real part */
|
||||
OMX_S64 Im; /** Imaginary part */
|
||||
|
||||
} OMX_SC64; /** Signed 64-bit complex number */
|
||||
|
||||
|
||||
/* OMX_F32 */
|
||||
typedef float OMX_F32; /** Single precision floating point,IEEE 754 */
|
||||
|
||||
|
||||
/* OMX_F64 */
|
||||
typedef double OMX_F64; /** Double precision floating point,IEEE 754 */
|
||||
|
||||
|
||||
/* OMX_INT */
|
||||
typedef int OMX_INT; /** signed integer corresponding to machine word length, has maximum signed value INT_MAX*/
|
||||
|
||||
|
||||
#define OMX_MIN_S8 (-128)
|
||||
#define OMX_MIN_U8 0
|
||||
#define OMX_MIN_S16 (-32768)
|
||||
#define OMX_MIN_U16 0
|
||||
#define OMX_MIN_S32 (-2147483647-1)
|
||||
#define OMX_MIN_U32 0
|
||||
|
||||
#define OMX_MAX_S8 (127)
|
||||
#define OMX_MAX_U8 (255)
|
||||
#define OMX_MAX_S16 (32767)
|
||||
#define OMX_MAX_U16 (0xFFFF)
|
||||
#define OMX_MAX_S32 (2147483647)
|
||||
#define OMX_MAX_U32 (0xFFFFFFFF)
|
||||
|
||||
typedef void OMXVoid;
|
||||
|
||||
#ifndef NULL
|
||||
#define NULL ((void*)0)
|
||||
#endif
|
||||
|
||||
/** Defines the geometric position and size of a rectangle,
|
||||
* where x,y defines the coordinates of the top left corner
|
||||
* of the rectangle, with dimensions width in the x-direction
|
||||
* and height in the y-direction */
|
||||
typedef struct {
|
||||
OMX_INT x; /** x-coordinate of top left corner of rectangle */
|
||||
OMX_INT y; /** y-coordinate of top left corner of rectangle */
|
||||
OMX_INT width; /** Width in the x-direction. */
|
||||
OMX_INT height; /** Height in the y-direction. */
|
||||
}OMXRect;
|
||||
|
||||
|
||||
/** Defines the geometric position of a point, */
|
||||
typedef struct
|
||||
{
|
||||
OMX_INT x; /** x-coordinate */
|
||||
OMX_INT y; /** y-coordinate */
|
||||
|
||||
} OMXPoint;
|
||||
|
||||
|
||||
/** Defines the dimensions of a rectangle, or region of interest in an image */
|
||||
typedef struct
|
||||
{
|
||||
OMX_INT width; /** Width of the rectangle, in the x-direction */
|
||||
OMX_INT height; /** Height of the rectangle, in the y-direction */
|
||||
|
||||
} OMXSize;
|
||||
|
||||
#endif /* _OMXTYPES_H_ */
|
76
media/openmax_dl/dl/api/omxtypes_s.h
Normal file
76
media/openmax_dl/dl/api/omxtypes_s.h
Normal file
@ -0,0 +1,76 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: omxtypes_s.h
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 9622
|
||||
@// Last Modified Date: Wed, 06 Feb 2008
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
|
||||
@// Mandatory return codes - use cases are explicitly described for each function
|
||||
.equ OMX_Sts_NoErr, 0 @// No error the function completed successfully
|
||||
.equ OMX_Sts_Err, -2 @// Unknown/unspecified error
|
||||
.equ OMX_Sts_InvalidBitstreamValErr, -182 @// Invalid value detected during bitstream processing
|
||||
.equ OMX_Sts_MemAllocErr, -9 @// Not enough memory allocated for the operation
|
||||
.equ OMX_StsACAAC_GainCtrErr, -159 @// AAC: Unsupported gain control data detected
|
||||
.equ OMX_StsACAAC_PrgNumErr, -167 @// AAC: Invalid number of elements for one program
|
||||
.equ OMX_StsACAAC_CoefValErr, -163 @// AAC: Invalid quantized coefficient value
|
||||
.equ OMX_StsACAAC_MaxSfbErr, -162 @// AAC: Invalid maxSfb value in relation to numSwb
|
||||
.equ OMX_StsACAAC_PlsDataErr, -160 @// AAC: pulse escape sequence data error
|
||||
|
||||
@// Optional return codes - use cases are explicitly described for each function
|
||||
.equ OMX_Sts_BadArgErr, -5 @// Bad Arguments
|
||||
|
||||
.equ OMX_StsACAAC_TnsNumFiltErr, -157 @// AAC: Invalid number of TNS filters
|
||||
.equ OMX_StsACAAC_TnsLenErr, -156 @// AAC: Invalid TNS region length
|
||||
.equ OMX_StsACAAC_TnsOrderErr, -155 @// AAC: Invalid order of TNS filter
|
||||
.equ OMX_StsACAAC_TnsCoefResErr, -154 @// AAC: Invalid bit-resolution for TNS filter coefficients
|
||||
.equ OMX_StsACAAC_TnsCoefErr, -153 @// AAC: Invalid TNS filter coefficients
|
||||
.equ OMX_StsACAAC_TnsDirectErr, -152 @// AAC: Invalid TNS filter direction
|
||||
.equ OMX_StsICJP_JPEGMarkerErr, -183 @// JPEG marker encountered within an entropy-coded block;
|
||||
@// Huffman decoding operation terminated early.
|
||||
.equ OMX_StsICJP_JPEGMarker, -181 @// JPEG marker encountered; Huffman decoding
|
||||
@// operation terminated early.
|
||||
.equ OMX_StsIPPP_ContextMatchErr, -17 @// Context parameter doesn't match to the operation
|
||||
|
||||
.equ OMX_StsSP_EvenMedianMaskSizeErr, -180 @// Even size of the Median Filter mask was replaced by the odd one
|
||||
|
||||
.equ OMX_Sts_MaximumEnumeration, 0x7FFFFFFF
|
||||
|
||||
|
||||
|
||||
.equ OMX_MIN_S8, (-128)
|
||||
.equ OMX_MIN_U8, 0
|
||||
.equ OMX_MIN_S16, (-32768)
|
||||
.equ OMX_MIN_U16, 0
|
||||
|
||||
|
||||
.equ OMX_MIN_S32, (-2147483647-1)
|
||||
.equ OMX_MIN_U32, 0
|
||||
|
||||
.equ OMX_MAX_S8, (127)
|
||||
.equ OMX_MAX_U8, (255)
|
||||
.equ OMX_MAX_S16, (32767)
|
||||
.equ OMX_MAX_U16, (0xFFFF)
|
||||
.equ OMX_MAX_S32, (2147483647)
|
||||
.equ OMX_MAX_U32, (0xFFFFFFFF)
|
||||
|
||||
.equ OMX_VC_UPPER, 0x1 @// Used by the PredictIntra functions
|
||||
.equ OMX_VC_LEFT, 0x2 @// Used by the PredictIntra functions
|
||||
.equ OMX_VC_UPPER_RIGHT, 0x40 @// Used by the PredictIntra functions
|
||||
|
||||
.equ NULL, 0
|
92
media/openmax_dl/dl/sp/api/armSP.h
Normal file
92
media/openmax_dl/dl/sp/api/armSP.h
Normal file
@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This file was originally licensed as follows. It has been
|
||||
* relicensed with permission from the copyright holders.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* File Name: armSP.h
|
||||
* OpenMAX DL: v1.0.2
|
||||
* Last Modified Revision: 7014
|
||||
* Last Modified Date: Wed, 01 Aug 2007
|
||||
*
|
||||
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
*
|
||||
*
|
||||
*
|
||||
* File: armSP.h
|
||||
* Brief: Declares API's/Basic Data types used across the OpenMAX Signal Processing domain
|
||||
*
|
||||
*/
|
||||
#ifndef _armSP_H_
|
||||
#define _armSP_H_
|
||||
|
||||
#include "dl/api/omxtypes.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** FFT Specific declarations */
|
||||
extern OMX_S32 armSP_FFT_S32TwiddleTable[1026];
|
||||
extern OMX_F32 armSP_FFT_F32TwiddleTable[];
|
||||
|
||||
typedef struct ARMsFFTSpec_SC32_Tag
|
||||
{
|
||||
OMX_U32 N;
|
||||
OMX_U16 *pBitRev;
|
||||
OMX_SC32 *pTwiddle;
|
||||
OMX_SC32 *pBuf;
|
||||
}ARMsFFTSpec_SC32;
|
||||
|
||||
|
||||
typedef struct ARMsFFTSpec_SC16_Tag
|
||||
{
|
||||
OMX_U32 N;
|
||||
OMX_U16 *pBitRev;
|
||||
OMX_SC16 *pTwiddle;
|
||||
OMX_SC16 *pBuf;
|
||||
}ARMsFFTSpec_SC16;
|
||||
|
||||
typedef struct ARMsFFTSpec_R_SC32_Tag
|
||||
{
|
||||
OMX_U32 N;
|
||||
OMX_U16 *pBitRev;
|
||||
OMX_SC32 *pTwiddle;
|
||||
OMX_S32 *pBuf;
|
||||
}ARMsFFTSpec_R_SC32;
|
||||
|
||||
typedef struct ARMsFFTSpec_R_FC32_Tag
|
||||
{
|
||||
OMX_U32 N;
|
||||
OMX_U16* pBitRev;
|
||||
OMX_FC32* pTwiddle;
|
||||
OMX_F32* pBuf;
|
||||
} ARMsFFTSpec_R_FC32;
|
||||
|
||||
typedef struct ARMsFFTSpec_FC32_Tag
|
||||
{
|
||||
OMX_U32 N;
|
||||
OMX_U16* pBitRev;
|
||||
OMX_FC32* pTwiddle;
|
||||
OMX_FC32* pBuf;
|
||||
} ARMsFFTSpec_FC32;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*End of File*/
|
||||
|
||||
|
||||
|
2031
media/openmax_dl/dl/sp/api/omxSP.h
Normal file
2031
media/openmax_dl/dl/sp/api/omxSP.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,294 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This is a modification of
|
||||
@// armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.s to support float
|
||||
@// instead of SC32.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute the "preTwiddleRadix2" stage prior to the call to the complexFFT
|
||||
@// It does a Z(k) = Feven(k) + jW^(-k) FOdd(k); k=0,1,2,...N/2-1 computation
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define tmpOrder r4
|
||||
#define pTwiddle r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total num of radix stages required to complete the FFT
|
||||
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
#define round r3
|
||||
|
||||
#define pOut1 r2
|
||||
#define size r7
|
||||
#define step r8
|
||||
#define step1 r9
|
||||
#define twStep r10
|
||||
#define pTwiddleTmp r11
|
||||
#define argTwiddle1 r12
|
||||
#define zero r14
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.F32
|
||||
#define dShift D1.F32
|
||||
#define dX1 D1.F32
|
||||
#define dY0 D2.F32
|
||||
#define dY1 D3.F32
|
||||
#define dX0r D0.F32
|
||||
#define dX0i D1.F32
|
||||
#define dX1r D2.F32
|
||||
#define dX1i D3.F32
|
||||
#define dW0r D4.F32
|
||||
#define dW0i D5.F32
|
||||
#define dW1r D6.F32
|
||||
#define dW1i D7.F32
|
||||
#define dT0 D8.F32
|
||||
#define dT1 D9.F32
|
||||
#define dT2 D10.F32
|
||||
#define dT3 D11.F32
|
||||
#define qT0 D12.F32
|
||||
#define qT1 D14.F32
|
||||
#define qT2 D16.F32
|
||||
#define qT3 D18.F32
|
||||
#define dY0r D4.F32
|
||||
#define dY0i D5.F32
|
||||
#define dY1r D6.F32
|
||||
#define dY1i D7.F32
|
||||
|
||||
#define dY2 D4.F32
|
||||
#define dY3 D5.F32
|
||||
#define dW0 D6.F32
|
||||
#define dW1 D7.F32
|
||||
#define dW0Tmp D10.F32
|
||||
#define dW1Neg D11.F32
|
||||
|
||||
#define half D13.F32
|
||||
|
||||
@ Structure offsets for the FFTSpec
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
@// Read the FFT size N from the FFTSpec structure (no log is taken here)
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
VMOV half, 0.5
|
||||
|
||||
|
||||
MOV size,N,ASR #1 @// preserve the contents of N
|
||||
MOV step,N,LSL #2 @// step = N/2 * 8 bytes
|
||||
|
||||
|
||||
@// Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]}
|
||||
@// Note: W^(k) is stored as negated value and also need to
|
||||
@// conjugate the values from the table
|
||||
|
||||
@// Z(0) : no need of twiddle multiply
|
||||
@// Z(0) = 1/2 { [F(0) + F'(N/2)] +j [F(0) - F'(N/2)] }
|
||||
|
||||
VLD1 dX0,[pSrc],step
|
||||
ADD pOut1,pOut,step @// pOut1 = pOut+ N/2*8 bytes
|
||||
|
||||
VLD1 dX1,[pSrc]!
|
||||
@// twStep = 3N/8 * 8 bytes pointing to W^1
|
||||
SUB twStep,step,size,LSL #1
|
||||
|
||||
MOV step1,size,LSL #2 @// step1 = N/4 * 8 = N/2*4 bytes
|
||||
SUB step1,step1,#8 @// (N/4-1)*8 bytes
|
||||
|
||||
VADD dY0,dX0,dX1 @// [b+d | a+c]
|
||||
VSUB dY1,dX0,dX1 @// [b-d | a-c]
|
||||
VMUL dY0, dY0, half[0]
|
||||
VMUL dY1, dY1, half[0]
|
||||
|
||||
@// dY0= [a-c | a+c] ;dY1= [b-d | b+d]
|
||||
VZIP dY0,dY1
|
||||
|
||||
VSUB dX0,dY0,dY1
|
||||
SUBS size,size,#2
|
||||
VADD dX1,dY0,dY1
|
||||
|
||||
SUB pSrc,pSrc,step
|
||||
|
||||
VST1 dX0[0],[pOut1]!
|
||||
ADD pTwiddleTmp,pTwiddle,#8 @// W^2
|
||||
VST1 dX1[1],[pOut1]!
|
||||
ADD argTwiddle1,pTwiddle,twStep @// W^1
|
||||
|
||||
|
||||
BLT decrementScale\name
|
||||
BEQ lastElement\name
|
||||
|
||||
|
||||
@// Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]}
|
||||
@// Note: W^k is stored as negative values in the table and also
|
||||
@// need to conjugate the values from the table.
|
||||
@//
|
||||
@// Process 4 elements at a time. E.g: Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
|
||||
@// since both of them require F(1),F(2) and F(N/2-2),F(N/2-1)
|
||||
|
||||
|
||||
SUB step,step,#24
|
||||
evenOddButterflyLoop\name :
|
||||
|
||||
|
||||
VLD1 dW0r,[argTwiddle1],step1
|
||||
VLD1 dW1r,[argTwiddle1]!
|
||||
|
||||
VLD2 {dX0r,dX0i},[pSrc],step
|
||||
SUB argTwiddle1,argTwiddle1,step1
|
||||
VLD2 {dX1r,dX1i},[pSrc]!
|
||||
|
||||
SUB step1,step1,#8 @// (N/4-2)*8 bytes
|
||||
VLD1 dW0i,[pTwiddleTmp],step1
|
||||
VLD1 dW1i,[pTwiddleTmp]!
|
||||
SUB pSrc,pSrc,step
|
||||
|
||||
SUB pTwiddleTmp,pTwiddleTmp,step1
|
||||
VREV64 dX1r,dX1r
|
||||
VREV64 dX1i,dX1i
|
||||
SUBS size,size,#4
|
||||
|
||||
|
||||
VSUB dT2,dX0r,dX1r @// a-c
|
||||
VADD dT3,dX0i,dX1i @// b+d
|
||||
VADD dT0,dX0r,dX1r @// a+c
|
||||
VSUB dT1,dX0i,dX1i @// b-d
|
||||
SUB step1,step1,#8
|
||||
|
||||
VMUL dT2, dT2, half[0]
|
||||
VMUL dT3, dT3, half[0]
|
||||
|
||||
VMUL dT0, dT0, half[0]
|
||||
VMUL dT1, dT1, half[0]
|
||||
|
||||
VZIP dW1r,dW1i
|
||||
VZIP dW0r,dW0i
|
||||
|
||||
|
||||
VMUL dX1r,dW1r,dT2
|
||||
VMUL dX1i,dW1r,dT3
|
||||
VMUL dX0r,dW0r,dT2
|
||||
VMUL dX0i,dW0r,dT3
|
||||
|
||||
VMLS dX1r,dW1i,dT3
|
||||
VMLA dX1i,dW1i,dT2
|
||||
|
||||
VMLA dX0r,dW0i,dT3
|
||||
VMLS dX0i,dW0i,dT2
|
||||
|
||||
|
||||
VADD dY1r,dT0,dX1i @// F(N/2 -1)
|
||||
VSUB dY1i,dX1r,dT1
|
||||
|
||||
VREV64 dY1r,dY1r
|
||||
VREV64 dY1i,dY1i
|
||||
|
||||
|
||||
VADD dY0r,dT0,dX0i @// F(1)
|
||||
VSUB dY0i,dT1,dX0r
|
||||
|
||||
|
||||
VST2 {dY0r,dY0i},[pOut1],step
|
||||
VST2 {dY1r,dY1i},[pOut1]!
|
||||
SUB pOut1,pOut1,step
|
||||
SUB step,step,#32 @// (N/2-4)*8 bytes
|
||||
|
||||
|
||||
BGT evenOddButterflyLoop\name
|
||||
|
||||
|
||||
@// set both the ptrs to the last element
|
||||
SUB pSrc,pSrc,#8
|
||||
SUB pOut1,pOut1,#8
|
||||
|
||||
@// Last element can be expanded as follows
|
||||
@// 1/2[Z(k) + Z'(k)] - j w^-k [Z(k) - Z'(k)] (since W^k is stored as
|
||||
@// -ve)
|
||||
@// 1/2[(a+jb) + (a-jb)] - j w^-k [(a+jb) - (a-jb)]
|
||||
@// 1/2[2a+j0] - j (c-jd) [0+j2b]
|
||||
@// (a+bc, -bd)
|
||||
@// Since (c,d) = (0,1) for the last element, result is just (a,-b)
|
||||
|
||||
lastElement\name :
|
||||
VLD1 dX0r,[pSrc]
|
||||
|
||||
VST1 dX0r[0],[pOut1]!
|
||||
VNEG dX0r,dX0r
|
||||
VST1 dX0r[1],[pOut1]
|
||||
|
||||
|
||||
|
||||
decrementScale\name :
|
||||
|
||||
.endm
|
||||
|
||||
M_START armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe,r4
|
||||
|
||||
FFTSTAGE "FALSE","TRUE",Inv
|
||||
M_END
|
||||
|
||||
.end
|
@ -0,0 +1,321 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7485
|
||||
@// Last Modified Date: Fri, 21 Sep 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute the "preTwiddleRadix2" stage prior to the call to the complexFFT
|
||||
@// It does a Z(k) = Feven(k) + jW^(-k) FOdd(k); k=0,1,2,...N/2-1 computation
|
||||
@// It implements both "scaled" (by 1/2) and "unscaled" versions of the above formula
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define tmpOrder r4
|
||||
#define pTwiddle r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total num of radix stages required to complete the FFT
@// (comment kept on its own line so that no stray `@` is left in the
@// replacement text of the macro once the preprocessor strips the `//` part)
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
#define round r3
|
||||
|
||||
#define pOut1 r2
|
||||
#define size r7
|
||||
#define step r8
|
||||
#define step1 r9
|
||||
#define twStep r10
|
||||
#define pTwiddleTmp r11
|
||||
#define argTwiddle1 r12
|
||||
#define zero r14
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.S32
|
||||
#define dShift D1.S32
|
||||
#define dX1 D1.S32
|
||||
#define dY0 D2.S32
|
||||
#define dY1 D3.S32
|
||||
#define dX0r D0.S32
|
||||
#define dX0i D1.S32
|
||||
#define dX1r D2.S32
|
||||
#define dX1i D3.S32
|
||||
#define dW0r D4.S32
|
||||
#define dW0i D5.S32
|
||||
#define dW1r D6.S32
|
||||
#define dW1i D7.S32
|
||||
#define dT0 D8.S32
|
||||
#define dT1 D9.S32
|
||||
#define dT2 D10.S32
|
||||
#define dT3 D11.S32
|
||||
#define qT0 Q6.S64
|
||||
#define qT1 Q7.S64
|
||||
#define qT2 Q8.S64
|
||||
#define qT3 Q9.S64
|
||||
#define dY0r D4.S32
|
||||
#define dY0i D5.S32
|
||||
#define dY1r D6.S32
|
||||
#define dY1i D7.S32
|
||||
|
||||
#define dY2 D4.S32
|
||||
#define dY3 D5.S32
|
||||
#define dW0 D6.S32
|
||||
#define dW1 D7.S32
|
||||
#define dW0Tmp D10.S32
|
||||
#define dW1Neg D11.S32
|
||||
|
||||
|
||||
@ Structure offsets for the FFTSpec
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
|
||||
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
@// Read the size N from the FFTSpec structure (no log is computed in this macro)
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
|
||||
|
||||
MOV size,N,ASR #1 @// preserve the contents of N
|
||||
MOV step,N,LSL #2 @// step = N/2 * 8 bytes
|
||||
|
||||
|
||||
@// Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]}
|
||||
@// Note: W^(k) is stored as negated value and also need to conjugate the values from the table
|
||||
|
||||
@// Z(0) : no need of twiddle multiply
|
||||
@// Z(0) = 1/2 { [F(0) + F'(N/2)] +j [F(0) - F'(N/2)] }
|
||||
|
||||
VLD1 dX0,[pSrc],step
|
||||
ADD pOut1,pOut,step @// pOut1 = pOut+ N/2*8 bytes
|
||||
|
||||
VLD1 dX1,[pSrc]!
|
||||
SUB twStep,step,size,LSL #1 @// twStep = 3N/8 * 8 bytes pointing to W^1
|
||||
|
||||
MOV step1,size,LSL #2 @// step1 = N/4 * 8 = N/2*4 bytes
|
||||
SUB step1,step1,#8 @// (N/4-1)*8 bytes
|
||||
|
||||
VHADD dY0,dX0,dX1 @// [b+d | a+c]
|
||||
VHSUB dY1,dX0,dX1 @// [b-d | a-c]
|
||||
VZIP dY0,dY1 @// dY0= [a-c | a+c] ;dY1= [b-d | b+d]
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
VHSUB dX0,dY0,dY1
|
||||
SUBS size,size,#2
|
||||
VHADD dX1,dY0,dY1
|
||||
.else
|
||||
VSUB dX0,dY0,dY1
|
||||
SUBS size,size,#2
|
||||
VADD dX1,dY0,dY1
|
||||
.endif
|
||||
|
||||
SUB pSrc,pSrc,step
|
||||
|
||||
VST1 dX0[0],[pOut1]!
|
||||
ADD pTwiddleTmp,pTwiddle,#8 @// W^2
|
||||
VST1 dX1[1],[pOut1]!
|
||||
ADD argTwiddle1,pTwiddle,twStep @// W^1
|
||||
|
||||
|
||||
BLT decrementScale\name
|
||||
BEQ lastElement\name
|
||||
|
||||
|
||||
@// Z(k) = 1/2[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]
|
||||
@// Note: W^k is stored as negative values in the table and also need to conjugate the values from the table
|
||||
@// Process 4 elements at a time. E.g: Z(1),Z(2) and Z(N/2-2),Z(N/2-1) since both of them
|
||||
@// require F(1),F(2) and F(N/2-2),F(N/2-1)
|
||||
|
||||
|
||||
SUB step,step,#24
|
||||
evenOddButterflyLoop\name :
|
||||
|
||||
|
||||
VLD1 dW0r,[argTwiddle1],step1
|
||||
VLD1 dW1r,[argTwiddle1]!
|
||||
|
||||
VLD2 {dX0r,dX0i},[pSrc],step
|
||||
SUB argTwiddle1,argTwiddle1,step1
|
||||
VLD2 {dX1r,dX1i},[pSrc]!
|
||||
|
||||
SUB step1,step1,#8 @// (N/4-2)*8 bytes
|
||||
VLD1 dW0i,[pTwiddleTmp],step1
|
||||
VLD1 dW1i,[pTwiddleTmp]!
|
||||
SUB pSrc,pSrc,step
|
||||
|
||||
SUB pTwiddleTmp,pTwiddleTmp,step1
|
||||
VREV64 dX1r,dX1r
|
||||
VREV64 dX1i,dX1i
|
||||
SUBS size,size,#4
|
||||
|
||||
|
||||
VHSUB dT2,dX0r,dX1r @// a-c
|
||||
VHADD dT3,dX0i,dX1i @// b+d
|
||||
SUB step1,step1,#8
|
||||
VHADD dT0,dX0r,dX1r @// a+c
|
||||
VHSUB dT1,dX0i,dX1i @// b-d
|
||||
|
||||
VZIP dW1r,dW1i
|
||||
VZIP dW0r,dW0i
|
||||
|
||||
|
||||
VMULL qT0,dW1r,dT2
|
||||
VMLSL qT0,dW1i,dT3
|
||||
VMULL qT1,dW1r,dT3
|
||||
VMLAL qT1,dW1i,dT2
|
||||
|
||||
VMULL qT2,dW0r,dT2
|
||||
VMLAL qT2,dW0i,dT3
|
||||
VMULL qT3,dW0r,dT3
|
||||
VMLSL qT3,dW0i,dT2
|
||||
|
||||
|
||||
VRSHRN dX1r,qT0,#31
|
||||
VRSHRN dX1i,qT1,#31
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
VHADD dY1r,dT0,dX1i @// F(N/2 -1)
|
||||
VHSUB dY1i,dX1r,dT1
|
||||
.else
|
||||
VADD dY1r,dT0,dX1i @// F(N/2 -1)
|
||||
VSUB dY1i,dX1r,dT1
|
||||
|
||||
.endif
|
||||
|
||||
|
||||
VREV64 dY1r,dY1r
|
||||
VREV64 dY1i,dY1i
|
||||
|
||||
|
||||
VRSHRN dX0r,qT2,#31
|
||||
VRSHRN dX0i,qT3,#31
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
VHADD dY0r,dT0,dX0i @// F(1)
|
||||
VHSUB dY0i,dT1,dX0r
|
||||
.else
|
||||
VADD dY0r,dT0,dX0i @// F(1)
|
||||
VSUB dY0i,dT1,dX0r
|
||||
.endif
|
||||
|
||||
|
||||
VST2 {dY0r,dY0i},[pOut1],step
|
||||
VST2 {dY1r,dY1i},[pOut1]!
|
||||
SUB pOut1,pOut1,step
|
||||
SUB step,step,#32 @// (N/2-4)*8 bytes
|
||||
|
||||
|
||||
BGT evenOddButterflyLoop\name
|
||||
|
||||
|
||||
SUB pSrc,pSrc,#8 @// set both the ptrs to the last element
|
||||
SUB pOut1,pOut1,#8
|
||||
|
||||
@// Last element can be expanded as follows
|
||||
@// 1/2[Z(k) + Z'(k)] - j w^-k [Z(k) - Z'(k)] (since W^k is stored as -ve)
|
||||
@// 1/2[(a+jb) + (a-jb)] - j w^-k [(a+jb) - (a-jb)]
|
||||
@// 1/2[2a+j0] - j (c-jd) [0+j2b]
|
||||
@// (a+bc, -bd)
|
||||
@// Since (c,d) = (0,1) for the last element, result is just (a,-b)
|
||||
|
||||
lastElement\name :
|
||||
VLD1 dX0r,[pSrc]
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
VSHR dX0r,dX0r,#1
|
||||
.endif
|
||||
|
||||
VST1 dX0r[0],[pOut1]!
|
||||
VNEG dX0r,dX0r
|
||||
VST1 dX0r[1],[pOut1]
|
||||
|
||||
|
||||
|
||||
decrementScale\name :
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
SUB scale,scale,#1
|
||||
.endif
|
||||
|
||||
.endm
|
||||
|
||||
@// Unscaled entry point: expands FFTSTAGE with scaled="FALSE", which selects
@// the VADD/VSUB (non-halving) branches and leaves the scale register alone.
@// The inverse argument is not referenced by FFTSTAGE in this file; the last
@// argument only makes the local labels unique. M_START/M_END are defined
@// outside this view.
M_START armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe,r4
|
||||
|
||||
FFTSTAGE "FALSE","TRUE",Inv
|
||||
M_END
|
||||
|
||||
@// Scaled (Sfs) entry point: expands FFTSTAGE with scaled="TRUE", which
@// selects the halving VHADD/VHSUB branches and ends with SUB scale,scale,#1.
@// The inverse argument is not referenced by FFTSTAGE in this file; the last
@// argument only makes the local labels unique. M_START/M_END are defined
@// outside this view.
M_START armSP_FFTInv_CCSToR_S32_Sfs_preTwiddleRadix2_unsafe,r4
|
||||
|
||||
FFTSTAGE "TRUE","TRUE",InvSfs
|
||||
M_END
|
||||
|
||||
|
||||
.end
|
@ -0,0 +1,134 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This is a modification of armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S
|
||||
@// to support float instead of SC32.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute the first stage of a Radix 2 DIT in-order out-of-place FFT
|
||||
@// for an N point complex signal.
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define pPingPongBuf r5
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define pointStep r3
|
||||
#define outPointStep r3
|
||||
#define grpSize r4
|
||||
#define setCount r4
|
||||
#define step r8
|
||||
#define dstStep r8
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dX0 D0.F32
|
||||
#define dX1 D1.F32
|
||||
#define dY0 D2.F32
|
||||
#define dY1 D3.F32
|
||||
|
||||
|
||||
@// FFTSTAGE (radix-2, first stage, out of place).
@// Each pass combines two 8-byte elements that lie pointStep bytes apart
@// (one D register each; presumably one complex float, per the file header).
@// Reads:   pSrc (r0), pDst (r2), pPingPongBuf (r5), subFFTNum (r6).
@// Leaves:  subFFTSize (r7) = 2, subFFTNum (r6) = half its entry value,
@//          pSrc = final pDst minus pointStep, pDst = pPingPongBuf.
@// Scratch: r3, r4, r8, D0-D3 and the condition flags.
@// The scaled and inverse arguments are not referenced in this body; only
@// the name argument is used, to make the loop label unique per expansion.
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
@// No stack arguments are referenced by this stage
|
||||
|
||||
|
||||
@// update subFFTSize (r7) and subFFTNum (r6) for the next stage
|
||||
|
||||
|
||||
MOV subFFTSize,#2
|
||||
LSR grpSize,subFFTNum,#1
|
||||
MOV subFFTNum,grpSize
|
||||
|
||||
|
||||
@// pointStep is a byte offset: grpSize elements of 8 bytes each
|
||||
@// (the two inputs of one butterfly are pointStep bytes apart)
|
||||
@// Note: outPointStep = pointStep for the first stage (both names are r3)
|
||||
@// Note: setCount shares r4 with grpSize, so it starts at the halved subFFTNum
|
||||
|
||||
MOV pointStep,grpSize,LSL #3
|
||||
RSB step,pointStep,#8
|
||||
|
||||
|
||||
@// Loop on the sets for grp zero
|
||||
|
||||
grpZeroSetLoop\name :
|
||||
|
||||
VLD1 dX0,[pSrc],pointStep
|
||||
VLD1 dX1,[pSrc],step @// step = -pointStep + 8
|
||||
SUBS setCount,setCount,#1
|
||||
|
||||
VADD dY0,dX0,dX1
|
||||
VSUB dY1,dX0,dX1
|
||||
|
||||
VST1 dY0,[pDst],outPointStep
|
||||
@// dstStep = step = -pointStep + 8 (both names are r8)
|
||||
VST1 dY1,[pDst],dstStep
|
||||
|
||||
BGT grpZeroSetLoop\name
|
||||
|
||||
|
||||
@// next stage reads what this stage wrote (pDst advanced 8 bytes per pass)
|
||||
SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
|
||||
MOV pDst,pPingPongBuf
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Forward first-stage radix-2 entry point. FFTSTAGE in this file never
@// references its scaled or inverse arguments, so this expands to the same
@// instruction sequence as the inverse entry point; only the label suffix
@// (fwd) differs.
M_START armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",fwd
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse first-stage radix-2 entry point. FFTSTAGE in this file never
@// references its scaled or inverse arguments, so this expands to the same
@// instruction sequence as the forward entry point; only the label suffix
@// (inv) differs.
M_START armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",inv
|
||||
M_END
|
||||
|
||||
.end
|
@ -0,0 +1,153 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This is a modification of armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
|
||||
@// to support float instead of SC32.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute the last stage of a Radix 2 DIT in-order out-of-place FFT
|
||||
@// for an N point complex signal.
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
|
||||
#define outPointStep r3
|
||||
#define grpCount r4
|
||||
#define dstStep r5
|
||||
#define pTmp r4
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dWr d0.f32
|
||||
#define dWi d1.f32
|
||||
#define dXr0 d2.f32
|
||||
#define dXi0 d3.f32
|
||||
#define dXr1 d4.f32
|
||||
#define dXi1 d5.f32
|
||||
#define dYr0 d6.f32
|
||||
#define dYi0 d7.f32
|
||||
#define dYr1 d8.f32
|
||||
#define dYi1 d9.f32
|
||||
#define qT0 d10.f32
|
||||
#define qT1 d12.f32
|
||||
|
||||
@// FFTSTAGE (radix-2, last stage, out of place), two butterflies per pass.
@// Reads:   pSrc (r0), pTwiddle (r1), pDst (r2), subFFTSize (r7).
@// Leaves:  subFFTNum (r6) = 1, subFFTSize (r7) doubled, and pSrc, pDst and
@//          pTwiddle adjusted by the SUBs at the end of the body.
@// Scratch: r3, r4, r5, d0-d10, d12 and the condition flags.
@// inverse = "TRUE" multiplies by the conjugated twiddle; any other value
@// multiplies by the twiddle as stored. The scaled argument is not referenced.
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
|
||||
MOV outPointStep,subFFTSize,LSL #3
|
||||
@// Update subFFTNum and grpCount right away
|
||||
|
||||
MOV subFFTNum,#1 @//after the last stage
|
||||
LSL grpCount,subFFTSize,#1
|
||||
|
||||
@// update subFFTSize for the next stage
|
||||
MOV subFFTSize,grpCount
|
||||
|
||||
RSB dstStep,outPointStep,#16
|
||||
|
||||
|
||||
@// Loop on 2 grps at a time for the last stage
|
||||
|
||||
radix2lsGrpLoop\name :
|
||||
@ dWr = [pTwiddle[0].Re, pTwiddle[1].Re]
|
||||
@ dWi = [pTwiddle[0].Im, pTwiddle[1].Im]
|
||||
VLD2 {dWr,dWi},[pTwiddle :64]!
|
||||
|
||||
@ dXr0 = [pSrc[0].Re, pSrc[2].Re]
|
||||
@ dXi0 = [pSrc[0].Im, pSrc[2].Im]
|
||||
@ dXr1 = [pSrc[1].Re, pSrc[3].Re]
|
||||
@ dXi1 = [pSrc[1].Im, pSrc[3].Im]
|
||||
VLD4 {dXr0,dXi0,dXr1,dXi1},[pSrc :128]!
|
||||
SUBS grpCount,grpCount,#4 @// grpCount holds 2*subFFTSize and 2 groups are done per pass
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
@// T = X1 * conj(W): re = Wr*Xr1 + Wi*Xi1, im = Wr*Xi1 - Wi*Xr1
|
||||
VMUL qT0,dWr,dXr1
|
||||
VMLA qT0,dWi,dXi1 @// real part
|
||||
VMUL qT1,dWr,dXi1
|
||||
VMLS qT1,dWi,dXr1 @// imag part
|
||||
|
||||
.else
@// T = X1 * W: re = Wr*Xr1 - Wi*Xi1, im = Wr*Xi1 + Wi*Xr1
|
||||
|
||||
VMUL qT0,dWr,dXr1
|
||||
VMLS qT0,dWi,dXi1 @// real part
|
||||
VMUL qT1,dWr,dXi1
|
||||
VMLA qT1,dWi,dXr1 @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
@// Y0 = X0 - T (stored first), Y1 = X0 + T (stored outPointStep bytes later)
VSUB dYr0,dXr0,qT0
|
||||
VSUB dYi0,dXi0,qT1
|
||||
VADD dYr1,dXr0,qT0
|
||||
VADD dYi1,dXi0,qT1
|
||||
|
||||
VST2 {dYr0,dYi0},[pDst],outPointStep
|
||||
VST2 {dYr1,dYi1},[pDst],dstStep @// dstStep = -outPointStep + 16
|
||||
|
||||
BGT radix2lsGrpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
|
||||
MOV pTmp,pDst
|
||||
SUB pDst,pSrc,outPointStep,LSL #1 @// new pDst = pSrc - 2*outPointStep (pSrc moved 32 bytes per pass)
|
||||
SUB pSrc,pTmp,outPointStep
|
||||
|
||||
@// Reset pTwiddle for the next stage
|
||||
SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= outPointStep bytes (16 bytes were read per pass)
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Forward last-stage radix-2 entry point: inverse="FALSE" selects the
@// .else branch of FFTSTAGE (multiply by the twiddle as stored). The scaled
@// argument is not referenced by FFTSTAGE in this file.
M_START armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe,r4,""
|
||||
FFTSTAGE "FALSE","FALSE",fwd
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse last-stage radix-2 entry point: inverse="TRUE" selects the
@// conjugated twiddle multiply in FFTSTAGE. The scaled argument is not
@// referenced by FFTSTAGE in this file.
M_START armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",inv
|
||||
M_END
|
||||
|
||||
.end
|
191
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S
Normal file
191
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S
Normal file
@ -0,0 +1,191 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This is a modification of armSP_FFT_CToC_SC32_Radix2_unsafe_s.s
|
||||
@// to support float instead of SC32.
|
||||
@//
|
||||
|
||||
@// Description:
|
||||
@// Compute a Radix 2 DIT in-order out-of-place FFT stage for an N point
|
||||
@// complex signal. This handles the general stage, not the first or last
|
||||
@// stage.
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define outPointStep r3
|
||||
#define pointStep r4
|
||||
#define grpCount r5
|
||||
#define setCount r8
|
||||
@//const RN 9
|
||||
#define step r10
|
||||
#define dstStep r11
|
||||
#define pTable r9
|
||||
#define pTmp r9
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dW D0.F32
|
||||
#define dX0 D2.F32
|
||||
#define dX1 D3.F32
|
||||
#define dX2 D4.F32
|
||||
#define dX3 D5.F32
|
||||
#define dY0 D6.F32
|
||||
#define dY1 D7.F32
|
||||
#define dY2 D8.F32
|
||||
#define dY3 D9.F32
|
||||
#define qT0 D10.F32
|
||||
#define qT1 D11.F32
|
||||
|
||||
|
||||
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
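@// General (neither first nor last) radix-2 stage. Inputs arrive in
@// registers (pSrc, pDst, pTwiddle, subFFTNum, subFFTSize); this body
@// references no stack arguments. The inverse argument selects the
@// conjugated twiddle multiply; the scaled argument is not referenced.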
|
||||
|
||||
|
||||
@// Update grpCount and grpSize right away in order to reuse pGrpCount
|
||||
@// and pGrpSize regs
|
||||
|
||||
LSR subFFTNum,subFFTNum,#1 @//grpSize
|
||||
LSL grpCount,subFFTSize,#1
|
||||
|
||||
|
||||
@// pT0+1 increments pT0 by 8 bytes
|
||||
@// pT0+pointStep = increment of 8*pointStep bytes = 4*grpSize bytes
|
||||
MOV pointStep,subFFTNum,LSL #2
|
||||
|
||||
@// update subFFTSize for the next stage
|
||||
MOV subFFTSize,grpCount
|
||||
|
||||
@// pOut0+1 increments pOut0 by 8 bytes
|
||||
@// pOut0+outPointStep == increment of 8*outPointStep bytes =
|
||||
@// 4*size bytes
|
||||
SMULBB outPointStep,grpCount,pointStep
|
||||
LSL pointStep,pointStep,#1
|
||||
|
||||
|
||||
RSB step,pointStep,#16
|
||||
RSB dstStep,outPointStep,#16
|
||||
|
||||
@// Loop on the groups
|
||||
|
||||
radix2GrpLoop\name :
|
||||
MOV setCount,pointStep,LSR #3
|
||||
VLD1 dW,[pTwiddle],pointStep @//[wi | wr]
|
||||
|
||||
|
||||
@// Loop on the sets
|
||||
|
||||
|
||||
radix2SetLoop\name :
|
||||
|
||||
|
||||
@// point0: dX0-real part dX1-img part
|
||||
VLD2 {dX0,dX1},[pSrc],pointStep
|
||||
@// point1: dX2-real part dX3-img part
|
||||
VLD2 {dX2,dX3},[pSrc],step
|
||||
|
||||
SUBS setCount,setCount,#2
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMUL qT0,dX2,dW[0]
|
||||
VMLA qT0,dX3,dW[1] @// real part
|
||||
VMUL qT1,dX3,dW[0]
|
||||
VMLS qT1,dX2,dW[1] @// imag part
|
||||
|
||||
.else
|
||||
|
||||
VMUL qT0,dX2,dW[0]
|
||||
VMLS qT0,dX3,dW[1] @// real part
|
||||
VMUL qT1,dX3,dW[0]
|
||||
VMLA qT1,dX2,dW[1] @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
VSUB dY0,dX0,qT0
|
||||
VSUB dY1,dX1,qT1
|
||||
VADD dY2,dX0,qT0
|
||||
VADD dY3,dX1,qT1
|
||||
|
||||
VST2 {dY0,dY1},[pDst],outPointStep
|
||||
@// dstStep = -outPointStep + 16
|
||||
VST2 {dY2,dY3},[pDst],dstStep
|
||||
|
||||
BGT radix2SetLoop\name
|
||||
|
||||
SUBS grpCount,grpCount,#2
|
||||
ADD pSrc,pSrc,pointStep
|
||||
BGT radix2GrpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
|
||||
MOV pTmp,pDst
|
||||
@// pDst -= 4*size; pSrc -= 8*size bytes
|
||||
SUB pDst,pSrc,outPointStep,LSL #1
|
||||
SUB pSrc,pTmp,outPointStep
|
||||
|
||||
@// Reset pTwiddle for the next stage
|
||||
@// pTwiddle -= 4*size bytes
|
||||
SUB pTwiddle,pTwiddle,outPointStep
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Forward general-stage radix-2 entry point: inverse="FALSE" selects the
@// .else branch of FFTSTAGE (multiply by the twiddle as stored). The scaled
@// argument is not referenced by FFTSTAGE in this file.
M_START armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse general-stage radix-2 entry point: inverse="TRUE" selects the
@// conjugated twiddle multiply in FFTSTAGE. The scaled argument is not
@// referenced by FFTSTAGE in this file.
M_START armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
.end
|
@ -0,0 +1,251 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@//
|
||||
@// This is a modification of armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
|
||||
@// to support float instead of SC32.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute the first stage of a Radix 4 FFT for an N point complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define pPingPongBuf r5
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define grpSize r3
|
||||
@// Reuse grpSize as setCount
|
||||
#define setCount r3
|
||||
#define pointStep r4
|
||||
#define outPointStep r4
|
||||
#define setStep r8
|
||||
#define step1 r9
|
||||
#define step3 r10
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dXr0 D0.F32
|
||||
#define dXi0 D1.F32
|
||||
#define dXr1 D2.F32
|
||||
#define dXi1 D3.F32
|
||||
#define dXr2 D4.F32
|
||||
#define dXi2 D5.F32
|
||||
#define dXr3 D6.F32
|
||||
#define dXi3 D7.F32
|
||||
#define dYr0 D8.F32
|
||||
#define dYi0 D9.F32
|
||||
#define dYr1 D10.F32
|
||||
#define dYi1 D11.F32
|
||||
#define dYr2 D12.F32
|
||||
#define dYi2 D13.F32
|
||||
#define dYr3 D14.F32
|
||||
#define dYi3 D15.F32
|
||||
#define qX0 Q0.F32
|
||||
#define qX1 Q1.F32
|
||||
#define qX2 Q2.F32
|
||||
#define qX3 Q3.F32
|
||||
#define qY0 Q4.F32
|
||||
#define qY1 Q5.F32
|
||||
#define qY2 Q6.F32
|
||||
#define qY3 Q7.F32
|
||||
#define dZr0 D16.F32
|
||||
#define dZi0 D17.F32
|
||||
#define dZr1 D18.F32
|
||||
#define dZi1 D19.F32
|
||||
#define dZr2 D20.F32
|
||||
#define dZi2 D21.F32
|
||||
#define dZr3 D22.F32
|
||||
#define dZi3 D23.F32
|
||||
#define qZ0 Q8.F32
|
||||
#define qZ1 Q9.F32
|
||||
#define qZ2 Q10.F32
|
||||
#define qZ3 Q11.F32
|
||||
|
||||
|
||||
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
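@// First radix-4 stage (all twiddle factors are 1, so there are no
@// multiplies). Inputs arrive in registers; this body references no stack
@// arguments. The scaled argument is not referenced; the inverse argument
@// only exchanges which rotated result is stored second and which fourth.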
|
||||
|
||||
@// pT0+1 increments pT0 by 8 bytes
|
||||
@// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
|
||||
@// Note: outPointStep = pointStep for firststage
|
||||
|
||||
MOV pointStep,subFFTNum,LSL #1
|
||||
|
||||
|
||||
@// Update pSubFFTSize and pSubFFTNum regs
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
|
||||
@// subFFTSize = 1 for the first stage; set it to 4 for the next stage
|
||||
MOV subFFTSize,#4
|
||||
|
||||
@// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
|
||||
LSR grpSize,subFFTNum,#2
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
MOV subFFTNum,grpSize
|
||||
|
||||
|
||||
@// Calculate the step of input data for the next set
|
||||
@//MOV setStep,pointStep,LSL #1
|
||||
MOV setStep,grpSize,LSL #4
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
@// setStep = 3*pointStep
|
||||
ADD setStep,setStep,pointStep
|
||||
@// setStep = - 3*pointStep+16
|
||||
RSB setStep,setStep,#16
|
||||
|
||||
@// data[3] & update pSrc for the next set
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep
|
||||
@// step1 = 2*pointStep
|
||||
MOV step1,pointStep,LSL #1
|
||||
|
||||
VADD qY0,qX0,qX2
|
||||
|
||||
@// step3 = -pointStep
|
||||
RSB step3,pointStep,#0
|
||||
|
||||
@// grp = 0 a special case since all the twiddle factors are 1
|
||||
@// Loop on the sets : 2 sets at a time
|
||||
|
||||
radix4fsGrpZeroSetLoop\name :
|
||||
|
||||
|
||||
|
||||
@// Decrement setcount
|
||||
SUBS setCount,setCount,#2
|
||||
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
|
||||
|
||||
VSUB qY2,qX0,qX2
|
||||
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
|
||||
VADD qY1,qX1,qX3
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],step3 @// data[2]
|
||||
VSUB qY3,qX1,qX3
|
||||
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
|
||||
VADD qZ0,qY0,qY1
|
||||
|
||||
@// data[3] & update pSrc for the next set, but not if it's the
|
||||
@// last iteration so that we don't read past the end of the
|
||||
@// input array.
|
||||
BEQ radix4SkipLastUpdateInv\name
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep
|
||||
radix4SkipLastUpdateInv\name:
|
||||
VSUB dZr3,dYr2,dYi3
|
||||
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VADD dZi3,dYi2,dYr3
|
||||
|
||||
VSUB qZ1,qY0,qY1
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VADD dZr2,dYr2,dYi3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VSUB dZi2,dYi2,dYr3
|
||||
|
||||
VADD qY0,qX0,qX2 @// u0 for next iteration
|
||||
VST2 {dZr2,dZi2},[pDst :128],setStep
|
||||
|
||||
|
||||
.else
|
||||
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
|
||||
VADD qZ0,qY0,qY1
|
||||
|
||||
@// data[3] & update pSrc for the next set, but not if it's the
|
||||
@// last iteration so that we don't read past the end of the
|
||||
@// input array.
|
||||
BEQ radix4SkipLastUpdateFwd\name
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep
|
||||
radix4SkipLastUpdateFwd\name:
|
||||
VADD dZr2,dYr2,dYi3
|
||||
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VSUB dZi2,dYi2,dYr3
|
||||
|
||||
VSUB qZ1,qY0,qY1
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
|
||||
VSUB dZr3,dYr2,dYi3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VADD dZi3,dYi2,dYr3
|
||||
|
||||
VADD qY0,qX0,qX2 @// u0 for next iteration
|
||||
VST2 {dZr3,dZi3},[pDst :128],setStep
|
||||
|
||||
.endif
|
||||
|
||||
BGT radix4fsGrpZeroSetLoop\name
|
||||
|
||||
@// reset pSrc to pDst for the next stage
|
||||
SUB pSrc,pDst,pointStep @// pDst -= 2*grpSize
|
||||
MOV pDst,pPingPongBuf
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Forward first-stage radix-4 entry point: inverse="FALSE" selects the
@// .else branch of FFTSTAGE, which stores dZ0, dZ2, dZ1, dZ3 in that order.
@// The scaled argument is not referenced by FFTSTAGE in this file.
M_START armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",fwd
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse first-stage radix-4 entry point: inverse="TRUE" selects the
@// branch of FFTSTAGE that stores dZ0, dZ3, dZ1, dZ2 in that order.
@// The scaled argument is not referenced by FFTSTAGE in this file.
M_START armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",inv
|
||||
M_END
|
||||
|
||||
|
||||
.end
|
@ -0,0 +1,339 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This is a modification of armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.s
|
||||
@// to support float instead of SC32.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute the last stage of a Radix 4 FFT for an N point complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
@//IMPORT armAAC_constTable
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define outPointStep r3
|
||||
#define grpCount r4
|
||||
#define dstStep r5
|
||||
#define grpTwStep r8
|
||||
#define stepTwiddle r9
|
||||
#define twStep r10
|
||||
#define pTmp r4
|
||||
#define step16 r11
|
||||
#define step24 r12
|
||||
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dButterfly1Real02 D0.F32
|
||||
#define dButterfly1Imag02 D1.F32
|
||||
#define dButterfly1Real13 D2.F32
|
||||
#define dButterfly1Imag13 D3.F32
|
||||
#define dButterfly2Real02 D4.F32
|
||||
#define dButterfly2Imag02 D5.F32
|
||||
#define dButterfly2Real13 D6.F32
|
||||
#define dButterfly2Imag13 D7.F32
|
||||
#define dXr0 D0.F32
|
||||
#define dXi0 D1.F32
|
||||
#define dXr1 D2.F32
|
||||
#define dXi1 D3.F32
|
||||
#define dXr2 D4.F32
|
||||
#define dXi2 D5.F32
|
||||
#define dXr3 D6.F32
|
||||
#define dXi3 D7.F32
|
||||
|
||||
#define dYr0 D16.F32
|
||||
#define dYi0 D17.F32
|
||||
#define dYr1 D18.F32
|
||||
#define dYi1 D19.F32
|
||||
#define dYr2 D20.F32
|
||||
#define dYi2 D21.F32
|
||||
#define dYr3 D22.F32
|
||||
#define dYi3 D23.F32
|
||||
|
||||
#define dW1r D8.F32
|
||||
#define dW1i D9.F32
|
||||
#define dW2r D10.F32
|
||||
#define dW2i D11.F32
|
||||
#define dW3r D12.F32
|
||||
#define dW3i D13.F32
|
||||
#define qT0 d14.f32
|
||||
#define qT1 d16.F32
|
||||
#define qT2 d18.F32
|
||||
#define qT3 d20.f32
|
||||
#define qT4 d22.f32
|
||||
#define qT5 d24.f32
|
||||
|
||||
#define dZr0 D14.F32
|
||||
#define dZi0 D15.F32
|
||||
#define dZr1 D26.F32
|
||||
#define dZi1 D27.F32
|
||||
#define dZr2 D28.F32
|
||||
#define dZi2 D29.F32
|
||||
#define dZr3 D30.F32
|
||||
#define dZi3 D31.F32
|
||||
|
||||
#define qX0 Q0.F32
|
||||
#define qY0 Q8.F32
|
||||
#define qY1 Q9.F32
|
||||
#define qY2 Q10.F32
|
||||
#define qY3 Q11.F32
|
||||
#define qZ0 Q7.F32
|
||||
#define qZ1 Q13.F32
|
||||
#define qZ2 Q14.F32
|
||||
#define qZ3 Q15.F32
|
||||
|
||||
|
||||
|
||||
.MACRO FFTSTAGE scaled, inverse , name
|
||||
|
||||
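@// Last radix-4 stage, two groups per loop pass. Inputs arrive in
@// registers; this body references no stack arguments. The scaled argument
@// is not referenced; the inverse argument selects the conjugated twiddle
@// multiplies and the store order of dZ1 and dZ3.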
|
||||
|
||||
|
||||
@// pOut0+1 increments pOut0 by 8 bytes
|
||||
@// pOut0+outPointStep == increment of 8*outPointStep bytes
|
||||
MOV outPointStep,subFFTSize,LSL #3
|
||||
|
||||
@// Update grpCount and grpSize rightaway
|
||||
|
||||
VLD2 {dW1r,dW1i},[pTwiddle :128] @// [wi|wr]
|
||||
MOV step16,#16
|
||||
LSL grpCount,subFFTSize,#2
|
||||
|
||||
VLD1 dW2r,[pTwiddle :64] @// [wi|wr]
|
||||
MOV subFFTNum,#1 @//after the last stage
|
||||
|
||||
VLD1 dW3r,[pTwiddle :64],step16 @// [wi|wr]
|
||||
MOV stepTwiddle,#0
|
||||
|
||||
VLD1 dW2i,[pTwiddle :64]! @// [wi|wr]
|
||||
SUB grpTwStep,stepTwiddle,#8 @// grpTwStep = -8 to start with
|
||||
|
||||
@// update subFFTSize for the next stage
|
||||
MOV subFFTSize,grpCount
|
||||
VLD1 dW3i,[pTwiddle :64],grpTwStep @// [wi|wr]
|
||||
MOV dstStep,outPointStep,LSL #1
|
||||
|
||||
@// AC.r AC.i BD.r BD.i
|
||||
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]!
|
||||
ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
|
||||
RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16
|
||||
MOV step24,#24
|
||||
|
||||
@// AC.r AC.i BD.r BD.i
|
||||
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]!
|
||||
|
||||
|
||||
@// Process two groups at a time
|
||||
|
||||
radix4lsGrpLoop\name :
|
||||
|
||||
VZIP dW2r,dW2i
|
||||
ADD stepTwiddle,stepTwiddle,#16
|
||||
VZIP dW3r,dW3i
|
||||
ADD grpTwStep,stepTwiddle,#4
|
||||
VUZP dButterfly1Real13, dButterfly2Real13 @// B.r D.r
|
||||
SUB twStep,stepTwiddle,#16 @// -16+stepTwiddle
|
||||
VUZP dButterfly1Imag13, dButterfly2Imag13 @// B.i D.i
|
||||
MOV grpTwStep,grpTwStep,LSL #1
|
||||
VUZP dButterfly1Real02, dButterfly2Real02 @// A.r C.r
|
||||
RSB grpTwStep,grpTwStep,#0 @// -8-2*stepTwiddle
|
||||
|
||||
|
||||
VUZP dButterfly1Imag02, dButterfly2Imag02 @// A.i C.i
|
||||
|
||||
|
||||
@// grpCount is multiplied by 4
|
||||
SUBS grpCount,grpCount,#8
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMUL dZr1,dW1r,dXr1
|
||||
VMLA dZr1,dW1i,dXi1 @// real part
|
||||
VMUL dZi1,dW1r,dXi1
|
||||
VMLS dZi1,dW1i,dXr1 @// imag part
|
||||
|
||||
.else
|
||||
|
||||
VMUL dZr1,dW1r,dXr1
|
||||
VMLS dZr1,dW1i,dXi1 @// real part
|
||||
VMUL dZi1,dW1r,dXi1
|
||||
VMLA dZi1,dW1i,dXr1 @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
VLD2 {dW1r,dW1i},[pTwiddle :128],stepTwiddle @// [wi|wr]
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMUL dZr2,dW2r,dXr2
|
||||
VMLA dZr2,dW2i,dXi2 @// real part
|
||||
VMUL dZi2,dW2r,dXi2
|
||||
VLD1 dW2r,[pTwiddle :64],step16 @// [wi|wr]
|
||||
VMLS dZi2,dW2i,dXr2 @// imag part
|
||||
|
||||
.else
|
||||
|
||||
VMUL dZr2,dW2r,dXr2
|
||||
VMLS dZr2,dW2i,dXi2 @// real part
|
||||
VMUL dZi2,dW2r,dXi2
|
||||
VLD1 dW2r,[pTwiddle :64],step16 @// [wi|wr]
|
||||
VMLA dZi2,dW2i,dXr2 @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
|
||||
VLD1 dW2i,[pTwiddle :64],twStep @// [wi|wr]
|
||||
|
||||
@// move qX0 so as to load for the next iteration
|
||||
VMOV qZ0,qX0
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMUL dZr3,dW3r,dXr3
|
||||
VMLA dZr3,dW3i,dXi3 @// real part
|
||||
VMUL dZi3,dW3r,dXi3
|
||||
VLD1 dW3r,[pTwiddle :64],step24
|
||||
VMLS dZi3,dW3i,dXr3 @// imag part
|
||||
|
||||
.else
|
||||
|
||||
VMUL dZr3,dW3r,dXr3
|
||||
VMLS dZr3,dW3i,dXi3 @// real part
|
||||
VMUL dZi3,dW3r,dXi3
|
||||
VLD1 dW3r,[pTwiddle :64],step24
|
||||
VMLA dZi3,dW3i,dXr3 @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
VLD1 dW3i,[pTwiddle :64],grpTwStep @// [wi|wr]
|
||||
|
||||
@// Don't do the load on the last iteration so we don't read past the end
|
||||
@// of pSrc.
|
||||
addeq pSrc, pSrc, #64
|
||||
beq radix4lsSkipRead\name
|
||||
@// AC.r AC.i BD.r BD.i
|
||||
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]!
|
||||
|
||||
@// AC.r AC.i BD.r BD.i
|
||||
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]!
|
||||
radix4lsSkipRead\name:
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
|
||||
VADD qY0,qZ0,qZ2
|
||||
VSUB qY2,qZ0,qZ2
|
||||
VADD qY1,qZ1,qZ3
|
||||
VSUB qY3,qZ1,qZ3
|
||||
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VSUB qZ0,qY2,qY1
|
||||
|
||||
VADD dZr3,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VSUB dZi3,dYi0,dYr3
|
||||
|
||||
VADD qZ2,qY2,qY1
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VSUB dZr1,dYr0,dYi3
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VADD dZi1,dYi0,dYr3
|
||||
|
||||
@// dstStep = -outPointStep + 16
|
||||
VST2 {dZr1,dZi1},[pDst :128],dstStep
|
||||
|
||||
|
||||
.else
|
||||
|
||||
VSUB qZ0,qY2,qY1
|
||||
|
||||
VSUB dZr1,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VADD dZi1,dYi0,dYr3
|
||||
|
||||
VADD qZ2,qY2,qY1
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
|
||||
VADD dZr3,dYr0,dYi3
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VSUB dZi3,dYi0,dYr3
|
||||
|
||||
@// dstStep = -outPointStep + 16
|
||||
VST2 {dZr3,dZi3},[pDst :128],dstStep
|
||||
|
||||
|
||||
.endif
|
||||
|
||||
BGT radix4lsGrpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
|
||||
MOV pTmp,pDst
|
||||
@// Extra increment done in final iteration of the loop
|
||||
SUB pSrc,pSrc,#64
|
||||
@// pDst -= 4*size; pSrc -= 8*size bytes
|
||||
SUB pDst,pSrc,outPointStep,LSL #2
|
||||
SUB pSrc,pTmp,outPointStep
|
||||
SUB pTwiddle,pTwiddle,subFFTSize,LSL #1
|
||||
@// Extra increment done in final iteration of the loop
|
||||
SUB pTwiddle,pTwiddle,#16
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
@// Forward last-stage radix-4 FC32 entry point: the body is one expansion
@// of the FFTSTAGE macro that ends just above.
@// NOTE(review): the macro header is not visible from here; the arguments
@// are presumably (scaled, inverse, name) as in the sibling files, i.e.
@// inverse = "FALSE" and "fwd" is the suffix for the macro-local labels.
@// M_START/M_END come from dl/api/armCOMM_s.h; r4 is presumably the
@// register-save argument - confirm there.
M_START armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",fwd
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse last-stage radix-4 FC32 entry point: the body is one expansion
@// of the FFTSTAGE macro that ends just above.
@// NOTE(review): the macro header is not visible from here; the arguments
@// are presumably (scaled, inverse, name) as in the sibling files, i.e.
@// inverse = "TRUE" and "inv" is the suffix for the macro-local labels.
@// M_START/M_END come from dl/api/armCOMM_s.h; r4 is presumably the
@// register-save argument - confirm there.
M_START armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",inv
|
||||
M_END
|
||||
|
||||
|
||||
.end
|
331
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S
Normal file
331
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S
Normal file
@ -0,0 +1,331 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@//
|
||||
@// This is a modification of armSP_FFT_CToC_SC32_Radix4_unsafe_s.s
|
||||
@// to support float instead of SC32.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a Radix 4 FFT stage for a N point complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define grpCount r3
|
||||
#define pointStep r4
|
||||
#define outPointStep r5
|
||||
#define stepTwiddle r12
|
||||
#define setCount r14
|
||||
#define srcStep r8
|
||||
#define setStep r9
|
||||
#define dstStep r10
|
||||
#define twStep r11
|
||||
#define t1 r3
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dW1 D0.F32
|
||||
#define dW2 D1.F32
|
||||
#define dW3 D2.F32
|
||||
|
||||
#define dXr0 D4.F32
|
||||
#define dXi0 D5.F32
|
||||
#define dXr1 D6.F32
|
||||
#define dXi1 D7.F32
|
||||
#define dXr2 D8.F32
|
||||
#define dXi2 D9.F32
|
||||
#define dXr3 D10.F32
|
||||
#define dXi3 D11.F32
|
||||
#define dYr0 D12.F32
|
||||
#define dYi0 D13.F32
|
||||
#define dYr1 D14.F32
|
||||
#define dYi1 D15.F32
|
||||
#define dYr2 D16.F32
|
||||
#define dYi2 D17.F32
|
||||
#define dYr3 D18.F32
|
||||
#define dYi3 D19.F32
|
||||
#define qT0 d16.f32
|
||||
#define qT1 d18.f32
|
||||
#define qT2 d12.f32
|
||||
#define qT3 d14.f32
|
||||
#define dZr0 D20.F32
|
||||
#define dZi0 D21.F32
|
||||
#define dZr1 D22.F32
|
||||
#define dZi1 D23.F32
|
||||
#define dZr2 D24.F32
|
||||
#define dZi2 D25.F32
|
||||
#define dZr3 D26.F32
|
||||
#define dZi3 D27.F32
|
||||
|
||||
#define qY0 Q6.F32
|
||||
#define qY1 Q7.F32
|
||||
#define qY2 Q8.F32
|
||||
#define qY3 Q9.F32
|
||||
#define qX0 Q2.F32
|
||||
#define qZ0 Q10.F32
|
||||
#define qZ1 Q11.F32
|
||||
#define qZ2 Q12.F32
|
||||
#define qZ3 Q13.F32
|
||||
|
||||
@// FFTSTAGE scaled, inverse, name
@//   Radix-4 FFT stage on FC32 (float) complex data.
@//   scaled  - not referenced anywhere in this macro body.
@//   inverse - "TRUE": each input is multiplied by (w[0] - j*w[1]) and the
@//             inverse form of the final butterfly is used; any other
@//             value: multiply by (w[0] + j*w[1]) and use the forward form.
@//   name    - suffix appended to the local labels so the macro can be
@//             expanded more than once in one file.
@//   Register roles are given by the #defines above. On exit subFFTNum has
@//   been divided by 4, subFFTSize multiplied by 4, and pSrc/pDst have been
@//   recomputed from the final pointers for the next stage.
.MACRO FFTSTAGE scaled, inverse , name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
|
||||
@// Update grpCount and grpSize rightaway inorder to reuse
|
||||
@// pGrpCount and pGrpSize regs
|
||||
|
||||
LSL grpCount,subFFTSize,#2
|
||||
LSR subFFTNum,subFFTNum,#2
|
||||
MOV subFFTSize,grpCount
|
||||
|
||||
VLD1 dW1,[pTwiddle] @//[wi | wr]
|
||||
@// pT0+1 increments pT0 by 8 bytes
|
||||
@// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
|
||||
MOV pointStep,subFFTNum,LSL #1
|
||||
|
||||
|
||||
@// pOut0+1 increments pOut0 by 8 bytes
|
||||
@// pOut0+outPointStep == increment of 8*outPointStep bytes
|
||||
@// = 2*size bytes
|
||||
|
||||
MOV stepTwiddle,#0
|
||||
VLD1 dW2,[pTwiddle] @//[wi | wr]
|
||||
@// SMULBB is a signed multiply of the bottom 16 bits of each operand, so
@// this relies on grpCount and pointStep both fitting in a halfword.
SMULBB outPointStep,grpCount,pointStep
|
||||
LSL pointStep,pointStep,#2 @// 2*grpSize
|
||||
|
||||
VLD1 dW3,[pTwiddle] @//[wi | wr]
|
||||
MOV srcStep,pointStep,LSL #1 @// srcStep = 2*pointStep
|
||||
ADD setStep,srcStep,pointStep @// setStep = 3*pointStep
|
||||
|
||||
RSB setStep,setStep,#0 @// setStep = - 3*pointStep
|
||||
SUB srcStep,srcStep,#16 @// srcStep = 2*pointStep-16
|
||||
|
||||
MOV dstStep,outPointStep,LSL #1
|
||||
ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
|
||||
@// dstStep = - 3*outPointStep+16
|
||||
RSB dstStep,dstStep,#16
|
||||
|
||||
|
||||
|
||||
radix4GrpLoop\name :
|
||||
|
||||
VLD2 {dXr0,dXi0},[pSrc],pointStep @// data[0]
|
||||
ADD stepTwiddle,stepTwiddle,pointStep
|
||||
VLD2 {dXr1,dXi1},[pSrc],pointStep @// data[1]
|
||||
@// set pTwiddle to the first point
|
||||
ADD pTwiddle,pTwiddle,stepTwiddle
|
||||
VLD2 {dXr2,dXi2},[pSrc],pointStep @// data[2]
|
||||
MOV twStep,stepTwiddle,LSL #2
|
||||
|
||||
@// data[3] & update pSrc for the next set
|
||||
VLD2 {dXr3,dXi3},[pSrc],setStep
|
||||
SUB twStep,stepTwiddle,twStep @// twStep = -3*stepTwiddle
|
||||
|
||||
MOV setCount,pointStep,LSR #3
|
||||
@// set pSrc to data[0] of the next set
|
||||
ADD pSrc,pSrc,#16
|
||||
@// increment to data[1] of the next set
|
||||
ADD pSrc,pSrc,pointStep
|
||||
|
||||
|
||||
@// Loop on the sets
|
||||
|
||||
radix4SetLoop\name :
|
||||
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMUL dZr1,dXr1,dW1[0]
|
||||
VMUL dZi1,dXi1,dW1[0]
|
||||
VMUL dZr2,dXr2,dW2[0]
|
||||
VMUL dZi2,dXi2,dW2[0]
|
||||
VMUL dZr3,dXr3,dW3[0]
|
||||
VMUL dZi3,dXi3,dW3[0]
|
||||
|
||||
VMLA dZr1,dXi1,dW1[1] @// real part
|
||||
VMLS dZi1,dXr1,dW1[1] @// imag part
|
||||
|
||||
@// data[1] for next iteration
|
||||
VLD2 {dXr1,dXi1},[pSrc],pointStep
|
||||
|
||||
VMLA dZr2,dXi2,dW2[1] @// real part
|
||||
VMLS dZi2,dXr2,dW2[1] @// imag part
|
||||
|
||||
@// data[2] for next iteration
|
||||
VLD2 {dXr2,dXi2},[pSrc],pointStep
|
||||
|
||||
VMLA dZr3,dXi3,dW3[1] @// real part
|
||||
VMLS dZi3,dXr3,dW3[1] @// imag part
|
||||
.else
|
||||
VMUL dZr1,dXr1,dW1[0]
|
||||
VMUL dZi1,dXi1,dW1[0]
|
||||
VMUL dZr2,dXr2,dW2[0]
|
||||
VMUL dZi2,dXi2,dW2[0]
|
||||
VMUL dZr3,dXr3,dW3[0]
|
||||
VMUL dZi3,dXi3,dW3[0]
|
||||
|
||||
VMLS dZr1,dXi1,dW1[1] @// real part
|
||||
VMLA dZi1,dXr1,dW1[1] @// imag part
|
||||
|
||||
@// data[1] for next iteration
|
||||
VLD2 {dXr1,dXi1},[pSrc],pointStep
|
||||
|
||||
VMLS dZr2,dXi2,dW2[1] @// real part
|
||||
VMLA dZi2,dXr2,dW2[1] @// imag part
|
||||
|
||||
@// data[2] for next iteration
|
||||
VLD2 {dXr2,dXi2},[pSrc],pointStep
|
||||
|
||||
VMLS dZr3,dXi3,dW3[1] @// real part
|
||||
VMLA dZi3,dXr3,dW3[1] @// imag part
|
||||
.endif
|
||||
|
||||
@// data[3] & update pSrc to data[0]
|
||||
@// But don't read on the very last iteration because that reads past
|
||||
@// the end of pSrc. The last iteration is grpCount = 4, setCount = 2.
|
||||
cmp grpCount, #4
|
||||
cmpeq setCount, #2 @// Test setCount if grpCount = 4
|
||||
@// These are executed only if both grpCount = 4 and setCount = 2
|
||||
addeq pSrc, pSrc, setStep
|
||||
beq radix4SkipRead\name
|
||||
VLD2 {dXr3,dXi3},[pSrc],setStep
|
||||
radix4SkipRead\name:
|
||||
@// The flags set here stay live until the BGT at the bottom of the set
@// loop: nothing in between writes the condition flags.
SUBS setCount,setCount,#2
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
VADD qY0,qX0,qZ2
|
||||
VSUB qY2,qX0,qZ2
|
||||
|
||||
@// data[0] for next iteration
|
||||
VLD2 {dXr0,dXi0},[pSrc :128]!
|
||||
VADD qY1,qZ1,qZ3
|
||||
VSUB qY3,qZ1,qZ3
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
VSUB qZ0,qY2,qY1
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VADD dZr3,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VSUB dZi3,dYi0,dYr3
|
||||
|
||||
VADD qZ2,qY2,qY1
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VSUB dZr1,dYr0,dYi3
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VADD dZi1,dYi0,dYr3
|
||||
|
||||
VST2 {dZr1,dZi1},[pDst :128],dstStep
|
||||
|
||||
|
||||
.else
|
||||
|
||||
VSUB dZr1,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VADD dZi1,dYi0,dYr3
|
||||
|
||||
VADD qZ2,qY2,qY1
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
|
||||
VADD dZr3,dYr0,dYi3
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VSUB dZi3,dYi0,dYr3
|
||||
|
||||
VST2 {dZr3,dZi3},[pDst :128],dstStep
|
||||
|
||||
|
||||
.endif
|
||||
|
||||
@// increment to data[1] of the next set
|
||||
ADD pSrc,pSrc,pointStep
|
||||
BGT radix4SetLoop\name
|
||||
|
||||
|
||||
VLD1 dW1,[pTwiddle :64],stepTwiddle @//[wi | wr]
|
||||
@// subtract 4 since grpCount multiplied by 4
|
||||
SUBS grpCount,grpCount,#4
|
||||
VLD1 dW2,[pTwiddle :64],stepTwiddle @//[wi | wr]
|
||||
@// increment pSrc for the next grp
|
||||
ADD pSrc,pSrc,srcStep
|
||||
VLD1 dW3,[pTwiddle :64],twStep @//[wi | wr]
|
||||
BGT radix4GrpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
|
||||
@// t1 shares r3 with grpCount, which is no longer needed here.
MOV t1,pDst
|
||||
@// new pDst = pSrc - 4*outPointStep; new pSrc = old pDst - outPointStep
|
||||
SUB pDst,pSrc,outPointStep,LSL #2
|
||||
SUB pSrc,t1,outPointStep
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
@// Forward radix-4 FC32 stage entry point: one expansion of FFTSTAGE with
@// scaled = "FALSE", inverse = "FALSE" and label suffix FWD.
@// M_START/M_END come from dl/api/armCOMM_s.h (not visible here);
@// NOTE(review): r4 is presumably the register-save argument - confirm.
M_START armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse radix-4 FC32 stage entry point: one expansion of FFTSTAGE with
@// scaled = "FALSE", inverse = "TRUE" and label suffix INV.
@// M_START/M_END come from dl/api/armCOMM_s.h (not visible here);
@// NOTE(review): r4 is presumably the register-save argument - confirm.
M_START armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
.end
|
@ -0,0 +1,426 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This is a modification of armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.s
|
||||
@// to support float instead of SC32.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a first stage Radix 8 FFT stage for a N point complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
@// dest buffer for the next stage (not pSrc for first stage)
|
||||
#define pPingPongBuf r5
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define grpSize r3
|
||||
@// Reuse grpSize as setCount
|
||||
#define setCount r3
|
||||
#define pointStep r4
|
||||
#define outPointStep r4
|
||||
#define setStep r8
|
||||
#define step1 r9
|
||||
#define step2 r10
|
||||
#define t0 r11
|
||||
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dXr0 D0.F32
|
||||
#define dXi0 D1.F32
|
||||
#define dXr1 D2.F32
|
||||
#define dXi1 D3.F32
|
||||
#define dXr2 D4.F32
|
||||
#define dXi2 D5.F32
|
||||
#define dXr3 D6.F32
|
||||
#define dXi3 D7.F32
|
||||
#define dXr4 D8.F32
|
||||
#define dXi4 D9.F32
|
||||
#define dXr5 D10.F32
|
||||
#define dXi5 D11.F32
|
||||
#define dXr6 D12.F32
|
||||
#define dXi6 D13.F32
|
||||
#define dXr7 D14.F32
|
||||
#define dXi7 D15.F32
|
||||
#define qX0 Q0.F32
|
||||
#define qX1 Q1.F32
|
||||
#define qX2 Q2.F32
|
||||
#define qX3 Q3.F32
|
||||
#define qX4 Q4.F32
|
||||
#define qX5 Q5.F32
|
||||
#define qX6 Q6.F32
|
||||
#define qX7 Q7.F32
|
||||
|
||||
#define dUr0 D16.F32
|
||||
#define dUi0 D17.F32
|
||||
#define dUr2 D18.F32
|
||||
#define dUi2 D19.F32
|
||||
#define dUr4 D20.F32
|
||||
#define dUi4 D21.F32
|
||||
#define dUr6 D22.F32
|
||||
#define dUi6 D23.F32
|
||||
#define dUr1 D24.F32
|
||||
#define dUi1 D25.F32
|
||||
#define dUr3 D26.F32
|
||||
#define dUi3 D27.F32
|
||||
#define dUr5 D28.F32
|
||||
#define dUi5 D29.F32
|
||||
@// reuse dXr7 and dXi7
|
||||
#define dUr7 D30.F32
|
||||
#define dUi7 D31.F32
|
||||
#define qU0 Q8.F32
|
||||
#define qU1 Q12.F32
|
||||
#define qU2 Q9.F32
|
||||
#define qU3 Q13.F32
|
||||
#define qU4 Q10.F32
|
||||
#define qU5 Q14.F32
|
||||
#define qU6 Q11.F32
|
||||
#define qU7 Q15.F32
|
||||
|
||||
|
||||
#define dVr0 D24.F32
|
||||
#define dVi0 D25.F32
|
||||
#define dVr2 D26.F32
|
||||
#define dVi2 D27.F32
|
||||
#define dVr4 D28.F32
|
||||
#define dVi4 D29.F32
|
||||
#define dVr6 D30.F32
|
||||
#define dVi6 D31.F32
|
||||
#define dVr1 D16.F32
|
||||
#define dVi1 D17.F32
|
||||
#define dVr3 D18.F32
|
||||
#define dVi3 D19.F32
|
||||
#define dVr5 D20.F32
|
||||
#define dVi5 D21.F32
|
||||
#define dVr7 D22.F32
|
||||
#define dVi7 D23.F32
|
||||
#define qV0 Q12.F32
|
||||
#define qV1 Q8.F32
|
||||
#define qV2 Q13.F32
|
||||
#define qV3 Q9.F32
|
||||
#define qV4 Q14.F32
|
||||
#define qV5 Q10.F32
|
||||
#define qV6 Q15.F32
|
||||
#define qV7 Q11.F32
|
||||
|
||||
#define dYr0 D16.F32
|
||||
#define dYi0 D17.F32
|
||||
#define dYr2 D18.F32
|
||||
#define dYi2 D19.F32
|
||||
#define dYr4 D20.F32
|
||||
#define dYi4 D21.F32
|
||||
#define dYr6 D22.F32
|
||||
#define dYi6 D23.F32
|
||||
#define dYr1 D24.F32
|
||||
#define dYi1 D25.F32
|
||||
#define dYr3 D26.F32
|
||||
#define dYi3 D27.F32
|
||||
#define dYr5 D28.F32
|
||||
#define dYi5 D29.F32
|
||||
#define dYr7 D30.F32
|
||||
#define dYi7 D31.F32
|
||||
#define qY0 Q8.F32
|
||||
#define qY1 Q12.F32
|
||||
#define qY2 Q9.F32
|
||||
#define qY3 Q13.F32
|
||||
#define qY4 Q10.F32
|
||||
#define qY5 Q14.F32
|
||||
#define qY6 Q11.F32
|
||||
#define qY7 Q15.F32
|
||||
|
||||
#define dT0 D14.F32
|
||||
#define dT1 D15.F32
|
||||
|
||||
@// Define constants
|
||||
@ sqrt(1/2)
|
||||
ONEBYSQRT2: .float 0.7071067811865476e0
|
||||
|
||||
|
||||
@// FFTSTAGE scaled, inverse, name
@//   First-stage radix-8 butterfly on FC32 (float) complex data. No
@//   twiddle table is read; the only multiplier is the ONEBYSQRT2 constant,
@//   reached through t0.
@//   scaled  - not referenced anywhere in this macro body.
@//   inverse - "TRUE" selects the inverse butterflies, any other value the
@//             forward ones.
@//   name    - suffix appended to the local labels.
@//   Each loop iteration handles two sets (setCount -= 2) and moves pDst
@//   forward by a net 16 bytes. On exit subFFTSize = 8, subFFTNum has been
@//   divided by 8, pSrc = pDst - pointStep and pDst = pPingPongBuf.
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Update pSubFFTSize and pSubFFTNum regs
|
||||
@// subFFTSize is set to 8 here, the value the next stage will see
|
||||
MOV subFFTSize,#8
|
||||
LDR t0,=ONEBYSQRT2
|
||||
|
||||
@// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
|
||||
LSR grpSize,subFFTNum,#3
|
||||
MOV subFFTNum,grpSize
|
||||
|
||||
|
||||
@// pT0+1 increments pT0 by 8 bytes
|
||||
@// pT0+pointStep = increment of 8*pointStep bytes = grpSize bytes
|
||||
@// Note: outPointStep = pointStep for firststage
|
||||
|
||||
MOV pointStep,grpSize,LSL #3
|
||||
|
||||
|
||||
@// Calculate the step of input data for the next set
|
||||
@//MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
|
||||
MOV step1,grpSize,LSL #4
|
||||
|
||||
MOV step2,pointStep,LSL #3
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
SUB step2,step2,pointStep @// step2 = 7*pointStep
|
||||
@// setStep = - 7*pointStep+16
|
||||
RSB setStep,step2,#16
|
||||
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
@// data[7] & update pSrc for the next set
|
||||
@// setStep = -7*pointStep + 16
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep
|
||||
@// grp = 0 a special case since all the twiddle factors are 1
|
||||
@// Loop on the sets
|
||||
|
||||
radix8fsGrpZeroSetLoop\name :
|
||||
|
||||
@// Decrement setcount
|
||||
@// The flags set here are consumed by the BEQ before the data[7] reload
@// and by the BGT at the bottom; nothing in between writes the flags.
SUBS setCount,setCount,#2
|
||||
|
||||
|
||||
@// finish first stage of 8 point FFT
|
||||
|
||||
VADD qU0,qX0,qX4
|
||||
VADD qU2,qX1,qX5
|
||||
VADD qU4,qX2,qX6
|
||||
VADD qU6,qX3,qX7
|
||||
|
||||
@// finish second stage of 8 point FFT
|
||||
|
||||
VADD qV0,qU0,qU4
|
||||
VSUB qV2,qU0,qU4
|
||||
VADD qV4,qU2,qU6
|
||||
VSUB qV6,qU2,qU6
|
||||
|
||||
@// finish third stage of 8 point FFT
|
||||
|
||||
VADD qY0,qV0,qV4
|
||||
VSUB qY4,qV0,qV4
|
||||
VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VSUB dYr2,dVr2,dVi6
|
||||
VADD dYi2,dVi2,dVr6
|
||||
|
||||
VADD dYr6,dVr2,dVi6
|
||||
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
|
||||
VSUB dYi6,dVi2,dVr6
|
||||
|
||||
VSUB qU1,qX0,qX4
|
||||
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
|
||||
|
||||
VSUB qU3,qX1,qX5
|
||||
VSUB qU5,qX2,qX6
|
||||
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
|
||||
|
||||
.ELSE
|
||||
|
||||
@// Forward path: the value written to the y2 slot is built in the
@// dYr6/dYi6 registers and the y6 value in dYr2/dYi2.
VADD dYr6,dVr2,dVi6
|
||||
VSUB dYi6,dVi2,dVr6
|
||||
|
||||
VSUB dYr2,dVr2,dVi6
|
||||
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2
|
||||
VADD dYi2,dVi2,dVr6
|
||||
|
||||
|
||||
VSUB qU1,qX0,qX4
|
||||
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
|
||||
VSUB qU3,qX1,qX5
|
||||
VSUB qU5,qX2,qX6
|
||||
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
@// finish first stage of 8 point FFT
|
||||
|
||||
VSUB qU7,qX3,qX7
|
||||
@// dT0/dT1 are D14/D15, the same registers as dXr7/dXi7: the constant is
@// loaded only after qX7 has been consumed just above, and data[7] is
@// reloaded further down only after the last use of dT0/dT1.
VLD1 dT0[0], [t0]
|
||||
|
||||
@// finish second stage of 8 point FFT
|
||||
|
||||
VSUB dVr1,dUr1,dUi5
|
||||
@// data[0] for next iteration
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],pointStep
|
||||
VADD dVi1,dUi1,dUr5
|
||||
VADD dVr3,dUr1,dUi5
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
VSUB dVi3,dUi1,dUr5
|
||||
|
||||
VSUB dVr5,dUr3,dUi7
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
VADD dVi5,dUi3,dUr7
|
||||
VADD dVr7,dUr3,dUi7
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
|
||||
VSUB dVi7,dUi3,dUr7
|
||||
|
||||
@// finish third stage of 8 point FFT
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
@// calculate a*v5
|
||||
VMUL dT1,dVr5,dT0[0] @// dT1 = Vr5 * ONEBYSQRT2
|
||||
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VMUL dVi5,dVi5,dT0[0]
|
||||
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VSUB dVr5,dT1,dVi5 @// a * V5
|
||||
VADD dVi5,dT1,dVi5
|
||||
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
|
||||
@// calculate b*v7
|
||||
VMUL dT1,dVr7,dT0[0]
|
||||
VMUL dVi7,dVi7,dT0[0]
|
||||
|
||||
VADD qY1,qV1,qV5
|
||||
VSUB qY5,qV1,qV5
|
||||
|
||||
|
||||
VADD dVr7,dT1,dVi7 @// b * V7
|
||||
VSUB dVi7,dVi7,dT1
|
||||
SUB pDst, pDst, step2 @// set pDst to y1
|
||||
|
||||
@// On the last iteration, this will read past the end of pSrc,
|
||||
@// so skip this read.
|
||||
BEQ radix8SkipLastUpdateInv\name
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
|
||||
radix8SkipLastUpdateInv\name:
|
||||
|
||||
VSUB dYr3,dVr3,dVr7
|
||||
VSUB dYi3,dVi3,dVi7
|
||||
VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
|
||||
VADD dYr7,dVr3,dVr7
|
||||
VADD dYi7,dVi3,dVi7
|
||||
|
||||
|
||||
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
|
||||
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
|
||||
VST2 {dYr7,dYi7},[pDst :128] @// store y7
|
||||
ADD pDst, pDst, #16
|
||||
|
||||
.ELSE
|
||||
|
||||
@// calculate b*v7
|
||||
VMUL dT1,dVr7,dT0[0]
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VMUL dVi7,dVi7,dT0[0]
|
||||
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VADD dVr7,dT1,dVi7 @// b * V7
|
||||
VSUB dVi7,dVi7,dT1
|
||||
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
|
||||
@// calculate a*v5
|
||||
VMUL dT1,dVr5,dT0[0] @// dT1 = Vr5 * ONEBYSQRT2
|
||||
VMUL dVi5,dVi5,dT0[0]
|
||||
|
||||
VADD dYr7,dVr3,dVr7
|
||||
VADD dYi7,dVi3,dVi7
|
||||
SUB pDst, pDst, step2 @// set pDst to y1
|
||||
|
||||
VSUB dVr5,dT1,dVi5 @// a * V5
|
||||
VADD dVi5,dT1,dVi5
|
||||
|
||||
@// On the last iteration, this will read past the end of pSrc,
|
||||
@// so skip this read.
|
||||
BEQ radix8SkipLastUpdateFwd\name
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
|
||||
radix8SkipLastUpdateFwd\name:
|
||||
|
||||
VSUB qY5,qV1,qV5
|
||||
|
||||
VSUB dYr3,dVr3,dVr7
|
||||
VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1
|
||||
VSUB dYi3,dVi3,dVi7
|
||||
VADD qY1,qV1,qV5
|
||||
|
||||
|
||||
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3
|
||||
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5
|
||||
VST2 {dYr1,dYi1},[pDst :128]! @// store y7
|
||||
|
||||
.ENDIF
|
||||
|
||||
|
||||
@// update pDst for the next set
|
||||
SUB pDst, pDst, step2
|
||||
BGT radix8fsGrpZeroSetLoop\name
|
||||
|
||||
|
||||
@// reset pSrc to pDst for the next stage
|
||||
SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
|
||||
MOV pDst,pPingPongBuf
|
||||
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
|
||||
|
||||
@// Forward first-stage radix-8 FC32 entry point: one expansion of FFTSTAGE
@// with scaled = "FALSE", inverse = "FALSE" and label suffix FWD.
@// M_START/M_END come from dl/api/armCOMM_s.h (not visible here);
@// NOTE(review): r4 is presumably the register-save argument - confirm.
M_START armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse first-stage radix-8 FC32 entry point: one expansion of FFTSTAGE
@// with scaled = "FALSE", inverse = "TRUE" and label suffix INV.
@// M_START/M_END come from dl/api/armCOMM_s.h (not visible here);
@// NOTE(review): r4 is presumably the register-save argument - confirm.
M_START armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
.end
|
@ -0,0 +1,170 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
|
||||
@//
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC16_Radix2_fs_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 6693
|
||||
@// Last Modified Date: Tue, 10 Jul 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a Radix 2 FFT stage for a N point complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define pPingPongBuf r5
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define pointStep r3
|
||||
#define outPointStep r3
|
||||
#define grpSize r4
|
||||
#define setCount r4
|
||||
#define step r8
|
||||
#define dstStep r8
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dX0 D0.S16
|
||||
#define dX1 D1.S16
|
||||
#define dY0 D2.S16
|
||||
#define dY1 D3.S16
|
||||
#define dX0S32 D0.S32
|
||||
#define dX1S32 D1.S32
|
||||
#define dY0S32 D2.S32
|
||||
#define dY1S32 D3.S32
|
||||
|
||||
|
||||
@// FFTSTAGE scaled, inverse, name
@//   First-stage radix-2 butterfly on SC16 complex data. Each loop
@//   iteration loads two 32-bit points that are pointStep bytes apart and
@//   stores their sum at pDst and their difference outPointStep bytes on.
@//   scaled  - "TRUE" uses the halving forms VHADD/VHSUB, any other value
@//             uses plain VADD/VSUB.
@//   inverse - not referenced anywhere in this macro body.
@//   name    - suffix appended to the loop label.
@//   On exit subFFTSize = 2, subFFTNum has been halved,
@//   pSrc = pDst - pointStep and pDst = pPingPongBuf.
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
|
||||
@// update subFFTSize and subFFTNum into RN6 and RN7 for the next stage
|
||||
|
||||
|
||||
MOV subFFTSize,#2
|
||||
LSR grpSize,subFFTNum,#1
|
||||
MOV subFFTNum,grpSize
|
||||
|
||||
|
||||
@// Each point moved by the loads/stores below is one 32-bit lane (4 bytes)
|
||||
@// pointStep = 4*grpSize bytes
|
||||
@// Note: outPointStep = pointStep for firststage
|
||||
@// Note: setCount shares r4 with grpSize, so the loop runs grpSize times
|
||||
|
||||
MOV pointStep,grpSize,LSL #2
|
||||
RSB step,pointStep,#4
|
||||
|
||||
|
||||
@// Loop on the sets for grp zero: 1 set at a time
|
||||
|
||||
grpZeroSetLoop\name:
|
||||
|
||||
VLD1 {dX0S32[0]},[pSrc],pointStep
|
||||
VLD1 {dX1S32[0]},[pSrc],step @// step = -pointStep + 4
|
||||
SUBS setCount,setCount,#1 @// decrement the loop counter
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
|
||||
VHADD dY0,dX0,dX1
|
||||
VHSUB dY1,dX0,dX1
|
||||
|
||||
.ELSE
|
||||
|
||||
VADD dY0,dX0,dX1
|
||||
VSUB dY1,dX0,dX1
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
VST1 {dY0S32[0]},[pDst],outPointStep
|
||||
VST1 {dY1S32[0]},[pDst],dstStep @// dstStep = step = -pointStep + 4
|
||||
|
||||
BGT grpZeroSetLoop\name
|
||||
|
||||
|
||||
@// reset pSrc to pDst for the next stage
|
||||
SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
|
||||
MOV pDst,pPingPongBuf
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Forward, unscaled first-stage radix-2 SC16 entry point: one expansion of
@// FFTSTAGE with scaled = "FALSE", inverse = "FALSE", label suffix FWD.
@// M_START/M_END come from dl/api/armCOMM_s.h (not visible here);
@// NOTE(review): r4 is presumably the register-save argument - confirm.
M_START armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, unscaled first-stage radix-2 SC16 entry point: one expansion of
@// FFTSTAGE with scaled = "FALSE", inverse = "TRUE", label suffix INV.
@// M_START/M_END come from dl/api/armCOMM_s.h (not visible here);
@// NOTE(review): r4 is presumably the register-save argument - confirm.
M_START armSP_FFTInv_CToC_SC16_Radix2_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Forward, scaled first-stage radix-2 SC16 entry point: one expansion of
@// FFTSTAGE with scaled = "TRUE", inverse = "FALSE", label suffix FWDSFS.
@// M_START/M_END come from dl/api/armCOMM_s.h (not visible here);
@// NOTE(review): r4 is presumably the register-save argument - confirm.
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",FWDSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, scaled first-stage radix-2 SC16 entry point: one expansion of
@// FFTSTAGE with scaled = "TRUE", inverse = "TRUE", label suffix INVSFS.
@// M_START/M_END come from dl/api/armCOMM_s.h (not visible here);
@// NOTE(review): r4 is presumably the register-save argument - confirm.
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",INVSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
|
||||
.END
|
@ -0,0 +1,210 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
|
||||
@//
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 6741
|
||||
@// Last Modified Date: Wed, 18 Jul 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a Radix 2 FFT stage for a N point complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
|
||||
#define outPointStep r3
|
||||
#define grpCount r4
|
||||
#define dstStep r5
|
||||
#define pTmp r4
|
||||
#define step r8
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dWr D0.S16
|
||||
#define dWi D1.S16
|
||||
#define dXr0 D2.S16
|
||||
#define dXi0 D3.S16
|
||||
#define dXr1 D4.S16
|
||||
#define dXi1 D5.S16
|
||||
#define dYr0 D6.S16
|
||||
#define dYi0 D7.S16
|
||||
#define dYr1 D8.S16
|
||||
#define dYi1 D9.S16
|
||||
#define qT0 Q5.S32
|
||||
#define qT1 Q6.S32
|
||||
|
||||
|
||||
@// FFTSTAGE scaled, inverse, name
@//   Last-stage radix-2 butterfly on SC16 complex data, two groups per
@//   loop iteration (lanes [0] and [1]). The second input of each group is
@//   multiplied by its twiddle in 32-bit precision (VMULL/VMLAL/VMLSL) and
@//   narrowed back to 16 bits with a rounding right shift by 15.
@//   scaled  - "TRUE" uses the halving forms VHSUB/VHADD for the final
@//             butterfly, any other value uses plain VSUB/VADD.
@//   inverse - "TRUE" multiplies by (Wr - j*Wi), any other value by
@//             (Wr + j*Wi).
@//   name    - suffix appended to the loop label.
@//   On exit subFFTNum = 1, subFFTSize has been doubled, and pSrc, pDst and
@//   pTwiddle have been recomputed from their final values.
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
|
||||
MOV outPointStep,subFFTSize,LSL #2
|
||||
@// Update grpCount and grpSize rightaway
|
||||
|
||||
MOV subFFTNum,#1 @//after the last stage
|
||||
LSL grpCount,subFFTSize,#1
|
||||
|
||||
@// update subFFTSize for the next stage
|
||||
MOV subFFTSize,grpCount
|
||||
|
||||
SUB step,outPointStep,#4 @// step = -4+outPointStep
|
||||
RSB dstStep,step,#0 @// dstStep = -4-outPointStep+8 = -step
|
||||
@//RSB dstStep,outPointStep,#16
|
||||
|
||||
|
||||
@// Loop on 2 grps at a time for the last stage
|
||||
|
||||
grpLoop\name:
|
||||
VLD2 {dWr[0],dWi[0]},[pTwiddle]! @// grp 0
|
||||
VLD2 {dWr[1],dWi[1]},[pTwiddle]! @// grp 1
|
||||
|
||||
@//VLD2 {dWr,dWi},[pTwiddle],#16
|
||||
|
||||
VLD4 {dXr0[0],dXi0[0],dXr1[0],dXi1[0]},[pSrc]! @// grp 0
|
||||
VLD4 {dXr0[1],dXi0[1],dXr1[1],dXi1[1]},[pSrc]! @// grp 1
|
||||
|
||||
|
||||
@//VLD4 {dXr0,dXi0,dXr1,dXi1},[pSrc],#32
|
||||
@// The flags set here are consumed by the BGT at the bottom of the loop;
@// nothing in between writes the condition flags.
SUBS grpCount,grpCount,#4 @// grpCount is multiplied by 2
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT0,dXr1,dWr
|
||||
VMLAL qT0,dXi1,dWi @// real part
|
||||
VMULL qT1,dXi1,dWr
|
||||
VMLSL qT1,dXr1,dWi @// imag part
|
||||
|
||||
.ELSE
|
||||
VMULL qT0,dXr1,dWr
|
||||
VMLSL qT0,dXi1,dWi @// real part
|
||||
VMULL qT1,dXi1,dWr
|
||||
VMLAL qT1,dXr1,dWi @// imag part
|
||||
|
||||
.ENDIF
|
||||
|
||||
@// Narrow the 32-bit products back into dXr1/dXi1 with rounding.
VRSHRN dXr1,qT0,#15
|
||||
VRSHRN dXi1,qT1,#15
|
||||
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
|
||||
VHSUB dYr0,dXr0,dXr1
|
||||
VHSUB dYi0,dXi0,dXi1
|
||||
VHADD dYr1,dXr0,dXr1
|
||||
VHADD dYi1,dXi0,dXi1
|
||||
|
||||
.ELSE
|
||||
|
||||
VSUB dYr0,dXr0,dXr1
|
||||
VSUB dYi0,dXi0,dXi1
|
||||
VADD dYr1,dXr0,dXr1
|
||||
VADD dYi1,dXi0,dXi1
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
VST2 {dYr0[0],dYi0[0]},[pDst]!
|
||||
VST2 {dYr0[1],dYi0[1]},[pDst],step @// step = -4+outPointStep
|
||||
|
||||
VST2 {dYr1[0],dYi1[0]},[pDst]!
|
||||
VST2 {dYr1[1],dYi1[1]},[pDst],dstStep @// dstStep = -4-outPointStep+8 = -step
|
||||
|
||||
@//VST2 {dYr0,dYi0},[pDst],outPointStep
|
||||
@//VST2 {dYr1,dYi1},[pDst],dstStep @// dstStep = step = -outPointStep + 16
|
||||
|
||||
BGT grpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
|
||||
@// pTmp shares r4 with grpCount, which is no longer needed here.
MOV pTmp,pDst
|
||||
SUB pDst,pSrc,outPointStep,LSL #1 @// new pDst = pSrc - 2*outPointStep
|
||||
SUB pSrc,pTmp,outPointStep @// new pSrc = old pDst - outPointStep
|
||||
|
||||
@// Reset pTwiddle for the next stage
|
||||
SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= outPointStep bytes
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Entry point: expands FFTSTAGE with arguments "FALSE","FALSE" and label suffix FWD
@// (forward, unscaled variant judging by the symbol name; the argument order is
@// presumably scaled, inverse - confirm against the macro header).
M_START armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Entry point: expands FFTSTAGE with arguments "FALSE","TRUE" and label suffix INV
@// (inverse, unscaled variant judging by the symbol name; the argument order is
@// presumably scaled, inverse - confirm against the macro header).
M_START armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Entry point: expands FFTSTAGE with arguments "TRUE","FALSE" and label suffix FWDSFS
@// (forward, scaled variant judging by the symbol name; the argument order is
@// presumably scaled, inverse - confirm against the macro header).
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",FWDSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Entry point: expands FFTSTAGE with arguments "TRUE","TRUE" and label suffix INVSFS
@// (inverse, scaled variant judging by the symbol name; the argument order is
@// presumably scaled, inverse - confirm against the macro header).
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",INVSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
|
||||
.END
|
@ -0,0 +1,216 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
|
||||
@//
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 6740
|
||||
@// Last Modified Date: Wed, 18 Jul 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a Radix 2 FFT stage for a N point complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define outPointStep r3
|
||||
#define grpCount r4
|
||||
#define dstStep r5
|
||||
#define twStep r8
|
||||
#define pTmp r4
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dW1S32 D0.S32
|
||||
#define dW2S32 D1.S32
|
||||
#define dW1 D0.S16
|
||||
#define dW2 D1.S16
|
||||
|
||||
#define dX0 D2.S16
|
||||
#define dX1 D3.S16
|
||||
#define dX2 D4.S16
|
||||
#define dX3 D5.S16
|
||||
#define dY0 D6.S16
|
||||
#define dY1 D7.S16
|
||||
#define dY2 D8.S16
|
||||
#define dY3 D9.S16
|
||||
#define qT0 Q5.S32
|
||||
#define qT1 Q6.S32
|
||||
|
||||
|
||||
@// FFTSTAGE scaled, inverse, name
@//
@// Emits one radix-2 stage that processes two groups per loop iteration.
@//   scaled  - "TRUE" selects the halving butterflies (VHSUB/VHADD);
@//             any other value selects VSUB/VADD.
@//   inverse - "TRUE" computes re = xr*wr + xi*wi, im = xi*wr - xr*wi;
@//             any other value computes re = xr*wr - xi*wi, im = xi*wr + xr*wi.
@//   name    - suffix appended to the grpLoop label so that each expansion
@//             of the macro gets a unique label.
@//
@// Reads pSrc, pDst, pTwiddle, subFFTNum and subFFTSize.
@// On exit subFFTSize is doubled, subFFTNum is halved, pSrc and pDst are
@// swapped/rewound as shown at the end of the macro, and pTwiddle is moved
@// back by outPointStep bytes.
@// Overwrites r3, r4, r5, r8, D0-D13 and the condition flags.
@// NOTE(review): D8-D13 are callee-saved under the AAPCS; presumably the
@// caller of these *_unsafe routines preserves them - confirm.
@// NOTE(review): the difference is stored before the sum, which suggests the
@// twiddle table holds negated factors - confirm against the table generator.
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
|
||||
@// Update grpCount and grpSize right away in order to reuse pGrpCount and pGrpSize regs
|
||||
|
||||
|
||||
LSL grpCount,subFFTSize,#1
|
||||
|
||||
|
||||
@// update subFFTSize for the next stage
|
||||
MOV subFFTSize,grpCount
|
||||
|
||||
@// pOut0+1 increments pOut0 by 8 bytes
|
||||
@// pOut0+outPointStep == increment of 4*outPointStep bytes = 2*size bytes
|
||||
SMULBB outPointStep,grpCount,subFFTNum
|
||||
MOV twStep,subFFTNum,LSL #1
|
||||
LSR subFFTNum,subFFTNum,#1 @//grpSize
|
||||
|
||||
|
||||
RSB dstStep,outPointStep,#8
|
||||
|
||||
|
||||
@// Note: pointStep is 8 in this case, so no extra register is needed
@// (the loads below use the fixed post-increment form)
|
||||
@// Loop on the groups: 2 groups at a time
|
||||
|
||||
grpLoop\name:
|
||||
|
||||
VLD1 dW1S32[],[pTwiddle],twStep @//[wi | wr]
|
||||
VLD1 dW2S32[],[pTwiddle],twStep
|
||||
|
||||
@// Process the sets for each grp: 2 sets at a time (no set looping required)
|
||||
|
||||
VLD1 dX0,[pSrc]! @// point0: of set0,set1 of grp0
|
||||
VLD1 dX1,[pSrc]! @// point1: of set0,set1 of grp0
|
||||
VLD1 dX2,[pSrc]! @// point0: of set0,set1 of grp1
|
||||
VLD1 dX3,[pSrc]! @// point1: of set0,set1 of grp1
|
||||
|
||||
SUBS grpCount,grpCount,#4 @// decrement the loop counter
|
||||
VUZP dW1,dW2 @// de-interleave: dW1 = even lanes, dW2 = odd lanes of both twiddles
|
||||
VUZP dX1,dX3 @// de-interleave point1 of both groups the same way
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT0,dX1,dW1
|
||||
VMLAL qT0,dX3,dW2 @// real part
|
||||
VMULL qT1,dX3,dW1
|
||||
VMLSL qT1,dX1,dW2 @// imag part
|
||||
|
||||
.ELSE
|
||||
VMULL qT0,dX1,dW1
|
||||
VMLSL qT0,dX3,dW2 @// real part
|
||||
VMULL qT1,dX3,dW1
|
||||
VMLAL qT1,dX1,dW2 @// imag part
|
||||
|
||||
.ENDIF
|
||||
|
||||
VRSHRN dX1,qT0,#15 @// round and narrow the 32-bit products back to 16 bits
|
||||
VRSHRN dX3,qT1,#15
|
||||
|
||||
VZIP dX1,dX3 @// re-interleave so dX1/dX3 line up with dX0/dX2 again
|
||||
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
|
||||
VHSUB dY0,dX0,dX1
|
||||
VHADD dY1,dX0,dX1
|
||||
VHSUB dY2,dX2,dX3
|
||||
VHADD dY3,dX2,dX3
|
||||
|
||||
.ELSE
|
||||
|
||||
VSUB dY0,dX0,dX1
|
||||
VADD dY1,dX0,dX1
|
||||
VSUB dY2,dX2,dX3
|
||||
VADD dY3,dX2,dX3
|
||||
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
VST1 dY0,[pDst],outPointStep @// point0: of set0,set1 of grp0
|
||||
VST1 dY1,[pDst],dstStep @// dstStep = -outPointStep + 8
|
||||
VST1 dY2,[pDst],outPointStep @// point0: of set0,set1 of grp1
|
||||
VST1 dY3,[pDst],dstStep @// point1: of set0,set1 of grp1
|
||||
|
||||
|
||||
BGT grpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
|
||||
MOV pTmp,pDst
|
||||
SUB pDst,pSrc,outPointStep,LSL #1 @// pDst -= 2*size; pSrc -= 4*size bytes
|
||||
SUB pSrc,pTmp,outPointStep
|
||||
|
||||
@// Reset pTwiddle for the next stage
|
||||
SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= 2*size bytes
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Forward, unscaled entry point: expands FFTSTAGE with scaled="FALSE",
@// inverse="FALSE" and label suffix FWD.
M_START armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, unscaled entry point: expands FFTSTAGE with scaled="FALSE",
@// inverse="TRUE" and label suffix INV.
M_START armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Forward, scaled entry point: expands FFTSTAGE with scaled="TRUE",
@// inverse="FALSE" and label suffix FWDSFS.
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",FWDSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, scaled entry point: expands FFTSTAGE with scaled="TRUE",
@// inverse="TRUE" and label suffix INVSFS.
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",INVSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
.END
|
219
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_unsafe_s.S
Normal file
219
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix2_unsafe_s.S
Normal file
@ -0,0 +1,219 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
|
||||
@//
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC16_Radix2_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 5892
|
||||
@// Last Modified Date: Thu, 07 Jun 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a Radix 2 FFT stage for a N point complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define outPointStep r3
|
||||
#define pointStep r4
|
||||
#define grpCount r5
|
||||
#define setCount r8
|
||||
#define step r10
|
||||
#define dstStep r11
|
||||
#define pTmp r9
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dW D0.S16
|
||||
#define dX0 D2.S16
|
||||
#define dX1 D3.S16
|
||||
#define dX2 D4.S16
|
||||
#define dX3 D5.S16
|
||||
#define dY0 D6.S16
|
||||
#define dY1 D7.S16
|
||||
#define dY2 D8.S16
|
||||
#define dY3 D9.S16
|
||||
#define qT0 Q3.S32
|
||||
#define qT1 Q4.S32
|
||||
|
||||
|
||||
|
||||
@// FFTSTAGE scaled, inverse, name
@//
@// Emits one general radix-2 stage: an outer loop over the groups (one
@// twiddle load per group) and an inner loop over the sets, four sets per
@// inner iteration.
@//   scaled  - "TRUE" selects the halving butterflies (VHSUB/VHADD);
@//             any other value selects VSUB/VADD.
@//   inverse - "TRUE" computes re = xr*wr + xi*wi, im = xi*wr - xr*wi;
@//             any other value computes re = xr*wr - xi*wi, im = xi*wr + xr*wi.
@//   name    - suffix appended to the grpLoop/setLoop labels so that each
@//             expansion of the macro gets unique labels.
@//
@// Reads pSrc, pDst, pTwiddle, subFFTNum and subFFTSize.
@// On exit subFFTNum is halved, subFFTSize is doubled, pSrc and pDst are
@// swapped/rewound as shown at the end of the macro, and pTwiddle is moved
@// back by outPointStep bytes.
@// Overwrites r3, r4, r5, r8, r9, r10, r11, D0, D2-D9 and the condition flags.
@// Register aliasing: qT0/qT1 (Q3/Q4) occupy the same storage as dY0..dY3
@// (D6..D9); the products are narrowed into dX2/dX3 before dY0..dY3 are
@// written, so the instruction order below must be kept.
@// NOTE(review): D8-D9 are callee-saved under the AAPCS; presumably the
@// caller of these *_unsafe routines preserves them - confirm.
@// NOTE(review): the difference is stored before the sum, which suggests the
@// twiddle table holds negated factors - confirm against the table generator.
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
|
||||
@// Update grpCount and grpSize right away in order to reuse pGrpCount and pGrpSize regs
|
||||
|
||||
LSR subFFTNum,subFFTNum,#1 @//grpSize
|
||||
LSL grpCount,subFFTSize,#1
|
||||
|
||||
|
||||
@// pT0+1 increments pT0 by 8 bytes
|
||||
@// pT0+pointStep = increment of 4*pointStep bytes = 2*grpSize bytes
|
||||
MOV pointStep,subFFTNum,LSL #1
|
||||
|
||||
@// update subFFTSize for the next stage
|
||||
MOV subFFTSize,grpCount
|
||||
|
||||
@// pOut0+1 increments pOut0 by 8 bytes
|
||||
@// pOut0+outPointStep == increment of 4*outPointStep bytes = 2*size bytes
|
||||
SMULBB outPointStep,grpCount,pointStep
|
||||
LSL pointStep,pointStep,#1
|
||||
|
||||
|
||||
RSB step,pointStep,#16 @// step = -pointStep + 16
|
||||
RSB dstStep,outPointStep,#16
|
||||
|
||||
@// Loop on the groups
|
||||
|
||||
grpLoop\name:
|
||||
|
||||
VLD1 dW,[pTwiddle],pointStep @//[wi | wr]
|
||||
MOV setCount,pointStep,LSR #2 @// setCount = pointStep/4
|
||||
|
||||
|
||||
@// Loop on the sets: 4 at a time
|
||||
|
||||
|
||||
setLoop\name:
|
||||
|
||||
|
||||
VLD2 {dX0,dX1},[pSrc],pointStep @// point0: dX0-real part dX1-img part
|
||||
VLD2 {dX2,dX3},[pSrc],step @// point1: dX2-real part dX3-img part
|
||||
|
||||
SUBS setCount,setCount,#4
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT0,dX2,dW[0]
|
||||
VMLAL qT0,dX3,dW[1] @// real part
|
||||
VMULL qT1,dX3,dW[0]
|
||||
VMLSL qT1,dX2,dW[1] @// imag part
|
||||
|
||||
.ELSE
|
||||
|
||||
VMULL qT0,dX2,dW[0]
|
||||
VMLSL qT0,dX3,dW[1] @// real part
|
||||
VMULL qT1,dX3,dW[0]
|
||||
VMLAL qT1,dX2,dW[1] @// imag part
|
||||
|
||||
.ENDIF
|
||||
|
||||
VRSHRN dX2,qT0,#15 @// round and narrow the 32-bit products back to 16 bits
|
||||
VRSHRN dX3,qT1,#15
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
VHSUB dY0,dX0,dX2
|
||||
VHSUB dY1,dX1,dX3
|
||||
VHADD dY2,dX0,dX2
|
||||
VHADD dY3,dX1,dX3
|
||||
|
||||
.ELSE
|
||||
VSUB dY0,dX0,dX2
|
||||
VSUB dY1,dX1,dX3
|
||||
VADD dY2,dX0,dX2
|
||||
VADD dY3,dX1,dX3
|
||||
|
||||
.ENDIF
|
||||
|
||||
VST2 {dY0,dY1},[pDst],outPointStep
|
||||
VST2 {dY2,dY3},[pDst],dstStep @// dstStep = -outPointStep + 16
|
||||
|
||||
BGT setLoop\name
|
||||
|
||||
SUBS grpCount,grpCount,#2
|
||||
ADD pSrc,pSrc,pointStep
|
||||
BGT grpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
|
||||
MOV pTmp,pDst
|
||||
SUB pDst,pSrc,outPointStep,LSL #1 @// pDst -= 2*size; pSrc -= 4*size bytes
|
||||
SUB pSrc,pTmp,outPointStep
|
||||
|
||||
@// Reset pTwiddle for the next stage
|
||||
SUB pTwiddle,pTwiddle,outPointStep @// pTwiddle -= 2*size bytes
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Forward, unscaled entry point: expands FFTSTAGE with scaled="FALSE",
@// inverse="FALSE" and label suffix FWD.
M_START armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, unscaled entry point: expands FFTSTAGE with scaled="FALSE",
@// inverse="TRUE" and label suffix INV.
M_START armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Forward, scaled entry point: expands FFTSTAGE with scaled="TRUE",
@// inverse="FALSE" and label suffix FWDSFS.
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",FWDSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, scaled entry point: expands FFTSTAGE with scaled="TRUE",
@// inverse="TRUE" and label suffix INVSFS.
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",INVSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.END
|
@ -0,0 +1,314 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
|
||||
@//
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7761
|
||||
@// Last Modified Date: Wed, 26 Sep 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute the first-stage Radix 4 FFT stage for an N point complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define pPingPongBuf r5
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define grpSize r3
|
||||
@// Reuse grpSize as setCount
|
||||
#define setCount r3
|
||||
#define pointStep r4
|
||||
#define outPointStep r4
|
||||
#define setStep r8
|
||||
#define step1 r9
|
||||
#define step3 r10
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dXr0 D0.S16
|
||||
#define dXi0 D1.S16
|
||||
#define dXr1 D2.S16
|
||||
#define dXi1 D3.S16
|
||||
#define dXr2 D4.S16
|
||||
#define dXi2 D5.S16
|
||||
#define dXr3 D6.S16
|
||||
#define dXi3 D7.S16
|
||||
#define dYr0 D8.S16
|
||||
#define dYi0 D9.S16
|
||||
#define dYr1 D10.S16
|
||||
#define dYi1 D11.S16
|
||||
#define dYr2 D12.S16
|
||||
#define dYi2 D13.S16
|
||||
#define dYr3 D14.S16
|
||||
#define dYi3 D15.S16
|
||||
#define dZr0 D16.S16
|
||||
#define dZi0 D17.S16
|
||||
#define dZr1 D18.S16
|
||||
#define dZi1 D19.S16
|
||||
#define dZr2 D20.S16
|
||||
#define dZi2 D21.S16
|
||||
#define dZr3 D22.S16
|
||||
#define dZi3 D23.S16
|
||||
#define qY0 Q4.S16
|
||||
#define qY2 Q6.S16
|
||||
#define qX0 Q0.S16
|
||||
#define qX2 Q2.S16
|
||||
|
||||
#define qY1 Q5.S16
|
||||
#define qY3 Q7.S16
|
||||
#define qX1 Q1.S16
|
||||
#define qX3 Q3.S16
|
||||
#define qZ0 Q8.S16
|
||||
#define qZ1 Q9.S16
|
||||
|
||||
|
||||
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the first radix-4 stage. No twiddle multiplication is performed
@// (all twiddle factors are 1 for group 0); four sets are processed per
@// loop iteration.
@//   scaled  - "TRUE" selects the halving butterflies (VHADD/VHSUB);
@//             any other value selects VADD/VSUB.
@//   inverse - "TRUE" selects the inverse output ordering/sign pattern;
@//             any other value selects the forward one.
@//   name    - suffix appended to the grpZeroSetLoop label so that each
@//             expansion of the macro gets a unique label.
@//
@// Reads pSrc, pDst, pPingPongBuf and subFFTNum.
@// On exit subFFTNum = subFFTNum/4, subFFTSize = 4, pSrc = pDst - pointStep
@// and pDst = pPingPongBuf.
@// Overwrites r3, r4, r8, r9, r10, D0-D23 and the condition flags.
@// The loop is software pipelined: the four inputs of the first set and u0
@// are prepared before the loop, and every iteration loads the inputs of the
@// following set while finishing the current one.
@// NOTE(review): because of that pipelining the last iteration appears to
@// load one set beyond the last one processed - confirm the source buffer
@// tolerates the extra read.
@// NOTE(review): D8-D15 are callee-saved under the AAPCS; presumably the
@// caller of these *_unsafe routines preserves them - confirm.
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
MOV pointStep,subFFTNum
|
||||
@// Update pSubFFTSize and pSubFFTNum regs
|
||||
|
||||
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
|
||||
@// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
|
||||
LSR grpSize,subFFTNum,#2
|
||||
MOV subFFTNum,grpSize
|
||||
|
||||
|
||||
@// pT0+1 increments pT0 by 4 bytes
|
||||
@// pT0+pointStep = increment of 4*pointStep bytes = grpSize bytes
|
||||
@// Note: outPointStep = pointStep for the first stage
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
|
||||
|
||||
@// Calculate the step of input data for the next set
|
||||
@//MOV setStep,pointStep,LSL #1
|
||||
MOV setStep,grpSize,LSL #3
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
MOV step1,setStep
|
||||
ADD setStep,setStep,pointStep @// setStep = 3*pointStep
|
||||
RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16
|
||||
|
||||
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3]
|
||||
MOV subFFTSize,#4 @// subFFTSize = 4 after the first stage
|
||||
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
VHADD qY0,qX0,qX2 @// u0
|
||||
.ELSE
|
||||
VADD qY0,qX0,qX2 @// u0
|
||||
.ENDIF
|
||||
RSB step3,pointStep,#0 @// step3 = -pointStep
|
||||
|
||||
@// grp = 0 a special case since all the twiddle factors are 1
|
||||
@// Loop on the sets: 4 sets at a time
|
||||
|
||||
grpZeroSetLoop\name:
|
||||
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
|
||||
VHSUB qY2,qX0,qX2 @// u1
|
||||
SUBS setCount,setCount,#4 @// decrement the set loop counter
|
||||
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
|
||||
VHADD qY1,qX1,qX3 @// u2
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],step3
|
||||
VHSUB qY3,qX1,qX3 @// u3
|
||||
|
||||
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
|
||||
VHADD qZ0,qY0,qY1 @// y0
|
||||
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VHSUB dZr3,dYr2,dYi3 @// y3
|
||||
VHADD dZi3,dYi2,dYr3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
|
||||
VHSUB qZ1,qY0,qY1 @// y2
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VHADD dZr2,dYr2,dYi3 @// y1
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VHSUB dZi2,dYi2,dYr3
|
||||
|
||||
VHADD qY0,qX0,qX2 @// u0 (next loop)
|
||||
VST2 {dZr2,dZi2},[pDst :128],setStep
|
||||
|
||||
|
||||
.ELSE
|
||||
|
||||
VHADD dZr2,dYr2,dYi3 @// y1
|
||||
VHSUB dZi2,dYi2,dYr3
|
||||
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VHSUB qZ1,qY0,qY1 @// y2
|
||||
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VHSUB dZr3,dYr2,dYi3 @// y3
|
||||
VHADD dZi3,dYi2,dYr3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VHADD qY0,qX0,qX2 @// u0 (next loop)
|
||||
VST2 {dZr3,dZi3},[pDst :128],setStep
|
||||
|
||||
.ENDIF
|
||||
|
||||
|
||||
.ELSE
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
|
||||
VSUB qY2,qX0,qX2 @// u1
|
||||
SUBS setCount,setCount,#4 @// decrement the set loop counter
|
||||
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
|
||||
VADD qY1,qX1,qX3 @// u2
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],step3
|
||||
VSUB qY3,qX1,qX3 @// u3
|
||||
|
||||
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
|
||||
VADD qZ0,qY0,qY1 @// y0
|
||||
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VSUB dZr3,dYr2,dYi3 @// y3
|
||||
VADD dZi3,dYi2,dYr3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
|
||||
VSUB qZ1,qY0,qY1 @// y2
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VADD dZr2,dYr2,dYi3 @// y1
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VSUB dZi2,dYi2,dYr3
|
||||
|
||||
VADD qY0,qX0,qX2 @// u0 (next loop)
|
||||
VST2 {dZr2,dZi2},[pDst :128],setStep
|
||||
|
||||
|
||||
.ELSE
|
||||
|
||||
VADD dZr2,dYr2,dYi3 @// y1
|
||||
VSUB dZi2,dYi2,dYr3
|
||||
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VSUB qZ1,qY0,qY1 @// y2
|
||||
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VSUB dZr3,dYr2,dYi3 @// y3
|
||||
VADD dZi3,dYi2,dYr3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VADD qY0,qX0,qX2 @// u0 (next loop)
|
||||
VST2 {dZr3,dZi3},[pDst :128],setStep
|
||||
|
||||
.ENDIF
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
BGT grpZeroSetLoop\name
|
||||
|
||||
|
||||
@// reset pSrc to pDst for the next stage
|
||||
SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
|
||||
MOV pDst,pPingPongBuf
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Forward, unscaled entry point: expands FFTSTAGE with scaled="FALSE",
@// inverse="FALSE" and label suffix FWD.
M_START armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, unscaled entry point: expands FFTSTAGE with scaled="FALSE",
@// inverse="TRUE" and label suffix INV.
M_START armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
@// Forward, scaled entry point: expands FFTSTAGE with scaled="TRUE",
@// inverse="FALSE" and label suffix FWDSFS.
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",FWDSFS
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, scaled entry point: expands FFTSTAGE with scaled="TRUE",
@// inverse="TRUE" and label suffix INVSFS.
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",INVSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.END
|
@ -0,0 +1,410 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
|
||||
@//
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7765
|
||||
@// Last Modified Date: Thu, 27 Sep 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a Radix 4 FFT stage for a N point complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
@//IMPORT armAAC_constTable
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define outPointStep r3
|
||||
#define grpCount r4
|
||||
#define dstStep r5
|
||||
#define pw1 r8
|
||||
#define pw2 r9
|
||||
#define pw3 r10
|
||||
#define pTmp r4
|
||||
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dButterfly1Real02 D0.S16
|
||||
#define dButterfly1Imag02 D1.S16
|
||||
#define dButterfly1Real13 D2.S16
|
||||
#define dButterfly1Imag13 D3.S16
|
||||
#define dButterfly2Real02 D4.S16
|
||||
#define dButterfly2Imag02 D5.S16
|
||||
#define dButterfly2Real13 D6.S16
|
||||
#define dButterfly2Imag13 D7.S16
|
||||
#define dXr0 D0.S16
|
||||
#define dXi0 D1.S16
|
||||
#define dXr1 D2.S16
|
||||
#define dXi1 D3.S16
|
||||
#define dXr2 D4.S16
|
||||
#define dXi2 D5.S16
|
||||
#define dXr3 D6.S16
|
||||
#define dXi3 D7.S16
|
||||
|
||||
#define dW1rS32 D8.S32
|
||||
#define dW1iS32 D9.S32
|
||||
#define dW2rS32 D10.S32
|
||||
#define dW2iS32 D11.S32
|
||||
#define dW3rS32 D12.S32
|
||||
#define dW3iS32 D13.S32
|
||||
|
||||
#define dW1r D8.S16
|
||||
#define dW1i D9.S16
|
||||
#define dW2r D10.S16
|
||||
#define dW2i D11.S16
|
||||
#define dW3r D12.S16
|
||||
#define dW3i D13.S16
|
||||
|
||||
#define dTmp0 D12.S16
|
||||
#define dTmp1 D13.S16
|
||||
#define dTmp1S32 D13.S32
|
||||
#define dTmp2S32 D14.S32
|
||||
#define dTmp3S32 D15.S32
|
||||
|
||||
#define dYr0 D18.S16
|
||||
#define dYi0 D19.S16
|
||||
#define dYr1 D16.S16
|
||||
#define dYi1 D17.S16
|
||||
#define dYr2 D20.S16
|
||||
#define dYi2 D21.S16
|
||||
#define dYr3 D14.S16
|
||||
#define dYi3 D15.S16
|
||||
#define qY0 Q9.S16
|
||||
#define qY1 Q8.S16
|
||||
#define qY2 Q10.S16
|
||||
#define qY3 Q7.S16
|
||||
|
||||
#define qX0 Q0.S16
|
||||
#define qX1 Q1.S16
|
||||
#define qX2 Q2.S16
|
||||
#define qX3 Q3.S16
|
||||
|
||||
#define qT0 Q9.S32
|
||||
#define qT1 Q10.S32
|
||||
#define qT2 Q7.S32
|
||||
#define qT3 Q8.S32
|
||||
|
||||
#define dZr0 D22.S16
|
||||
#define dZi0 D23.S16
|
||||
#define dZr1 D24.S16
|
||||
#define dZi1 D25.S16
|
||||
#define dZr2 D26.S16
|
||||
#define dZi2 D27.S16
|
||||
#define dZr3 D28.S16
|
||||
#define dZi3 D29.S16
|
||||
|
||||
#define qZ0 Q11.S16
|
||||
#define qZ1 Q12.S16
|
||||
#define qZ2 Q13.S16
|
||||
#define qZ3 Q14.S16
|
||||
|
||||
|
||||
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the last radix-4 stage, processing four groups per loop iteration.
@//   scaled  - "TRUE" selects the halving butterflies (VHADD/VHSUB);
@//             any other value selects VADD/VSUB.
@//   inverse - "TRUE" computes re = xr*wr + xi*wi, im = xi*wr - xr*wi for the
@//             twiddle products and uses the inverse output order;
@//             any other value computes re = xr*wr - xi*wi, im = xi*wr + xr*wi
@//             and uses the forward output order.
@//   name    - suffix appended to the grpLoop label so that each expansion
@//             of the macro gets a unique label.
@//
@// Reads pSrc, pDst, pTwiddle and subFFTSize.
@// On exit subFFTNum = 1, subFFTSize is multiplied by 4, and pSrc/pDst are
@// swapped/rewound as shown at the end of the macro.
@// Overwrites r3, r4, r5, r8, r9, r10, D0-D29 and the condition flags.
@//
@// Register aliasing makes the instruction order significant:
@//   - dTmp0/dTmp1 are D12/D13, the same registers as dW3r/dW3i, so every
@//     VLD4 of the w^2 twiddles destroys w^3; w^3 is reloaded afterwards.
@//   - dTmp2S32/dTmp3S32 are D14/D15, i.e. Q7 = qT2 = qY3, so the w^3 loads
@//     must come after qT2 has been narrowed and before qY3 is computed.
@//   - qT0/qT1/qT3 share Q9/Q10/Q8 with qY0/qY2/qY1.
@// The loop is software pipelined: twiddles and input data for the next
@// iteration are loaded inside the current one, which is why pSrc is pulled
@// back by 64 bytes after the loop.
@// NOTE(review): the final iteration therefore appears to read twiddles and
@// input beyond the last ones used - confirm the buffers tolerate this.
@// NOTE(review): D8-D15 are callee-saved under the AAPCS; presumably the
@// caller of these *_unsafe routines preserves them - confirm.
@// NOTE(review): the second butterfly stage combines qY2 with qY1 and qY0
@// with qY3, which suggests the twiddle table holds negated factors -
@// confirm against the table generator.
.MACRO FFTSTAGE scaled, inverse , name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
MOV pw2,pTwiddle
|
||||
VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
|
||||
|
||||
MOV pw3,pTwiddle
|
||||
MOV pw1,pTwiddle
|
||||
@// pOut0+1 increments pOut0 by 8 bytes
|
||||
@// pOut0+outPointStep == increment of 4*outPointStep bytes
|
||||
MOV outPointStep,subFFTSize,LSL #2
|
||||
|
||||
VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
|
||||
MOV subFFTNum,#1 @//after the last stage
|
||||
LSL grpCount,subFFTSize,#2
|
||||
|
||||
|
||||
@// Update grpCount and grpSize right away
|
||||
VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
|
||||
|
||||
@// update subFFTSize for the next stage
|
||||
MOV subFFTSize,grpCount
|
||||
MOV dstStep,outPointStep,LSL #1
|
||||
|
||||
VLD2 {dW1r,dW1i}, [pw1 :128]!
|
||||
|
||||
|
||||
ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
|
||||
RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16
|
||||
|
||||
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
|
||||
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
|
||||
|
||||
@// Process 4 groups at a time
|
||||
|
||||
grpLoop\name:
|
||||
|
||||
|
||||
@// Rearrange the third twiddle
|
||||
VUZP dW3r,dW3i
|
||||
SUBS grpCount,grpCount,#16 @// grpCount is multiplied by 4
|
||||
|
||||
|
||||
VUZP dButterfly1Real13, dButterfly2Real13 @// B.r D.r
|
||||
VUZP dButterfly1Imag13, dButterfly2Imag13 @// B.i D.i
|
||||
VUZP dButterfly1Real02, dButterfly2Real02 @// A.r C.r
|
||||
VUZP dButterfly1Imag02, dButterfly2Imag02 @// A.i C.i
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT0,dXr1,dW1r
|
||||
VMLAL qT0,dXi1,dW1i @// real part
|
||||
VMULL qT1,dXi1,dW1r
|
||||
VMLSL qT1,dXr1,dW1i @// imag part
|
||||
|
||||
.ELSE
|
||||
VMULL qT0,dXr1,dW1r
|
||||
VMLSL qT0,dXi1,dW1i @// real part
|
||||
VMULL qT1,dXi1,dW1r
|
||||
VMLAL qT1,dXr1,dW1i @// imag part
|
||||
|
||||
.ENDIF
|
||||
|
||||
@// Load the first twiddle for 4 groups : w^1
|
||||
@// w^1 twiddle (i+0,i+1,i+2,i+3) for group 0,1,2,3
|
||||
|
||||
VLD2 {dW1r,dW1i}, [pw1 :128]!
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT2,dXr2,dW2r
|
||||
VMLAL qT2,dXi2,dW2i @// real part
|
||||
VMULL qT3,dXi2,dW2r
|
||||
VMLSL qT3,dXr2,dW2i @// imag part
|
||||
|
||||
.ELSE
|
||||
VMULL qT2,dXr2,dW2r
|
||||
VMLSL qT2,dXi2,dW2i @// real part
|
||||
VMULL qT3,dXi2,dW2r
|
||||
VMLAL qT3,dXr2,dW2i @// imag part
|
||||
|
||||
.ENDIF
|
||||
|
||||
VRSHRN dZr1,qT0,#15
|
||||
VRSHRN dZi1,qT1,#15
|
||||
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT0,dXr3,dW3r
|
||||
VMLAL qT0,dXi3,dW3i @// real part
|
||||
VMULL qT1,dXi3,dW3r
|
||||
VMLSL qT1,dXr3,dW3i @// imag part
|
||||
|
||||
.ELSE
|
||||
VMULL qT0,dXr3,dW3r
|
||||
VMLSL qT0,dXi3,dW3i @// real part
|
||||
VMULL qT1,dXi3,dW3r
|
||||
VMLAL qT1,dXr3,dW3i @// imag part
|
||||
|
||||
.ENDIF
|
||||
|
||||
@// Load the second twiddle for 4 groups : w^2
|
||||
@// w^2 twiddle (2i+0,2i+2,2i+4,2i+6) for group 0,1,2,3
|
||||
VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
|
||||
|
||||
|
||||
VRSHRN dZr2,qT2,#15
|
||||
VRSHRN dZi2,qT3,#15
|
||||
|
||||
@// Load the third twiddle for 4 groups : w^3
|
||||
@// w^3 twiddle (3i+0,3i+3,3i+6,3i+9) for group 0,1,2,3
|
||||
|
||||
VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
|
||||
|
||||
VRSHRN dZr3,qT0,#15
|
||||
VRSHRN dZi3,qT1,#15
|
||||
|
||||
VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
|
||||
VHADD qY0,qX0,qZ2
|
||||
VHSUB qY2,qX0,qZ2
|
||||
VHADD qY1,qZ1,qZ3
|
||||
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
|
||||
|
||||
VHSUB qY3,qZ1,qZ3
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
VHSUB qZ0,qY2,qY1
|
||||
VHADD qZ2,qY2,qY1
|
||||
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VHADD dZr3,dYr0,dYi3 @// y3 = u0-ju3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VHSUB dZi3,dYi0,dYr3
|
||||
|
||||
VHSUB dZr1,dYr0,dYi3 @// y1 = u0+ju3
|
||||
VHADD dZi1,dYi0,dYr3
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VST2 {dZr1,dZi1},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
|
||||
|
||||
.ELSE
|
||||
|
||||
VHSUB dZr1,dYr0,dYi3 @// y1 = u0+ju3
|
||||
VHADD dZi1,dYi0,dYr3
|
||||
|
||||
VHADD dZr3,dYr0,dYi3 @// y3 = u0-ju3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VHSUB dZi3,dYi0,dYr3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VST2 {dZr3,dZi3},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
|
||||
|
||||
.ENDIF
|
||||
|
||||
.ELSE
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
|
||||
VADD qY0,qX0,qZ2
|
||||
VSUB qY2,qX0,qZ2
|
||||
VADD qY1,qZ1,qZ3
|
||||
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
|
||||
|
||||
VSUB qY3,qZ1,qZ3
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
VSUB qZ0,qY2,qY1
|
||||
VADD qZ2,qY2,qY1
|
||||
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VADD dZr3,dYr0,dYi3 @// y3 = u0-ju3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VSUB dZi3,dYi0,dYr3
|
||||
|
||||
VSUB dZr1,dYr0,dYi3 @// y1 = u0+ju3
|
||||
VADD dZi1,dYi0,dYr3
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VST2 {dZr1,dZi1},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
|
||||
|
||||
.ELSE
|
||||
|
||||
VSUB dZr1,dYr0,dYi3 @// y1 = u0+ju3
|
||||
VADD dZi1,dYi0,dYr3
|
||||
|
||||
VADD dZr3,dYr0,dYi3 @// y3 = u0-ju3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VSUB dZi3,dYi0,dYr3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VST2 {dZr3,dZi3},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
|
||||
|
||||
.ENDIF
|
||||
|
||||
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
BGT grpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
|
||||
MOV pTmp,pDst
|
||||
SUB pSrc,pSrc,#64 @// Extra increment currently done in the loop
|
||||
SUB pDst,pSrc,outPointStep,LSL #2 @// pDst -= size; pSrc -= 4*size bytes
|
||||
SUB pSrc,pTmp,outPointStep
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
@// Forward, unscaled entry point: expands FFTSTAGE with scaled="FALSE",
@// inverse="FALSE" and label suffix FWD.
M_START armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, unscaled entry point: expands FFTSTAGE with scaled="FALSE",
@// inverse="TRUE" and label suffix INV.
M_START armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
@// Forward, scaled entry point: expands FFTSTAGE with scaled="TRUE",
@// inverse="FALSE" and label suffix FWDSFS.
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",FWDSFS
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, scaled entry point: expands FFTSTAGE with scaled="TRUE",
@// inverse="TRUE" and label suffix INVSFS.
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",INVSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.END
|
400
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S
Normal file
400
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S
Normal file
@ -0,0 +1,400 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
|
||||
@//
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC16_Radix4_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7761
|
||||
@// Last Modified Date: Wed, 26 Sep 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a Radix 4 FFT stage for a N point complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define grpCount r3
|
||||
#define pointStep r4
|
||||
#define outPointStep r5
|
||||
#define stepTwiddle r12
|
||||
#define setCount r14
|
||||
#define srcStep r8
|
||||
#define setStep r9
|
||||
#define dstStep r10
|
||||
#define twStep r11
|
||||
#define t1 r3
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dW1 D0.S16
|
||||
#define dW2 D1.S16
|
||||
#define dW3 D2.S16
|
||||
|
||||
#define dXr0 D4.S16
|
||||
#define dXi0 D5.S16
|
||||
#define dXr1 D6.S16
|
||||
#define dXi1 D7.S16
|
||||
#define dXr2 D8.S16
|
||||
#define dXi2 D9.S16
|
||||
#define dXr3 D10.S16
|
||||
#define dXi3 D11.S16
|
||||
#define dYr0 D12.S16
|
||||
#define dYi0 D13.S16
|
||||
#define dYr1 D14.S16
|
||||
#define dYi1 D15.S16
|
||||
#define dYr2 D16.S16
|
||||
#define dYi2 D17.S16
|
||||
#define dYr3 D18.S16
|
||||
#define dYi3 D19.S16
|
||||
#define qT0 Q8.S32
|
||||
#define qT1 Q9.S32
|
||||
#define qT2 Q6.S32
|
||||
#define qT3 Q7.S32
|
||||
|
||||
#define dZr0 D20.S16
|
||||
#define dZi0 D21.S16
|
||||
#define dZr1 D22.S16
|
||||
#define dZi1 D23.S16
|
||||
#define dZr2 D24.S16
|
||||
#define dZi2 D25.S16
|
||||
#define dZr3 D26.S16
|
||||
#define dZi3 D27.S16
|
||||
#define qY0 Q6.S16
|
||||
#define qY1 Q7.S16
|
||||
#define qY2 Q8.S16
|
||||
#define qY3 Q9.S16
|
||||
#define qX0 Q2.S16
|
||||
#define qZ0 Q10.S16
|
||||
#define qZ1 Q11.S16
|
||||
#define qZ2 Q12.S16
|
||||
#define qZ3 Q13.S16
|
||||
|
||||
|
||||
@// FFTSTAGE: emits one radix-4 FFT stage for SC16 data.
@//   scaled  - "TRUE" selects the halving VHADD/VHSUB butterflies, anything else VADD/VSUB
@//   inverse - "TRUE" selects the alternate twiddle-multiply signs and output ordering
@//   name    - suffix appended to the grpLoop/setLoop labels so each expansion has unique labels
@// Reads pSrc, pDst, pTwiddle, subFFTNum and subFFTSize. On exit subFFTSize has been
@// multiplied by 4, subFFTNum divided by 4, and pSrc/pDst have been reset and swapped.
.MACRO FFTSTAGE scaled, inverse , name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
|
||||
@// Update grpCount and grpSize right away in order to reuse pGrpCount and pGrpSize regs
|
||||
|
||||
LSL grpCount,subFFTSize,#2
|
||||
LSR subFFTNum,subFFTNum,#2
|
||||
MOV subFFTSize,grpCount
|
||||
|
||||
|
||||
@// pOut0+1 increments pOut0 by 4 bytes
|
||||
@// pOut0+outPointStep == increment of 4*outPointStep bytes = size bytes
|
||||
|
||||
MOV stepTwiddle,#0
|
||||
SMULBB outPointStep,grpCount,subFFTNum
|
||||
|
||||
@// pT0+1 increments pT0 by 4 bytes
|
||||
@// pT0+pointStep = increment of 4*pointStep bytes = grpSize bytes
|
||||
|
||||
LSL pointStep,subFFTNum,#2 @// 2*grpSize
|
||||
|
||||
@// NOTE: dW1, dW2 and dW3 below are all loaded from the same address
@// (pTwiddle is not post-incremented by these three loads).
VLD1 dW1,[pTwiddle :64] @//[wi | wr]
|
||||
MOV srcStep,pointStep,LSL #1 @// srcStep = 2*pointStep
|
||||
VLD1 dW2,[pTwiddle :64] @//[wi | wr]
|
||||
ADD setStep,srcStep,pointStep @// setStep = 3*pointStep
|
||||
SUB srcStep,srcStep,#16 @// srcStep = 2*pointStep-16
|
||||
VLD1 dW3,[pTwiddle :64]
|
||||
@//RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16
|
||||
RSB setStep,setStep,#0 @// setStep = - 3*pointStep
|
||||
|
||||
MOV dstStep,outPointStep,LSL #1
|
||||
ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
|
||||
RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16
|
||||
|
||||
|
||||
|
||||
grpLoop\name:
|
||||
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
|
||||
ADD stepTwiddle,stepTwiddle,pointStep
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
ADD pTwiddle,pTwiddle,stepTwiddle @// set pTwiddle to the first point
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
MOV twStep,stepTwiddle,LSL #2
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & reset pSrc
|
||||
|
||||
SUB twStep,stepTwiddle,twStep @// twStep = -3*stepTwiddle
|
||||
|
||||
|
||||
MOV setCount,pointStep,LSR #2
|
||||
ADD pSrc,pSrc,#16 @// set pSrc to data[0] of the next set
|
||||
ADD pSrc,pSrc,pointStep @// increment to data[1] of the next set
|
||||
|
||||
@// Loop on the sets : 4 at a time
|
||||
|
||||
setLoop\name:
|
||||
|
||||
SUBS setCount,setCount,#4 @// decrement the loop counter
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT0,dXr1,dW1[0]
|
||||
VMLAL qT0,dXi1,dW1[1] @// real part
|
||||
VMULL qT1,dXi1,dW1[0]
|
||||
VMLSL qT1,dXr1,dW1[1] @// imag part
|
||||
|
||||
.ELSE
|
||||
VMULL qT0,dXr1,dW1[0]
|
||||
VMLSL qT0,dXi1,dW1[1] @// real part
|
||||
VMULL qT1,dXi1,dW1[0]
|
||||
VMLAL qT1,dXr1,dW1[1] @// imag part
|
||||
|
||||
.ENDIF
|
||||
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT2,dXr2,dW2[0]
|
||||
VMLAL qT2,dXi2,dW2[1] @// real part
|
||||
VMULL qT3,dXi2,dW2[0]
|
||||
VMLSL qT3,dXr2,dW2[1] @// imag part
|
||||
|
||||
.ELSE
|
||||
VMULL qT2,dXr2,dW2[0]
|
||||
VMLSL qT2,dXi2,dW2[1] @// real part
|
||||
VMULL qT3,dXi2,dW2[0]
|
||||
VMLAL qT3,dXr2,dW2[1] @// imag part
|
||||
|
||||
.ENDIF
|
||||
|
||||
@// Narrow the 32-bit products back to 16 bits (rounding shift right by 15).
VRSHRN dZr1,qT0,#15
|
||||
VRSHRN dZi1,qT1,#15
|
||||
|
||||
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT0,dXr3,dW3[0]
|
||||
VMLAL qT0,dXi3,dW3[1] @// real part
|
||||
VMULL qT1,dXi3,dW3[0]
|
||||
VMLSL qT1,dXr3,dW3[1] @// imag part
|
||||
|
||||
.ELSE
|
||||
VMULL qT0,dXr3,dW3[0]
|
||||
VMLSL qT0,dXi3,dW3[1] @// real part
|
||||
VMULL qT1,dXi3,dW3[0]
|
||||
VMLAL qT1,dXr3,dW3[1] @// imag part
|
||||
|
||||
.ENDIF
|
||||
|
||||
VRSHRN dZr2,qT2,#15
|
||||
VRSHRN dZi2,qT3,#15
|
||||
|
||||
|
||||
VRSHRN dZr3,qT0,#15
|
||||
VRSHRN dZi3,qT1,#15
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
|
||||
|
||||
|
||||
@// qY0/qY1 are Q6/Q7 (same registers as qT2/qT3) and qY2/qY3 are Q8/Q9 (same as
@// qT0/qT1), so every VRSHRN above must have consumed qT0..qT3 before the
@// butterflies below overwrite them.
@// NOTE(review): in all four variants below, qZ0/qZ1 are formed from qY2 and qY1,
@// while qY0 is only combined with qY3 - confirm this pairing is intended.
.ifeqs "\scaled", "TRUE"
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
VHADD qY0,qX0,qZ2
|
||||
VHSUB qY2,qX0,qZ2
|
||||
|
||||
VLD2 {dXr0,dXi0},[pSrc :128]! @// data[0]
|
||||
VHADD qY1,qZ1,qZ3
|
||||
VHSUB qY3,qZ1,qZ3
|
||||
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VHSUB qZ0,qY2,qY1
|
||||
|
||||
VHADD dZr2,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VHSUB dZi2,dYi0,dYr3
|
||||
|
||||
VHADD qZ1,qY2,qY1
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
|
||||
VHSUB dZr3,dYr0,dYi3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VHADD dZi3,dYi0,dYr3
|
||||
VST2 {dZr3,dZi3},[pDst :128],dstStep
|
||||
|
||||
|
||||
.ELSE
|
||||
|
||||
VHSUB qZ0,qY2,qY1
|
||||
|
||||
VHSUB dZr3,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VHADD dZi3,dYi0,dYr3
|
||||
|
||||
VHADD qZ1,qY2,qY1
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VHADD dZr2,dYr0,dYi3
|
||||
VHSUB dZi2,dYi0,dYr3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VST2 {dZr2,dZi2},[pDst :128],dstStep
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
|
||||
.ELSE
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
VADD qY0,qX0,qZ2
|
||||
VSUB qY2,qX0,qZ2
|
||||
|
||||
@// NOTE(review): unlike the scaled path, this load has no :128 alignment qualifier.
VLD2 {dXr0,dXi0},[pSrc]! @// data[0]
|
||||
VADD qY1,qZ1,qZ3
|
||||
VSUB qY3,qZ1,qZ3
|
||||
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VSUB qZ0,qY2,qY1
|
||||
|
||||
VADD dZr2,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VSUB dZi2,dYi0,dYr3
|
||||
|
||||
VADD qZ1,qY2,qY1
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
|
||||
VSUB dZr3,dYr0,dYi3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VADD dZi3,dYi0,dYr3
|
||||
VST2 {dZr3,dZi3},[pDst :128],dstStep
|
||||
|
||||
|
||||
.ELSE
|
||||
|
||||
VSUB qZ0,qY2,qY1
|
||||
|
||||
VSUB dZr3,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VADD dZi3,dYi0,dYr3
|
||||
|
||||
VADD qZ1,qY2,qY1
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VADD dZr2,dYr0,dYi3
|
||||
VSUB dZi2,dYi0,dYr3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VST2 {dZr2,dZi2},[pDst :128],dstStep
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
ADD pSrc,pSrc,pointStep @// increment to data[1] of the next set
|
||||
@// The condition flags tested here were set by the SUBS at the top of setLoop;
@// none of the instructions in between use a flag-setting form.
BGT setLoop\name
|
||||
|
||||
VLD1 dW1,[pTwiddle :64],stepTwiddle @//[wi | wr]
|
||||
SUBS grpCount,grpCount,#4 @// subtract 4 since grpCount multiplied by 4
|
||||
VLD1 dW2,[pTwiddle :64],stepTwiddle @//[wi | wr]
|
||||
ADD pSrc,pSrc,srcStep @// increment pSrc for the next grp
|
||||
VLD1 dW3,[pTwiddle :64],twStep @//[wi | wr]
|
||||
|
||||
|
||||
|
||||
BGT grpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
|
||||
@// t1 is r3, the same register as grpCount, whose loop has finished by this point.
MOV t1,pDst
|
||||
SUB pDst,pSrc,outPointStep,LSL #2 @// pDst -= size; pSrc -= 4*size bytes
|
||||
SUB pSrc,t1,outPointStep
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
@// Forward, unscaled entry point: expands FFTSTAGE with scaled="FALSE", inverse="FALSE";
@// FWD is the suffix given to the grpLoop/setLoop labels of this expansion.
M_START armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, unscaled entry point: expands FFTSTAGE with scaled="FALSE", inverse="TRUE";
@// INV is the suffix given to the grpLoop/setLoop labels of this expansion.
M_START armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
@// Forward, scaled (Sfs) entry point: expands FFTSTAGE with scaled="TRUE", inverse="FALSE",
@// which selects the halving VHADD/VHSUB butterflies; FWDSFS is the label suffix.
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",FWDSFS
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, scaled (Sfs) entry point: expands FFTSTAGE with scaled="TRUE", inverse="TRUE",
@// which selects the halving VHADD/VHSUB butterflies; INVSFS is the label suffix.
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",INVSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.END
|
@ -0,0 +1,619 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
|
||||
@//
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7766
|
||||
@// Last Modified Date: Thu, 27 Sep 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a first stage Radix 8 FFT stage for a N point complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
@// dest buffer for the next stage (not pSrc for first stage)
|
||||
#define pPingPongBuf r5
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define grpSize r3
|
||||
@// Reuse grpSize as setCount
|
||||
#define setCount r3
|
||||
#define pointStep r4
|
||||
#define outPointStep r4
|
||||
#define setStep r8
|
||||
#define step1 r9
|
||||
#define step2 r10
|
||||
#define t0 r11
|
||||
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dXr0 D14.S16
|
||||
#define dXi0 D15.S16
|
||||
#define dXr1 D2.S16
|
||||
#define dXi1 D3.S16
|
||||
#define dXr2 D4.S16
|
||||
#define dXi2 D5.S16
|
||||
#define dXr3 D6.S16
|
||||
#define dXi3 D7.S16
|
||||
#define dXr4 D8.S16
|
||||
#define dXi4 D9.S16
|
||||
#define dXr5 D10.S16
|
||||
#define dXi5 D11.S16
|
||||
#define dXr6 D12.S16
|
||||
#define dXi6 D13.S16
|
||||
#define dXr7 D0.S16
|
||||
#define dXi7 D1.S16
|
||||
#define qX0 Q7.S16
|
||||
#define qX1 Q1.S16
|
||||
#define qX2 Q2.S16
|
||||
#define qX3 Q3.S16
|
||||
#define qX4 Q4.S16
|
||||
#define qX5 Q5.S16
|
||||
#define qX6 Q6.S16
|
||||
#define qX7 Q0.S16
|
||||
|
||||
#define dUr0 D16.S16
|
||||
#define dUi0 D17.S16
|
||||
#define dUr2 D18.S16
|
||||
#define dUi2 D19.S16
|
||||
#define dUr4 D20.S16
|
||||
#define dUi4 D21.S16
|
||||
#define dUr6 D22.S16
|
||||
#define dUi6 D23.S16
|
||||
#define dUr1 D24.S16
|
||||
#define dUi1 D25.S16
|
||||
#define dUr3 D26.S16
|
||||
#define dUi3 D27.S16
|
||||
#define dUr5 D28.S16
|
||||
#define dUi5 D29.S16
|
||||
@// dUr7/dUi7 use D30/D31 (dXr7/dXi7 are D0/D1, so they are not shared with those)
|
||||
#define dUr7 D30.S16
|
||||
#define dUi7 D31.S16
|
||||
#define qU0 Q8.S16
|
||||
#define qU1 Q12.S16
|
||||
#define qU2 Q9.S16
|
||||
#define qU3 Q13.S16
|
||||
#define qU4 Q10.S16
|
||||
#define qU5 Q14.S16
|
||||
#define qU6 Q11.S16
|
||||
#define qU7 Q15.S16
|
||||
|
||||
|
||||
|
||||
#define dVr0 D24.S16
|
||||
#define dVi0 D25.S16
|
||||
#define dVr2 D26.S16
|
||||
#define dVi2 D27.S16
|
||||
#define dVr4 D28.S16
|
||||
#define dVi4 D29.S16
|
||||
#define dVr6 D30.S16
|
||||
#define dVi6 D31.S16
|
||||
#define dVr1 D16.S16
|
||||
#define dVi1 D17.S16
|
||||
#define dVr3 D18.S16
|
||||
#define dVi3 D19.S16
|
||||
#define dVr5 D20.S16
|
||||
#define dVi5 D21.S16
|
||||
@// dVr7 shares D22 with dUr6
|
||||
#define dVr7 D22.S16
|
||||
@// dVi7 shares D23 with dUi6
|
||||
#define dVi7 D23.S16
|
||||
#define qV0 Q12.S16
|
||||
#define qV1 Q8.S16
|
||||
#define qV2 Q13.S16
|
||||
#define qV3 Q9.S16
|
||||
#define qV4 Q14.S16
|
||||
#define qV5 Q10.S16
|
||||
#define qV6 Q15.S16
|
||||
#define qV7 Q11.S16
|
||||
|
||||
|
||||
|
||||
#define dYr0 D16.S16
|
||||
#define dYi0 D17.S16
|
||||
#define dYr2 D18.S16
|
||||
#define dYi2 D19.S16
|
||||
#define dYr4 D20.S16
|
||||
#define dYi4 D21.S16
|
||||
#define dYr6 D22.S16
|
||||
#define dYi6 D23.S16
|
||||
#define dYr1 D24.S16
|
||||
#define dYi1 D25.S16
|
||||
#define dYr3 D26.S16
|
||||
#define dYi3 D27.S16
|
||||
#define dYr5 D28.S16
|
||||
#define dYi5 D29.S16
|
||||
@// dYr7/dYi7 use D30/D31 (the registers also named dUr7/dUi7 and dVr6/dVi6)
|
||||
#define dYr7 D30.S16
|
||||
#define dYi7 D31.S16
|
||||
#define qY0 Q8.S16
|
||||
#define qY1 Q12.S16
|
||||
#define qY2 Q9.S16
|
||||
#define qY3 Q13.S16
|
||||
#define qY4 Q10.S16
|
||||
#define qY5 Q14.S16
|
||||
#define qY6 Q11.S16
|
||||
#define qY7 Q15.S16
|
||||
|
||||
|
||||
#define dT0 D0.S16
|
||||
#define dT1 D1.S16
|
||||
|
||||
|
||||
@// Define constants
|
||||
.set ONEBYSQRT2, 0x00005A82 @// Q15 format
|
||||
|
||||
|
||||
@// FFTSTAGE: emits the first-stage radix-8 butterfly loop for SC16 data.
@//   scaled  - "TRUE" selects the halving VHADD/VHSUB butterflies, anything else VADD/VSUB
@//   inverse - "TRUE" selects the alternate sign/ordering variants of the third stage
@//   name    - suffix appended to the grpZeroSetLoop label so each expansion has a unique label
@// Reads pSrc, pDst, subFFTNum and pPingPongBuf. On exit subFFTSize = 8, subFFTNum has
@// been divided by 8, pSrc = pDst - pointStep and pDst = pPingPongBuf.
.MACRO FFTSTAGE scaled, inverse , name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Update pSubFFTSize and pSubFFTNum regs
|
||||
MOV subFFTSize,#8 @// subFFTSize = 8 after this first (radix-8) stage
|
||||
LDR t0,=ONEBYSQRT2 @// t0=(1/sqrt(2)) as Q15 format
|
||||
|
||||
@// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
|
||||
LSR grpSize,subFFTNum,#3
|
||||
MOV subFFTNum,grpSize
|
||||
|
||||
|
||||
@// pT0+1 increments pT0 by 4 bytes
|
||||
@// pT0+pointStep = increment of 4*pointStep bytes = grpSize/2 bytes
|
||||
@// Note: outPointStep = pointStep for firststage
|
||||
|
||||
MOV pointStep,grpSize,LSL #2
|
||||
|
||||
|
||||
@// Calculate the step of input data for the next set
|
||||
@//MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
|
||||
MOV step1,grpSize,LSL #3
|
||||
|
||||
MOV step2,pointStep,LSL #3
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
SUB step2,step2,pointStep @// step2 = 7*pointStep
|
||||
RSB setStep,step2,#16 @// setStep = - 7*pointStep+16
|
||||
|
||||
|
||||
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7] & update pSrc for the next set
|
||||
@// setStep = -7*pointStep + 16
|
||||
@// grp = 0 a special case since all the twiddle factors are 1
|
||||
@// Loop on the sets : 4 sets at a time
|
||||
|
||||
grpZeroSetLoop\name:
|
||||
|
||||
@// Decrement setcount
|
||||
SUBS setCount,setCount,#4 @// decrement the set loop counter
|
||||
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
@// finish first stage of 8 point FFT
|
||||
|
||||
VHADD qU0,qX0,qX4
|
||||
VHADD qU2,qX1,qX5
|
||||
VHADD qU4,qX2,qX6
|
||||
VHADD qU6,qX3,qX7
|
||||
|
||||
@// finish second stage of 8 point FFT
|
||||
|
||||
VHADD qV0,qU0,qU4
|
||||
VHSUB qV2,qU0,qU4
|
||||
VHADD qV4,qU2,qU6
|
||||
VHSUB qV6,qU2,qU6
|
||||
|
||||
@// finish third stage of 8 point FFT
|
||||
|
||||
VHADD qY0,qV0,qV4
|
||||
VHSUB qY4,qV0,qV4
|
||||
VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VHSUB dYr2,dVr2,dVi6
|
||||
VHADD dYi2,dVi2,dVr6
|
||||
|
||||
VHADD dYr6,dVr2,dVi6
|
||||
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
|
||||
VHSUB dYi6,dVi2,dVr6
|
||||
|
||||
VHSUB qU1,qX0,qX4
|
||||
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
|
||||
|
||||
VHSUB qU3,qX1,qX5
|
||||
VHSUB qU5,qX2,qX6
|
||||
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
|
||||
|
||||
.ELSE
|
||||
|
||||
VHADD dYr6,dVr2,dVi6
|
||||
VHSUB dYi6,dVi2,dVr6
|
||||
|
||||
VHSUB dYr2,dVr2,dVi6
|
||||
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2
|
||||
VHADD dYi2,dVi2,dVr6
|
||||
|
||||
|
||||
VHSUB qU1,qX0,qX4
|
||||
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
|
||||
VHSUB qU3,qX1,qX5
|
||||
VHSUB qU5,qX2,qX6
|
||||
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
@// finish first stage of 8 point FFT
|
||||
|
||||
VHSUB qU7,qX3,qX7
|
||||
@// dT0 is D0, the same register as dXr7: qX7 was last read by the VHSUB above and
@// data[7] is reloaded further down, after the last use of dT0/dT1.
VMOV dT0[0],t0
|
||||
|
||||
@// finish second stage of 8 point FFT
|
||||
|
||||
VHSUB dVr1,dUr1,dUi5
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration
|
||||
VHADD dVi1,dUi1,dUr5
|
||||
VHADD dVr3,dUr1,dUi5
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
VHSUB dVi3,dUi1,dUr5
|
||||
|
||||
VHSUB dVr5,dUr3,dUi7
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
VHADD dVi5,dUi3,dUr7
|
||||
VHADD dVr7,dUr3,dUi7
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
|
||||
VHSUB dVi7,dUi3,dUr7
|
||||
|
||||
@// finish third stage of 8 point FFT
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
@// calculate a*v5
|
||||
VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D1) = dVr5 scaled by ONEBYSQRT2
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VQRDMULH dVi5,dVi5,dT0[0]
|
||||
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VSUB dVr5,dT1,dVi5 @// a * V5
|
||||
VADD dVi5,dT1,dVi5
|
||||
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
|
||||
@// calculate b*v7
|
||||
VQRDMULH dT1,dVr7,dT0[0]
|
||||
VQRDMULH dVi7,dVi7,dT0[0]
|
||||
|
||||
VHADD qY1,qV1,qV5
|
||||
VHSUB qY5,qV1,qV5
|
||||
|
||||
|
||||
VADD dVr7,dT1,dVi7 @// b * V7
|
||||
VSUB dVi7,dVi7,dT1
|
||||
SUB pDst, pDst, step2 @// set pDst to y1
|
||||
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
|
||||
|
||||
|
||||
VHSUB dYr3,dVr3,dVr7
|
||||
VHSUB dYi3,dVi3,dVi7
|
||||
VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
|
||||
VHADD dYr7,dVr3,dVr7
|
||||
VHADD dYi7,dVi3,dVi7
|
||||
|
||||
|
||||
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
|
||||
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
|
||||
#if 0
|
||||
VST2 {dYr7,dYi7},[pDst :128],#16 @// store y7
|
||||
#else
|
||||
VST2 {dYr7,dYi7},[pDst :128]! @// store y7
|
||||
#endif
|
||||
.ELSE
|
||||
|
||||
@// calculate b*v7
|
||||
VQRDMULH dT1,dVr7,dT0[0]
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VQRDMULH dVi7,dVi7,dT0[0]
|
||||
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VADD dVr7,dT1,dVi7 @// b * V7
|
||||
VSUB dVi7,dVi7,dT1
|
||||
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
|
||||
@// calculate a*v5
|
||||
VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D1) = dVr5 scaled by ONEBYSQRT2
|
||||
VQRDMULH dVi5,dVi5,dT0[0]
|
||||
|
||||
VHADD dYr7,dVr3,dVr7
|
||||
VHADD dYi7,dVi3,dVi7
|
||||
SUB pDst, pDst, step2 @// set pDst to y1
|
||||
|
||||
VSUB dVr5,dT1,dVi5 @// a * V5
|
||||
VADD dVi5,dT1,dVi5
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
|
||||
|
||||
VHSUB qY5,qV1,qV5
|
||||
|
||||
VHSUB dYr3,dVr3,dVr7
|
||||
VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1
|
||||
VHSUB dYi3,dVi3,dVi7
|
||||
VHADD qY1,qV1,qV5
|
||||
|
||||
|
||||
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3
|
||||
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5
|
||||
#if 0
|
||||
VST2 {dYr1,dYi1},[pDst :128],#16 @// store y7
|
||||
#else
|
||||
VST2 {dYr1,dYi1},[pDst :128]! @// store y7
|
||||
#endif
|
||||
|
||||
.ENDIF
|
||||
|
||||
|
||||
|
||||
.ELSE
|
||||
@// finish first stage of 8 point FFT
|
||||
|
||||
VADD qU0,qX0,qX4
|
||||
VADD qU2,qX1,qX5
|
||||
VADD qU4,qX2,qX6
|
||||
VADD qU6,qX3,qX7
|
||||
|
||||
@// finish second stage of 8 point FFT
|
||||
|
||||
VADD qV0,qU0,qU4
|
||||
VSUB qV2,qU0,qU4
|
||||
VADD qV4,qU2,qU6
|
||||
VSUB qV6,qU2,qU6
|
||||
|
||||
@// finish third stage of 8 point FFT
|
||||
|
||||
VADD qY0,qV0,qV4
|
||||
VSUB qY4,qV0,qV4
|
||||
VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VSUB dYr2,dVr2,dVi6
|
||||
VADD dYi2,dVi2,dVr6
|
||||
|
||||
VADD dYr6,dVr2,dVi6
|
||||
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
|
||||
VSUB dYi6,dVi2,dVr6
|
||||
|
||||
VSUB qU1,qX0,qX4
|
||||
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
|
||||
|
||||
VSUB qU3,qX1,qX5
|
||||
VSUB qU5,qX2,qX6
|
||||
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
|
||||
|
||||
.ELSE
|
||||
|
||||
VADD dYr6,dVr2,dVi6
|
||||
VSUB dYi6,dVi2,dVr6
|
||||
|
||||
VSUB dYr2,dVr2,dVi6
|
||||
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2
|
||||
VADD dYi2,dVi2,dVr6
|
||||
|
||||
|
||||
VSUB qU1,qX0,qX4
|
||||
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
|
||||
VSUB qU3,qX1,qX5
|
||||
VSUB qU5,qX2,qX6
|
||||
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
@// finish first stage of 8 point FFT
|
||||
|
||||
VSUB qU7,qX3,qX7
|
||||
@// dT0 is D0, the same register as dXr7: qX7 was last read by the VSUB above and
@// data[7] is reloaded further down, after the last use of dT0/dT1.
VMOV dT0[0],t0
|
||||
|
||||
@// finish second stage of 8 point FFT
|
||||
|
||||
VSUB dVr1,dUr1,dUi5
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration
|
||||
VADD dVi1,dUi1,dUr5
|
||||
VADD dVr3,dUr1,dUi5
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
VSUB dVi3,dUi1,dUr5
|
||||
|
||||
VSUB dVr5,dUr3,dUi7
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
VADD dVi5,dUi3,dUr7
|
||||
VADD dVr7,dUr3,dUi7
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
|
||||
VSUB dVi7,dUi3,dUr7
|
||||
|
||||
@// finish third stage of 8 point FFT
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
@// calculate a*v5
|
||||
VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D1) = dVr5 scaled by ONEBYSQRT2
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VQRDMULH dVi5,dVi5,dT0[0]
|
||||
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VSUB dVr5,dT1,dVi5 @// a * V5
|
||||
VADD dVi5,dT1,dVi5
|
||||
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
|
||||
@// calculate b*v7
|
||||
VQRDMULH dT1,dVr7,dT0[0]
|
||||
VQRDMULH dVi7,dVi7,dT0[0]
|
||||
|
||||
VADD qY1,qV1,qV5
|
||||
VSUB qY5,qV1,qV5
|
||||
|
||||
|
||||
VADD dVr7,dT1,dVi7 @// b * V7
|
||||
VSUB dVi7,dVi7,dT1
|
||||
SUB pDst, pDst, step2 @// set pDst to y1
|
||||
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
|
||||
|
||||
|
||||
VSUB dYr3,dVr3,dVr7
|
||||
VSUB dYi3,dVi3,dVi7
|
||||
VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
|
||||
VADD dYr7,dVr3,dVr7
|
||||
VADD dYi7,dVi3,dVi7
|
||||
|
||||
|
||||
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
|
||||
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
|
||||
#if 0
|
||||
VST2 {dYr7,dYi7},[pDst :128],#16 @// store y7
|
||||
#else
|
||||
VST2 {dYr7,dYi7},[pDst :128]! @// store y7
|
||||
#endif
|
||||
.ELSE
|
||||
|
||||
@// calculate b*v7
|
||||
VQRDMULH dT1,dVr7,dT0[0]
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VQRDMULH dVi7,dVi7,dT0[0]
|
||||
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VADD dVr7,dT1,dVi7 @// b * V7
|
||||
VSUB dVi7,dVi7,dT1
|
||||
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
|
||||
@// calculate a*v5
|
||||
VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D1) = dVr5 scaled by ONEBYSQRT2
|
||||
VQRDMULH dVi5,dVi5,dT0[0]
|
||||
|
||||
VADD dYr7,dVr3,dVr7
|
||||
VADD dYi7,dVi3,dVi7
|
||||
SUB pDst, pDst, step2 @// set pDst to y1
|
||||
|
||||
VSUB dVr5,dT1,dVi5 @// a * V5
|
||||
VADD dVi5,dT1,dVi5
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
|
||||
|
||||
VSUB qY5,qV1,qV5
|
||||
|
||||
VSUB dYr3,dVr3,dVr7
|
||||
VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1
|
||||
VSUB dYi3,dVi3,dVi7
|
||||
VADD qY1,qV1,qV5
|
||||
|
||||
|
||||
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3
|
||||
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5
|
||||
#if 0
|
||||
VST2 {dYr1,dYi1},[pDst :128],#16 @// store y7
|
||||
#else
|
||||
VST2 {dYr1,dYi1},[pDst :128]! @// store y7
|
||||
#endif
|
||||
|
||||
.ENDIF
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
SUB pDst, pDst, step2 @// update pDst for the next set
|
||||
@// The condition flags tested here were set by the SUBS at the top of the loop;
@// none of the instructions in between use a flag-setting form.
BGT grpZeroSetLoop\name
|
||||
|
||||
|
||||
@// reset pSrc to pDst for the next stage
|
||||
SUB pSrc,pDst,pointStep @// pDst -= 2*grpSize
|
||||
MOV pDst,pPingPongBuf
|
||||
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
|
||||
|
||||
@// Forward, unscaled entry point: expands FFTSTAGE with scaled="FALSE", inverse="FALSE";
@// FWD is the suffix given to the grpZeroSetLoop label of this expansion.
M_START armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, unscaled entry point: expands FFTSTAGE with scaled="FALSE", inverse="TRUE";
@// INV is the suffix given to the grpZeroSetLoop label of this expansion.
M_START armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
@// Forward, scaled (Sfs) entry point: expands FFTSTAGE with scaled="TRUE", inverse="FALSE",
@// which selects the halving VHADD/VHSUB butterflies; FWDSFS is the label suffix.
M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",FWDSFS
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, scaled (Sfs) entry point: expands FFTSTAGE with scaled="TRUE", inverse="TRUE",
@// which selects the halving VHADD/VHSUB butterflies; INVSFS is the label suffix.
M_START armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",INVSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.END
|
@ -0,0 +1,163 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
|
||||
@//
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 5995
|
||||
@// Last Modified Date: Fri, 08 Jun 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute the first stage of a Radix 2 DIT in-order out-of-place FFT
|
||||
@// stage for a N point complex signal.
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define pPingPongBuf r5
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define pointStep r3
|
||||
#define outPointStep r3
|
||||
#define grpSize r4
|
||||
#define setCount r4
|
||||
#define step r8
|
||||
#define dstStep r8
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dX0 D0.S32
|
||||
#define dX1 D1.S32
|
||||
#define dY0 D2.S32
|
||||
#define dY1 D3.S32
|
||||
|
||||
|
||||
@// FFTSTAGE: emits the first-stage radix-2 butterfly loop for SC32 data.
@//   scaled  - "TRUE" selects the halving VHADD/VHSUB butterfly, anything else VADD/VSUB
@//   inverse - accepted but not referenced anywhere in this macro body
@//   name    - suffix appended to the grpZeroSetLoop label so each expansion has a unique label
@// Reads pSrc, pDst, subFFTNum and pPingPongBuf. On exit subFFTSize = 2, subFFTNum has
@// been halved, pSrc = pDst - pointStep and pDst = pPingPongBuf.
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
|
||||
@// update subFFTSize and subFFTNum into RN6 and RN7 for the next stage
|
||||
|
||||
|
||||
MOV subFFTSize,#2
|
||||
LSR grpSize,subFFTNum,#1
|
||||
MOV subFFTNum,grpSize
|
||||
|
||||
|
||||
@// pT0+1 increments pT0 by 8 bytes
|
||||
@// pT0+pointStep = increment of 8*pointStep bytes = 4*grpSize bytes
|
||||
@// Note: outPointStep = pointStep for firststage
|
||||
@// Note: setCount = grpSize/2 (reuse the updated grpSize for setCount)
|
||||
|
||||
@// setCount and grpSize are both r4, so the loop counter below starts at the value
@// produced by the LSR above; pointStep/outPointStep are both r3 and step/dstStep both r8.
MOV pointStep,grpSize,LSL #3
|
||||
RSB step,pointStep,#8
|
||||
|
||||
|
||||
@// Loop on the sets for grp zero
|
||||
|
||||
grpZeroSetLoop\name :
|
||||
|
||||
VLD1 dX0,[pSrc],pointStep
|
||||
VLD1 dX1,[pSrc],step @// step = -pointStep + 8
|
||||
SUBS setCount,setCount,#1 @// decrement the loop counter
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
|
||||
VHADD dY0,dX0,dX1
|
||||
VHSUB dY1,dX0,dX1
|
||||
|
||||
.ELSE
|
||||
|
||||
VADD dY0,dX0,dX1
|
||||
VSUB dY1,dX0,dX1
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
VST1 dY0,[pDst],outPointStep
|
||||
VST1 dY1,[pDst],dstStep @// dstStep = step = -pointStep + 8
|
||||
|
||||
@// The condition flags tested here were set by the SUBS above; the NEON
@// arithmetic and stores in between do not use a flag-setting form.
BGT grpZeroSetLoop\name
|
||||
|
||||
|
||||
@// reset pSrc to pDst for the next stage
|
||||
SUB pSrc,pDst,pointStep @// pDst -= 2*grpSize
|
||||
MOV pDst,pPingPongBuf
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Forward, unscaled entry point: expands FFTSTAGE with scaled="FALSE", inverse="FALSE";
@// fwd is the suffix given to the grpZeroSetLoop label of this expansion.
M_START armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",fwd
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, unscaled entry point: expands FFTSTAGE with scaled="FALSE", inverse="TRUE".
@// This file's FFTSTAGE never references \inverse, so apart from the label suffix (inv)
@// the expansion matches the forward unscaled one.
M_START armSP_FFTInv_CToC_SC32_Radix2_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",inv
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Forward, scaled (Sfs) entry point: expands FFTSTAGE with scaled="TRUE", inverse="FALSE",
@// which selects the halving VHADD/VHSUB butterfly; fwdsfs is the label suffix.
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",fwdsfs
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, scaled (Sfs) entry point: expands FFTSTAGE with scaled="TRUE", inverse="TRUE".
@// This file's FFTSTAGE never references \inverse, so apart from the label suffix (invsfs)
@// the expansion matches the forward scaled one.
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",invsfs
|
||||
M_END
|
||||
|
||||
.end
|
@ -0,0 +1,184 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7493
|
||||
@// Last Modified Date: Mon, 24 Sep 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute the last stage of a Radix 2 DIT in-order out-of-place FFT
|
||||
@// stage for a N point complex signal.
|
||||
@//
|
||||
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
|
||||
@// Scratch register aliases used by FFTSTAGE in this file.
@// outPointStep : byte distance between the two outputs of a butterfly (8*subFFTSize).
#define outPointStep r3
|
||||
@// grpCount : loop counter of the group loop. Shares r4 with pTmp (below).
#define grpCount r4
|
||||
@// dstStep : post-increment applied after the second store (16 - outPointStep).
#define dstStep r5
|
||||
@// pTmp : temporary for the pSrc/pDst swap. It is only written after the group loop
@// has finished, so reusing r4 does not disturb grpCount.
#define pTmp r4
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dWr D0.S32
|
||||
#define dWi d1.s32
|
||||
#define dXr0 d2.s32
|
||||
#define dXi0 d3.s32
|
||||
#define dXr1 d4.s32
|
||||
#define dXi1 d5.s32
|
||||
#define dYr0 d6.s32
|
||||
#define dYi0 d7.s32
|
||||
#define dYr1 d8.s32
|
||||
#define dYi1 d9.s32
|
||||
#define qT0 q5.s64
|
||||
#define qT1 q6.s64
|
||||
|
||||
@// FFTSTAGE scaled, inverse, name
@//   Last radix-2 stage over 32-bit complex data (8 bytes per point).
@//   scaled  : "TRUE" selects the halving butterflies (VHSUB/VHADD); any other value
@//             selects plain VSUB/VADD.
@//   inverse : "TRUE" multiplies by the conjugate of the loaded twiddle; any other value
@//             multiplies by the twiddle as loaded.
@//   name    : suffix pasted onto the loop label so each expansion has a unique label.
@//   Inputs  : pSrc, pDst, pTwiddle, subFFTSize.
@//   On exit : subFFTNum = 1, subFFTSize doubled, pSrc/pDst swapped and rewound,
@//             pTwiddle rewound (the rewind amounts assume subFFTSize is even).
@//   Writes  : outPointStep, grpCount/pTmp (same register), dstStep, the NEON registers
@//             behind dWr..dYi1 and qT0/qT1, and the condition flags.
.macro FFTSTAGE scaled, inverse, name
|
||||
|
||||
|
||||
@// outPointStep = subFFTSize * 8 bytes
MOV outPointStep,subFFTSize,LSL #3
|
||||
@// Update grpCount and grpSize right away
|
||||
|
||||
MOV subFFTNum,#1 @//after the last stage
|
||||
@// grpCount = 2 * subFFTSize (the loop below subtracts 4 per pass)
LSL grpCount,subFFTSize,#1
|
||||
|
||||
@// update subFFTSize for the next stage
|
||||
MOV subFFTSize,grpCount
|
||||
|
||||
@// dstStep = 16 - outPointStep: steps back from the second output and on to the next pair
RSB dstStep,outPointStep,#16
|
||||
|
||||
|
||||
@// Loop on 2 grps at a time for the last stage
|
||||
|
||||
grpLoop\name :
|
||||
@// Load two twiddles, de-interleaved: dWr = real parts, dWi = imaginary parts
VLD2 {dWr,dWi},[pTwiddle :64]!
|
||||
|
||||
@// Load 32 bytes de-interleaved by 4: each lane gets point0 (Xr0,Xi0) and point1 (Xr1,Xi1)
VLD4 {dXr0,dXi0,dXr1,dXi1},[pSrc :128]!
|
||||
@// Flags set here are consumed by the bgt at the bottom of the loop
SUBS grpCount,grpCount,#4 @// grpCount is multiplied by 2
|
||||
|
||||
@// 64-bit products of twiddle and point1; inverse uses the conjugated twiddle
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT0,dWr,dXr1
|
||||
VMLAL qT0,dWi,dXi1 @// real part
|
||||
VMULL qT1,dWr,dXi1
|
||||
VMLSL qT1,dWi,dXr1 @// imag part
|
||||
|
||||
.else
|
||||
|
||||
VMULL qT0,dWr,dXr1
|
||||
VMLSL qT0,dWi,dXi1 @// real part
|
||||
VMULL qT1,dWr,dXi1
|
||||
VMLAL qT1,dWi,dXr1 @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
@// Rounding shift right by 31 narrows the products back to 32 bits, replacing point1
VRSHRN dXr1,qT0,#31
|
||||
VRSHRN dXi1,qT1,#31
|
||||
|
||||
|
||||
@// Butterfly: Y0 = point0 - product, Y1 = point0 + product (halved when scaled)
.ifeqs "\scaled", "TRUE"
|
||||
|
||||
VHSUB dYr0,dXr0,dXr1
|
||||
VHSUB dYi0,dXi0,dXi1
|
||||
VHADD dYr1,dXr0,dXr1
|
||||
VHADD dYi1,dXi0,dXi1
|
||||
|
||||
.else
|
||||
|
||||
VSUB dYr0,dXr0,dXr1
|
||||
VSUB dYi0,dXi0,dXi1
|
||||
VADD dYr1,dXr0,dXr1
|
||||
VADD dYi1,dXi0,dXi1
|
||||
|
||||
|
||||
.endif
|
||||
|
||||
@// Store Y0, then Y1 outPointStep bytes further on; net pDst advance per pass is 16 bytes
VST2 {dYr0,dYi0},[pDst],outPointStep
|
||||
VST2 {dYr1,dYi1},[pDst],dstStep @// dstStep = step = -outPointStep + 16
|
||||
|
||||
bgt grpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
|
||||
@// pTmp shares its register with grpCount, which is no longer needed here
MOV pTmp,pDst
|
||||
SUB pDst,pSrc,outPointStep,LSL #1 @// new pDst = pSrc - 2*outPointStep (the loop advanced pSrc by that much)
|
||||
SUB pSrc,pTmp,outPointStep
|
||||
|
||||
@// Reset pTwiddle for the next stage
|
||||
SUB pTwiddle,pTwiddle,outPointStep @// the loop advanced pTwiddle by outPointStep bytes in total
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Forward, unscaled last-stage radix-2 entry point: expands FFTSTAGE with
@// scaled="FALSE", inverse="FALSE"; fwd is the suffix of the loop label.
@// NOTE(review): this is the only M_START in the file that passes a third, empty
@// argument - presumably equivalent to omitting it; confirm against armCOMM_s.h.
M_START armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe,r4,""
|
||||
FFTSTAGE "FALSE","FALSE",fwd
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, unscaled last-stage radix-2 entry point: expands FFTSTAGE with
@// scaled="FALSE", inverse="TRUE" (conjugated twiddle); inv is the loop label suffix.
M_START armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",inv
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Forward, scaled (Sfs) last-stage radix-2 entry point: expands FFTSTAGE with
@// scaled="TRUE" (halving butterflies), inverse="FALSE"; fwdsfs is the loop label suffix.
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",fwdsfs
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, scaled (Sfs) last-stage radix-2 entry point: expands FFTSTAGE with
@// scaled="TRUE" (halving butterflies), inverse="TRUE" (conjugated twiddle);
@// invsfs is the loop label suffix.
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",invsfs
|
||||
M_END
|
||||
|
||||
.end
|
216
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix2_unsafe_s.S
Normal file
216
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix2_unsafe_s.S
Normal file
@ -0,0 +1,216 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC32_Radix2_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 5638
|
||||
@// Last Modified Date: Wed, 06 Jun 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a Radix 2 DIT in-order out-of-place FFT stage for a N point complex signal.
|
||||
@// This handles the general stage, not the first or last stage.
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
@// Scratch register aliases used by FFTSTAGE in this file.
@// outPointStep : byte distance between the two outputs of a butterfly.
#define outPointStep r3
|
||||
@// pointStep : byte distance between the two inputs of a butterfly.
#define pointStep r4
|
||||
@// grpCount : outer (group) loop counter.
#define grpCount r5
|
||||
@// setCount : inner (set) loop counter, reloaded at the top of every group.
#define setCount r8
|
||||
@// Commented-out alias; appears to be a leftover from an earlier syntax of this file.
@//const RN 9
|
||||
@// step : 16 - pointStep, post-increment after the second load of a set.
#define step r10
|
||||
@// dstStep : 16 - outPointStep, post-increment after the second store of a set.
#define dstStep r11
|
||||
@// pTable and pTmp share r9. pTable is not referenced by the code in this file;
@// pTmp is the temporary used for the final pSrc/pDst swap.
#define pTable r9
|
||||
#define pTmp r9
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dW D0.S32
|
||||
#define dX0 D2.S32
|
||||
#define dX1 D3.S32
|
||||
#define dX2 D4.S32
|
||||
#define dX3 D5.S32
|
||||
#define dY0 D6.S32
|
||||
#define dY1 D7.S32
|
||||
#define dY2 D8.S32
|
||||
#define dY3 D9.S32
|
||||
#define qT0 Q3.S64
|
||||
#define qT1 Q4.S64
|
||||
|
||||
|
||||
|
||||
@// FFTSTAGE scaled, inverse, name
@//   General (neither first nor last) radix-2 stage over 32-bit complex data.
@//   scaled  : "TRUE" selects the halving butterflies (VHSUB/VHADD); any other value
@//             selects plain VSUB/VADD.
@//   inverse : "TRUE" multiplies by the conjugate of the loaded twiddle; any other value
@//             multiplies by the twiddle as loaded.
@//   name    : suffix pasted onto both loop labels so each expansion has unique labels.
@//   Inputs  : pSrc, pDst, pTwiddle, subFFTNum, subFFTSize.
@//   On exit : subFFTNum halved, subFFTSize doubled, pSrc/pDst swapped and rewound,
@//             pTwiddle rewound by outPointStep.
@//   Writes  : outPointStep, pointStep, grpCount, setCount, step, dstStep, pTmp, the NEON
@//             registers behind dW, dX0..dX3, dY0..dY3, qT0/qT1, and the condition flags.
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
|
||||
@// Update grpCount and grpSize right away in order to reuse the pGrpCount and pGrpSize regs
|
||||
|
||||
LSR subFFTNum,subFFTNum,#1 @//grpSize
|
||||
@// grpCount = 2 * subFFTSize (the group loop subtracts 2 per group)
LSL grpCount,subFFTSize,#1
|
||||
|
||||
|
||||
@// pT0+1 increments pT0 by 8 bytes
|
||||
@// pT0+pointStep = increment of 8*pointStep bytes = 4*grpSize bytes
|
||||
@// pointStep = 4 * grpSize for now; it is doubled to 8 * grpSize after the multiply below
MOV pointStep,subFFTNum,LSL #2
|
||||
|
||||
@// update subFFTSize for the next stage
|
||||
MOV subFFTSize,grpCount
|
||||
|
||||
@// pOut0+1 increments pOut0 by 8 bytes
|
||||
@// pOut0+outPointStep == increment of 8*outPointStep bytes = 4*size bytes
|
||||
@// SMULBB multiplies the signed bottom halfwords only, so grpCount and pointStep
@// must each fit in 16 bits here
SMULBB outPointStep,grpCount,pointStep
|
||||
LSL pointStep,pointStep,#1
|
||||
|
||||
|
||||
@// step = 16 - pointStep, dstStep = 16 - outPointStep: each undoes the jump to the
@// second point and moves on 16 bytes to the next pair
RSB step,pointStep,#16
|
||||
RSB dstStep,outPointStep,#16
|
||||
|
||||
@// Loop on the groups
|
||||
|
||||
grpLoop\name :
|
||||
@// setCount = pointStep / 8 = grpSize
MOV setCount,pointStep,LSR #3
|
||||
@// One twiddle per group; pTwiddle then advances by pointStep bytes
VLD1 dW,[pTwiddle],pointStep @//[wi | wr]
|
||||
|
||||
|
||||
@// Loop on the sets
|
||||
|
||||
|
||||
setLoop\name :
|
||||
|
||||
|
||||
@// Two complex values per load, de-interleaved; net pSrc advance per pass is 16 bytes
VLD2 {dX0,dX1},[pSrc],pointStep @// point0: dX0-real part dX1-img part
|
||||
VLD2 {dX2,dX3},[pSrc],step @// point1: dX2-real part dX3-img part
|
||||
|
||||
@// Flags set here are consumed by the BGT setLoop below
SUBS setCount,setCount,#2
|
||||
|
||||
@// 64-bit products of point1 and the twiddle lanes dW[0] / dW[1];
@// inverse uses the conjugated twiddle
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT0,dX2,dW[0]
|
||||
VMLAL qT0,dX3,dW[1] @// real part
|
||||
VMULL qT1,dX3,dW[0]
|
||||
VMLSL qT1,dX2,dW[1] @// imag part
|
||||
|
||||
.else
|
||||
|
||||
VMULL qT0,dX2,dW[0]
|
||||
VMLSL qT0,dX3,dW[1] @// real part
|
||||
VMULL qT1,dX3,dW[0]
|
||||
VMLAL qT1,dX2,dW[1] @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
@// Rounding shift right by 31 narrows the products back to 32 bits, replacing point1.
@// qT0/qT1 (Q3/Q4) occupy the same registers as dY0..dY3 (D6..D9), so this must
@// happen before the butterflies below write their results.
VRSHRN dX2,qT0,#31
|
||||
VRSHRN dX3,qT1,#31
|
||||
|
||||
@// Butterfly: (dY0,dY1) = point0 - product, (dY2,dY3) = point0 + product (halved when scaled)
.ifeqs "\scaled", "TRUE"
|
||||
VHSUB dY0,dX0,dX2
|
||||
VHSUB dY1,dX1,dX3
|
||||
VHADD dY2,dX0,dX2
|
||||
VHADD dY3,dX1,dX3
|
||||
|
||||
.else
|
||||
VSUB dY0,dX0,dX2
|
||||
VSUB dY1,dX1,dX3
|
||||
VADD dY2,dX0,dX2
|
||||
VADD dY3,dX1,dX3
|
||||
|
||||
.endif
|
||||
|
||||
@// Store the two results outPointStep bytes apart; net pDst advance per pass is 16 bytes
VST2 {dY0,dY1},[pDst],outPointStep
|
||||
VST2 {dY2,dY3},[pDst],dstStep @// dstStep = -outPointStep + 16
|
||||
|
||||
BGT setLoop\name
|
||||
|
||||
@// The ADD below does not set flags, so the BGT still tests the SUBS result
SUBS grpCount,grpCount,#2
|
||||
@// Skip the second-point half of this group, already consumed through pointStep offsets
ADD pSrc,pSrc,pointStep
|
||||
BGT grpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
|
||||
MOV pTmp,pDst
|
||||
SUB pDst,pSrc,outPointStep,LSL #1 @// new pDst = pSrc - 2*outPointStep (the loops advanced pSrc by that much)
|
||||
SUB pSrc,pTmp,outPointStep
|
||||
|
||||
@// Reset pTwiddle for the next stage
|
||||
SUB pTwiddle,pTwiddle,outPointStep @// the group loop advanced pTwiddle by outPointStep bytes in total
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Forward, unscaled general-stage radix-2 entry point: expands FFTSTAGE with
@// scaled="FALSE", inverse="FALSE"; FWD is the suffix of the loop labels.
M_START armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, unscaled general-stage radix-2 entry point: expands FFTSTAGE with
@// scaled="FALSE", inverse="TRUE" (conjugated twiddle); INV is the loop label suffix.
M_START armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Forward, scaled (Sfs) general-stage radix-2 entry point: expands FFTSTAGE with
@// scaled="TRUE" (halving butterflies), inverse="FALSE"; FWDSFS is the loop label suffix.
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",FWDSFS
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, scaled (Sfs) general-stage radix-2 entry point: expands FFTSTAGE with
@// scaled="TRUE" (halving butterflies), inverse="TRUE" (conjugated twiddle);
@// INVSFS is the loop label suffix.
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",INVSFS
|
||||
M_END
|
||||
|
||||
.end
|
@ -0,0 +1,320 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7767
|
||||
@// Last Modified Date: Thu, 27 Sep 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a first stage Radix 4 FFT stage for a N point complex signal
|
||||
@//
|
||||
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define pPingPongBuf r5
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
@// Scratch register aliases used by FFTSTAGE in this file.
@// grpSize : subFFTNum / 4; the same register then serves as the set loop counter.
#define grpSize r3
|
||||
@// Reuse grpSize as setCount
|
||||
#define setCount r3
|
||||
@// pointStep and outPointStep name the same register (r4): FFTSTAGE in this file
@// uses one stride for both its loads and its stores.
#define pointStep r4
|
||||
#define outPointStep r4
|
||||
@// setStep : 16 - 3*pointStep, the post-increment that moves on to the next set.
#define setStep r8
|
||||
@// step1 : 2*pointStep.
#define step1 r9
|
||||
@// step3 : -pointStep.
#define step3 r10
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dXr0 D0.S32
|
||||
#define dXi0 D1.S32
|
||||
#define dXr1 D2.S32
|
||||
#define dXi1 D3.S32
|
||||
#define dXr2 D4.S32
|
||||
#define dXi2 D5.S32
|
||||
#define dXr3 D6.S32
|
||||
#define dXi3 D7.S32
|
||||
#define dYr0 D8.S32
|
||||
#define dYi0 D9.S32
|
||||
#define dYr1 D10.S32
|
||||
#define dYi1 D11.S32
|
||||
#define dYr2 D12.S32
|
||||
#define dYi2 D13.S32
|
||||
#define dYr3 D14.S32
|
||||
#define dYi3 D15.S32
|
||||
#define qX0 Q0.S32
|
||||
#define qX1 Q1.S32
|
||||
#define qX2 Q2.S32
|
||||
#define qX3 Q3.S32
|
||||
#define qY0 Q4.S32
|
||||
#define qY1 Q5.S32
|
||||
#define qY2 Q6.S32
|
||||
#define qY3 Q7.S32
|
||||
#define dZr0 D16.S32
|
||||
#define dZi0 D17.S32
|
||||
#define dZr1 D18.S32
|
||||
#define dZi1 D19.S32
|
||||
#define dZr2 D20.S32
|
||||
#define dZi2 D21.S32
|
||||
#define dZr3 D22.S32
|
||||
#define dZi3 D23.S32
|
||||
#define qZ0 Q8.S32
|
||||
#define qZ1 Q9.S32
|
||||
#define qZ2 Q10.S32
|
||||
#define qZ3 Q11.S32
|
||||
|
||||
|
||||
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// pT0+1 increments pT0 by 8 bytes
|
||||
@// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
|
||||
@// Note: outPointStep = pointStep for firststage
|
||||
|
||||
MOV pointStep,subFFTNum,LSL #1
|
||||
|
||||
|
||||
@// Update pSubFFTSize and pSubFFTNum regs
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
|
||||
MOV subFFTSize,#4 @// subFFTSize = 4 after the first (radix-4) stage
|
||||
|
||||
@// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
|
||||
LSR grpSize,subFFTNum,#2
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
MOV subFFTNum,grpSize
|
||||
|
||||
|
||||
@// Calculate the step of input data for the next set
|
||||
@//MOV setStep,pointStep,LSL #1
|
||||
MOV setStep,grpSize,LSL #4
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
ADD setStep,setStep,pointStep @// setStep = 3*pointStep
|
||||
RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16
|
||||
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
|
||||
MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
VHADD qY0,qX0,qX2
|
||||
.else
|
||||
VADD qY0,qX0,qX2
|
||||
.endif
|
||||
|
||||
RSB step3,pointStep,#0 @// step3 = -pointStep
|
||||
|
||||
@// grp = 0 a special case since all the twiddle factors are 1
|
||||
@// Loop on the sets : 2 sets at a time
|
||||
|
||||
grpZeroSetLoop\name :
|
||||
|
||||
|
||||
|
||||
@// Decrement setcount
|
||||
SUBS setCount,setCount,#2 @// decrement the set loop counter
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
|
||||
VHSUB qY2,qX0,qX2
|
||||
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
|
||||
VHADD qY1,qX1,qX3
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],step3 @// data[2]
|
||||
VHSUB qY3,qX1,qX3
|
||||
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
|
||||
VHADD qZ0,qY0,qY1
|
||||
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
|
||||
VHSUB dZr3,dYr2,dYi3
|
||||
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VHADD dZi3,dYi2,dYr3
|
||||
|
||||
VHSUB qZ1,qY0,qY1
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VHADD dZr2,dYr2,dYi3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VHSUB dZi2,dYi2,dYr3
|
||||
|
||||
VHADD qY0,qX0,qX2 @// u0 for next iteration
|
||||
VST2 {dZr2,dZi2},[pDst :128],setStep
|
||||
|
||||
|
||||
.else
|
||||
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
|
||||
VHADD qZ0,qY0,qY1
|
||||
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
|
||||
VHADD dZr2,dYr2,dYi3
|
||||
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VHSUB dZi2,dYi2,dYr3
|
||||
|
||||
VHSUB qZ1,qY0,qY1
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
|
||||
VHSUB dZr3,dYr2,dYi3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VHADD dZi3,dYi2,dYr3
|
||||
|
||||
VHADD qY0,qX0,qX2 @// u0 for next iteration
|
||||
VST2 {dZr3,dZi3},[pDst :128],setStep
|
||||
|
||||
.endif
|
||||
|
||||
|
||||
|
||||
.else
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
|
||||
|
||||
VSUB qY2,qX0,qX2
|
||||
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
|
||||
VADD qY1,qX1,qX3
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],step3 @// data[2]
|
||||
VSUB qY3,qX1,qX3
|
||||
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
|
||||
VADD qZ0,qY0,qY1
|
||||
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
|
||||
VSUB dZr3,dYr2,dYi3
|
||||
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VADD dZi3,dYi2,dYr3
|
||||
|
||||
VSUB qZ1,qY0,qY1
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VADD dZr2,dYr2,dYi3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VSUB dZi2,dYi2,dYr3
|
||||
|
||||
VADD qY0,qX0,qX2 @// u0 for next iteration
|
||||
VST2 {dZr2,dZi2},[pDst :128],setStep
|
||||
|
||||
|
||||
.else
|
||||
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
|
||||
VADD qZ0,qY0,qY1
|
||||
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
|
||||
VADD dZr2,dYr2,dYi3
|
||||
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VSUB dZi2,dYi2,dYr3
|
||||
|
||||
VSUB qZ1,qY0,qY1
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
|
||||
VSUB dZr3,dYr2,dYi3
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
VADD dZi3,dYi2,dYr3
|
||||
|
||||
VADD qY0,qX0,qX2 @// u0 for next iteration
|
||||
VST2 {dZr3,dZi3},[pDst :128],setStep
|
||||
|
||||
.endif
|
||||
|
||||
.endif
|
||||
|
||||
BGT grpZeroSetLoop\name
|
||||
|
||||
@// reset pSrc to pDst for the next stage
|
||||
SUB pSrc,pDst,pointStep @// pDst -= 2*grpSize
|
||||
MOV pDst,pPingPongBuf
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
@// Forward, unscaled first-stage radix-4 entry point: expands FFTSTAGE with
@// scaled="FALSE", inverse="FALSE"; fwd is the suffix of the loop label.
M_START armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",fwd
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
@// Inverse, unscaled first-stage radix-4 entry point: expands FFTSTAGE with
@// scaled="FALSE", inverse="TRUE"; inv is the suffix of the loop label.
M_START armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",inv
|
||||
M_END
|
||||
|
||||
|
||||
@// Forward, scaled (Sfs) first-stage radix-4 entry point: expands FFTSTAGE with
@// scaled="TRUE" (halving adds/subtracts), inverse="FALSE"; fwdsfs is the loop label suffix.
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",fwdsfs
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, scaled (Sfs) first-stage radix-4 entry point: expands FFTSTAGE with
@// scaled="TRUE" (halving adds/subtracts), inverse="TRUE"; invsfs is the loop label suffix.
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",invsfs
|
||||
M_END
|
||||
|
||||
.end
|
@ -0,0 +1,404 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7767
|
||||
@// Last Modified Date: Thu, 27 Sep 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute the last stage of a Radix 4 FFT for a N point complex signal
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
@//IMPORT armAAC_constTable
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
@// Scratch register aliases used by FFTSTAGE in this file.
@// outPointStep : byte distance between consecutive outputs of a butterfly (8*subFFTSize).
#define outPointStep r3
|
||||
@// grpCount : group loop counter. Shares r4 with pTmp (below).
#define grpCount r4
|
||||
@// dstStep : 16 - 3*outPointStep, post-increment after the fourth store.
#define dstStep r5
|
||||
@// grpTwStep, stepTwiddle, twStep : post-increments applied to pTwiddle; they are
@// recomputed at the top of every pass of the group loop.
#define grpTwStep r8
|
||||
#define stepTwiddle r9
|
||||
#define twStep r10
|
||||
@// pTmp : temporary for the final pSrc/pDst swap; written only after the group loop.
#define pTmp r4
|
||||
@// step16, step24 : hold the constants 16 and 24, used as pTwiddle post-increments.
#define step16 r11
|
||||
#define step24 r12
|
||||
|
||||
|
||||
@// Neon Registers
|
||||
|
||||
#define dButterfly1Real02 D0.S32
|
||||
#define dButterfly1Imag02 D1.S32
|
||||
#define dButterfly1Real13 D2.S32
|
||||
#define dButterfly1Imag13 D3.S32
|
||||
#define dButterfly2Real02 D4.S32
|
||||
#define dButterfly2Imag02 D5.S32
|
||||
#define dButterfly2Real13 D6.S32
|
||||
#define dButterfly2Imag13 D7.S32
|
||||
#define dXr0 D0.S32
|
||||
#define dXi0 D1.S32
|
||||
#define dXr1 D2.S32
|
||||
#define dXi1 D3.S32
|
||||
#define dXr2 D4.S32
|
||||
#define dXi2 D5.S32
|
||||
#define dXr3 D6.S32
|
||||
#define dXi3 D7.S32
|
||||
|
||||
#define dYr0 D16.S32
|
||||
#define dYi0 D17.S32
|
||||
#define dYr1 D18.S32
|
||||
#define dYi1 D19.S32
|
||||
#define dYr2 D20.S32
|
||||
#define dYi2 D21.S32
|
||||
#define dYr3 D22.S32
|
||||
#define dYi3 D23.S32
|
||||
|
||||
#define dW1r D8.S32
|
||||
#define dW1i D9.S32
|
||||
#define dW2r D10.S32
|
||||
#define dW2i D11.S32
|
||||
#define dW3r D12.S32
|
||||
#define dW3i D13.S32
|
||||
#define qT0 Q7.S64
|
||||
#define qT1 Q8.S64
|
||||
#define qT2 Q9.S64
|
||||
#define qT3 Q10.S64
|
||||
#define qT4 Q11.S64
|
||||
#define qT5 Q12.S64
|
||||
|
||||
#define dZr0 D14.S32
|
||||
#define dZi0 D15.S32
|
||||
#define dZr1 D26.S32
|
||||
#define dZi1 D27.S32
|
||||
#define dZr2 D28.S32
|
||||
#define dZi2 D29.S32
|
||||
#define dZr3 D30.S32
|
||||
#define dZi3 D31.S32
|
||||
|
||||
#define qX0 Q0.S32
|
||||
#define qY0 Q8.S32
|
||||
#define qY1 Q9.S32
|
||||
#define qY2 Q10.S32
|
||||
#define qY3 Q11.S32
|
||||
#define qZ0 Q7.S32
|
||||
#define qZ1 Q13.S32
|
||||
#define qZ2 Q14.S32
|
||||
#define qZ3 Q15.S32
|
||||
|
||||
|
||||
|
||||
.MACRO FFTSTAGE scaled, inverse , name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
|
||||
@// pOut0+1 increments pOut0 by 8 bytes
|
||||
@// pOut0+outPointStep == increment of 8*outPointStep bytes
|
||||
MOV outPointStep,subFFTSize,LSL #3
|
||||
|
||||
@// Update grpCount and grpSize rightaway
|
||||
|
||||
VLD2 {dW1r,dW1i},[pTwiddle :128] @// [wi|wr]
|
||||
MOV step16,#16
|
||||
LSL grpCount,subFFTSize,#2
|
||||
|
||||
VLD1 dW2r,[pTwiddle :64] @// [wi|wr]
|
||||
MOV subFFTNum,#1 @//after the last stage
|
||||
|
||||
VLD1 dW3r,[pTwiddle :64],step16 @// [wi|wr]
|
||||
MOV stepTwiddle,#0
|
||||
|
||||
VLD1 dW2i,[pTwiddle :64]! @// [wi|wr]
|
||||
SUB grpTwStep,stepTwiddle,#8 @// grpTwStep = -8 to start with
|
||||
|
||||
@// update subFFTSize for the next stage
|
||||
MOV subFFTSize,grpCount
|
||||
VLD1 dW3i,[pTwiddle :64],grpTwStep @// [wi|wr]
|
||||
MOV dstStep,outPointStep,LSL #1
|
||||
|
||||
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
|
||||
ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
|
||||
RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16
|
||||
MOV step24,#24
|
||||
|
||||
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
|
||||
|
||||
|
||||
@// Process two groups at a time
|
||||
|
||||
grpLoop\name :
|
||||
|
||||
VZIP dW2r,dW2i
|
||||
ADD stepTwiddle,stepTwiddle,#16 @// increment for the next iteration
|
||||
VZIP dW3r,dW3i
|
||||
ADD grpTwStep,stepTwiddle,#4
|
||||
VUZP dButterfly1Real13, dButterfly2Real13 @// B.r D.r
|
||||
SUB twStep,stepTwiddle,#16 @// -16+stepTwiddle
|
||||
VUZP dButterfly1Imag13, dButterfly2Imag13 @// B.i D.i
|
||||
MOV grpTwStep,grpTwStep,LSL #1
|
||||
VUZP dButterfly1Real02, dButterfly2Real02 @// A.r C.r
|
||||
RSB grpTwStep,grpTwStep,#0 @// -8-2*stepTwiddle
|
||||
|
||||
|
||||
VUZP dButterfly1Imag02, dButterfly2Imag02 @// A.i C.i
|
||||
|
||||
|
||||
SUBS grpCount,grpCount,#8 @// grpCount is multiplied by 4
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT0,dW1r,dXr1
|
||||
VMLAL qT0,dW1i,dXi1 @// real part
|
||||
VMULL qT1,dW1r,dXi1
|
||||
VMLSL qT1,dW1i,dXr1 @// imag part
|
||||
|
||||
.else
|
||||
|
||||
VMULL qT0,dW1r,dXr1
|
||||
VMLSL qT0,dW1i,dXi1 @// real part
|
||||
VMULL qT1,dW1r,dXi1
|
||||
VMLAL qT1,dW1i,dXr1 @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
VLD2 {dW1r,dW1i},[pTwiddle :128],stepTwiddle @// [wi|wr]
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT2,dW2r,dXr2
|
||||
VMLAL qT2,dW2i,dXi2 @// real part
|
||||
VMULL qT3,dW2r,dXi2
|
||||
VLD1 dW2r,[pTwiddle :64],step16 @// [wi|wr]
|
||||
VMLSL qT3,dW2i,dXr2 @// imag part
|
||||
|
||||
.else
|
||||
|
||||
VMULL qT2,dW2r,dXr2
|
||||
VMLSL qT2,dW2i,dXi2 @// real part
|
||||
VMULL qT3,dW2r,dXi2
|
||||
VLD1 dW2r,[pTwiddle :64],step16 @// [wi|wr]
|
||||
VMLAL qT3,dW2i,dXr2 @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
|
||||
VRSHRN dZr1,qT0,#31
|
||||
VLD1 dW2i,[pTwiddle :64],twStep @// [wi|wr]
|
||||
VRSHRN dZi1,qT1,#31
|
||||
|
||||
VMOV qZ0,qX0 @// move qX0 so as to load for the next iteration
|
||||
VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT4,dW3r,dXr3
|
||||
VMLAL qT4,dW3i,dXi3 @// real part
|
||||
VMULL qT5,dW3r,dXi3
|
||||
VLD1 dW3r,[pTwiddle :64],step24
|
||||
VMLSL qT5,dW3i,dXr3 @// imag part
|
||||
|
||||
.else
|
||||
|
||||
VMULL qT4,dW3r,dXr3
|
||||
VMLSL qT4,dW3i,dXi3 @// real part
|
||||
VMULL qT5,dW3r,dXi3
|
||||
VLD1 dW3r,[pTwiddle :64],step24
|
||||
VMLAL qT5,dW3i,dXr3 @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
VRSHRN dZr2,qT2,#31
|
||||
VLD1 dW3i,[pTwiddle :64],grpTwStep @// [wi|wr]
|
||||
VRSHRN dZi2,qT3,#31
|
||||
|
||||
VRSHRN dZr3,qT4,#31
|
||||
VRSHRN dZi3,qT5,#31
|
||||
VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
|
||||
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
|
||||
VHADD qY0,qZ0,qZ2
|
||||
VHSUB qY2,qZ0,qZ2
|
||||
VHADD qY1,qZ1,qZ3
|
||||
VHSUB qY3,qZ1,qZ3
|
||||
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VHSUB qZ0,qY2,qY1
|
||||
|
||||
VHADD dZr3,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VHSUB dZi3,dYi0,dYr3
|
||||
|
||||
VHADD qZ2,qY2,qY1
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VHSUB dZr1,dYr0,dYi3
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VHADD dZi1,dYi0,dYr3
|
||||
|
||||
VST2 {dZr1,dZi1},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
|
||||
|
||||
|
||||
.else
|
||||
|
||||
VHSUB qZ0,qY2,qY1
|
||||
|
||||
VHSUB dZr1,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VHADD dZi1,dYi0,dYr3
|
||||
|
||||
VHADD qZ2,qY2,qY1
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
|
||||
VHADD dZr3,dYr0,dYi3
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VHSUB dZi3,dYi0,dYr3
|
||||
|
||||
VST2 {dZr3,dZi3},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
|
||||
|
||||
|
||||
.endif
|
||||
|
||||
|
||||
|
||||
.else
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
|
||||
VADD qY0,qZ0,qZ2
|
||||
VSUB qY2,qZ0,qZ2
|
||||
VADD qY1,qZ1,qZ3
|
||||
VSUB qY3,qZ1,qZ3
|
||||
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VSUB qZ0,qY2,qY1
|
||||
|
||||
VADD dZr3,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VSUB dZi3,dYi0,dYr3
|
||||
|
||||
VADD qZ2,qY2,qY1
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VSUB dZr1,dYr0,dYi3
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VADD dZi1,dYi0,dYr3
|
||||
|
||||
VST2 {dZr1,dZi1},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
|
||||
|
||||
|
||||
.else
|
||||
|
||||
VSUB qZ0,qY2,qY1
|
||||
|
||||
VSUB dZr1,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VADD dZi1,dYi0,dYr3
|
||||
|
||||
VADD qZ2,qY2,qY1
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
|
||||
VADD dZr3,dYr0,dYi3
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VSUB dZi3,dYi0,dYr3
|
||||
|
||||
VST2 {dZr3,dZi3},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
|
||||
|
||||
|
||||
.endif
|
||||
|
||||
.endif
|
||||
|
||||
BGT grpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
|
||||
MOV pTmp,pDst
|
||||
SUB pSrc,pSrc,#64 @// Extra increment done in final iteration of the loop
|
||||
SUB pDst,pSrc,outPointStep,LSL #2 @// pDst -= 4*size; pSrc -= 8*size bytes
|
||||
SUB pSrc,pTmp,outPointStep
|
||||
SUB pTwiddle,pTwiddle,subFFTSize,LSL #1
|
||||
SUB pTwiddle,pTwiddle,#16 @// Extra increment done in final iteration of the loop
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
@// Forward, unscaled last-stage radix-4 entry point: expands FFTSTAGE with
@// scaled="FALSE", inverse="FALSE"; fwd is the suffix of the loop label.
M_START armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",fwd
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, unscaled last-stage radix-4 entry point: expands FFTSTAGE with
@// scaled="FALSE", inverse="TRUE" (conjugated twiddles); inv is the loop label suffix.
M_START armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",inv
|
||||
M_END
|
||||
|
||||
|
||||
@// Forward, scaled (Sfs) last-stage radix-4 entry point: expands FFTSTAGE with
@// scaled="TRUE" (halving adds/subtracts), inverse="FALSE"; fwdsfs is the loop label suffix.
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",fwdsfs
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, scaled (Sfs) last-stage radix-4 entry point: expands FFTSTAGE with
@// scaled="TRUE" (halving adds/subtracts), inverse="TRUE" (conjugated twiddles);
@// invsfs is the loop label suffix.
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",invsfs
|
||||
M_END
|
||||
|
||||
|
||||
.end
|
395
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S
Normal file
395
media/openmax_dl/dl/sp/src/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S
Normal file
@ -0,0 +1,395 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC32_Radix4_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7767
|
||||
@// Last Modified Date: Thu, 27 Sep 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a Radix 4 FFT stage for a N point complex signal
|
||||
@//
|
||||
|
||||
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
@// Register aliases used by the radix-4 FFTSTAGE macro below.
@// Several aliases deliberately share a physical register; the sharing is
@// listed next to each group so that edits to the macro do not clobber a
@// value that is still live.
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
@// grpCount and t1 are both r3: t1 is only written after the group loop has
@// finished with grpCount.
|
||||
|
||||
#define grpCount r3
|
||||
#define pointStep r4
|
||||
#define outPointStep r5
|
||||
#define stepTwiddle r12
|
||||
#define setCount r14
|
||||
#define srcStep r8
|
||||
#define setStep r9
|
||||
#define dstStep r10
|
||||
#define twStep r11
|
||||
#define t1 r3
|
||||
|
||||
@// Neon Registers
@// Each Qn is the pair D(2n),D(2n+1). Consequently:
@//   qX0 (Q2)           = {dXr0,dXi0}
@//   qY0..qY3 (Q6..Q9)  = {dYr0,dYi0} .. {dYr3,dYi3}
@//   qZ0..qZ3 (Q10..Q13)= {dZr0,dZi0} .. {dZr3,dZi3}
@//   qT2,qT3 (Q6,Q7) overlap qY0,qY1 and qT0,qT1 (Q8,Q9) overlap qY2,qY3:
@//   the 64-bit products must be narrowed before the qY values are formed.
|
||||
|
||||
#define dW1 D0.S32
|
||||
#define dW2 D1.S32
|
||||
#define dW3 D2.S32
|
||||
|
||||
#define dXr0 D4.S32
|
||||
#define dXi0 D5.S32
|
||||
#define dXr1 D6.S32
|
||||
#define dXi1 D7.S32
|
||||
#define dXr2 D8.S32
|
||||
#define dXi2 D9.S32
|
||||
#define dXr3 D10.S32
|
||||
#define dXi3 D11.S32
|
||||
#define dYr0 D12.S32
|
||||
#define dYi0 D13.S32
|
||||
#define dYr1 D14.S32
|
||||
#define dYi1 D15.S32
|
||||
#define dYr2 D16.S32
|
||||
#define dYi2 D17.S32
|
||||
#define dYr3 D18.S32
|
||||
#define dYi3 D19.S32
|
||||
#define qT0 Q8.S64
|
||||
#define qT1 Q9.S64
|
||||
#define qT2 Q6.S64
|
||||
#define qT3 Q7.S64
|
||||
|
||||
#define dZr0 D20.S32
|
||||
#define dZi0 D21.S32
|
||||
#define dZr1 D22.S32
|
||||
#define dZi1 D23.S32
|
||||
#define dZr2 D24.S32
|
||||
#define dZi2 D25.S32
|
||||
#define dZr3 D26.S32
|
||||
#define dZi3 D27.S32
|
||||
|
||||
#define qY0 Q6.S32
|
||||
#define qY1 Q7.S32
|
||||
#define qY2 Q8.S32
|
||||
#define qY3 Q9.S32
|
||||
#define qX0 Q2.S32
|
||||
#define qZ0 Q10.S32
|
||||
#define qZ1 Q11.S32
|
||||
#define qZ2 Q12.S32
|
||||
#define qZ3 Q13.S32
|
||||
|
||||
|
||||
@// FFTSTAGE scaled, inverse, name
@//   Emits the body of one out-of-place radix-4 stage on SC32 data.
@//   scaled  : "TRUE" selects the halving butterflies (VHADD/VHSUB); any other
@//             value selects the plain VADD/VSUB butterflies.
@//   inverse : "TRUE" multiplies by the conjugate twiddle (VMLAL on the real
@//             part, VMLSL on the imaginary part) and stores the dZ3 registers
@//             in the second output slot and dZ1 in the fourth; otherwise the
@//             signs are reversed and dZ1/dZ3 are stored in natural order.
@//   name    : suffix appended to the grpLoop/setLoop labels so that every
@//             expansion of the macro gets unique labels.
@//   Reads pSrc, pDst, pTwiddle, subFFTNum and subFFTSize. On exit subFFTNum
@//   has been divided by 4, subFFTSize multiplied by 4, and pSrc/pDst have
@//   been rewound and swapped for the next stage.
@//   The loads for the next iteration are interleaved with the arithmetic of
@//   the current one, so instruction order inside the loops is significant.
.MACRO FFTSTAGE scaled, inverse , name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
|
||||
@// Update grpCount and subFFTSize right away so that their registers can be reused
|
||||
|
||||
LSL grpCount,subFFTSize,#2
|
||||
LSR subFFTNum,subFFTNum,#2
|
||||
MOV subFFTSize,grpCount
|
||||
|
||||
VLD1 dW1,[pTwiddle] @//[wi | wr]
|
||||
@// pT0+1 increments pT0 by 8 bytes
|
||||
@// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
|
||||
MOV pointStep,subFFTNum,LSL #1
|
||||
|
||||
|
||||
@// pOut0+1 increments pOut0 by 8 bytes
|
||||
@// pOut0+outPointStep == increment of 8*outPointStep bytes = 2*size bytes
|
||||
|
||||
MOV stepTwiddle,#0
|
||||
VLD1 dW2,[pTwiddle] @//[wi | wr]
|
||||
SMULBB outPointStep,grpCount,pointStep
|
||||
LSL pointStep,pointStep,#2 @// 2*grpSize
|
||||
|
||||
VLD1 dW3,[pTwiddle] @//[wi | wr]
|
||||
MOV srcStep,pointStep,LSL #1 @// srcStep = 2*pointStep
|
||||
ADD setStep,srcStep,pointStep @// setStep = 3*pointStep
|
||||
@//RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16
|
||||
RSB setStep,setStep,#0 @// setStep = - 3*pointStep
|
||||
SUB srcStep,srcStep,#16 @// srcStep = 2*pointStep-16
|
||||
|
||||
MOV dstStep,outPointStep,LSL #1
|
||||
ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
|
||||
RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16
|
||||
|
||||
|
||||
|
||||
grpLoop\name :
|
||||
|
||||
VLD2 {dXr0,dXi0},[pSrc],pointStep @// data[0]
|
||||
ADD stepTwiddle,stepTwiddle,pointStep
|
||||
VLD2 {dXr1,dXi1},[pSrc],pointStep @// data[1]
|
||||
ADD pTwiddle,pTwiddle,stepTwiddle @// set pTwiddle to the first point
|
||||
VLD2 {dXr2,dXi2},[pSrc],pointStep @// data[2]
|
||||
MOV twStep,stepTwiddle,LSL #2
|
||||
|
||||
VLD2 {dXr3,dXi3},[pSrc],setStep @// data[3] & update pSrc for the next set
|
||||
SUB twStep,stepTwiddle,twStep @// twStep = -3*stepTwiddle
|
||||
|
||||
MOV setCount,pointStep,LSR #3
|
||||
ADD pSrc,pSrc,#16 @// set pSrc to data[0] of the next set
|
||||
ADD pSrc,pSrc,pointStep @// increment to data[1] of the next set
|
||||
|
||||
|
||||
@// Loop on the sets
|
||||
|
||||
setLoop\name :
|
||||
|
||||
|
||||
|
||||
@// The flags set here are consumed by the BGT at the bottom of the loop;
@// nothing in between may modify the condition flags.
SUBS setCount,setCount,#2 @// decrement the loop counter
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT0,dXr1,dW1[0]
|
||||
VMLAL qT0,dXi1,dW1[1] @// real part
|
||||
VMULL qT1,dXi1,dW1[0]
|
||||
VMLSL qT1,dXr1,dW1[1] @// imag part
|
||||
|
||||
.else
|
||||
VMULL qT0,dXr1,dW1[0]
|
||||
VMLSL qT0,dXi1,dW1[1] @// real part
|
||||
VMULL qT1,dXi1,dW1[0]
|
||||
VMLAL qT1,dXr1,dW1[1] @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
VLD2 {dXr1,dXi1},[pSrc],pointStep @// data[1] for next iteration
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT2,dXr2,dW2[0]
|
||||
VMLAL qT2,dXi2,dW2[1] @// real part
|
||||
VMULL qT3,dXi2,dW2[0]
|
||||
VMLSL qT3,dXr2,dW2[1] @// imag part
|
||||
|
||||
.else
|
||||
VMULL qT2,dXr2,dW2[0]
|
||||
VMLSL qT2,dXi2,dW2[1] @// real part
|
||||
VMULL qT3,dXi2,dW2[0]
|
||||
VMLAL qT3,dXr2,dW2[1] @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
VRSHRN dZr1,qT0,#31
|
||||
VRSHRN dZi1,qT1,#31
|
||||
VLD2 {dXr2,dXi2},[pSrc],pointStep @// data[2] for next iteration
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
VMULL qT0,dXr3,dW3[0]
|
||||
VMLAL qT0,dXi3,dW3[1] @// real part
|
||||
VMULL qT1,dXi3,dW3[0]
|
||||
VMLSL qT1,dXr3,dW3[1] @// imag part
|
||||
|
||||
.else
|
||||
VMULL qT0,dXr3,dW3[0]
|
||||
VMLSL qT0,dXi3,dW3[1] @// real part
|
||||
VMULL qT1,dXi3,dW3[0]
|
||||
VMLAL qT1,dXr3,dW3[1] @// imag part
|
||||
|
||||
.endif
|
||||
|
||||
VRSHRN dZr2,qT2,#31
|
||||
VRSHRN dZi2,qT3,#31
|
||||
|
||||
|
||||
VRSHRN dZr3,qT0,#31
|
||||
VRSHRN dZi3,qT1,#31
|
||||
VLD2 {dXr3,dXi3},[pSrc],setStep @// data[3] & update pSrc to data[0]
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
|
||||
@// finish first stage of 4 point FFT
@// NOTE(review): qY2 = X0 - Z2 and the first output below is qY2 - qY1,
@// i.e. X0 - Z1 - Z2 - Z3. The products enter with a minus sign, which
@// presumably matches the twiddle table holding negated values (see the
@// header of armSP_FFT_S32TwiddleTable.c) - confirm.
|
||||
VHADD qY0,qX0,qZ2
|
||||
VHSUB qY2,qX0,qZ2
|
||||
|
||||
VLD2 {dXr0,dXi0},[pSrc]! @// data[0] for next iteration
|
||||
VHADD qY1,qZ1,qZ3
|
||||
VHSUB qY3,qZ1,qZ3
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
VHSUB qZ0,qY2,qY1
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VHADD dZr3,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VHSUB dZi3,dYi0,dYr3
|
||||
|
||||
VHADD qZ2,qY2,qY1
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VHSUB dZr1,dYr0,dYi3
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VHADD dZi1,dYi0,dYr3
|
||||
|
||||
VST2 {dZr1,dZi1},[pDst :128],dstStep
|
||||
|
||||
|
||||
.else
|
||||
|
||||
VHSUB dZr1,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VHADD dZi1,dYi0,dYr3
|
||||
|
||||
VHADD qZ2,qY2,qY1
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
|
||||
VHADD dZr3,dYr0,dYi3
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VHSUB dZi3,dYi0,dYr3
|
||||
|
||||
VST2 {dZr3,dZi3},[pDst :128],dstStep
|
||||
|
||||
|
||||
.endif
|
||||
|
||||
|
||||
.else
|
||||
|
||||
@// finish first stage of 4 point FFT
|
||||
VADD qY0,qX0,qZ2
|
||||
VSUB qY2,qX0,qZ2
|
||||
|
||||
VLD2 {dXr0,dXi0},[pSrc :128]! @// data[0] for next iteration
|
||||
VADD qY1,qZ1,qZ3
|
||||
VSUB qY3,qZ1,qZ3
|
||||
|
||||
@// finish second stage of 4 point FFT
|
||||
|
||||
VSUB qZ0,qY2,qY1
|
||||
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VADD dZr3,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VSUB dZi3,dYi0,dYr3
|
||||
|
||||
VADD qZ2,qY2,qY1
|
||||
VST2 {dZr3,dZi3},[pDst :128],outPointStep
|
||||
|
||||
VSUB dZr1,dYr0,dYi3
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VADD dZi1,dYi0,dYr3
|
||||
|
||||
VST2 {dZr1,dZi1},[pDst :128],dstStep
|
||||
|
||||
|
||||
.else
|
||||
|
||||
VSUB dZr1,dYr0,dYi3
|
||||
VST2 {dZr0,dZi0},[pDst :128],outPointStep
|
||||
VADD dZi1,dYi0,dYr3
|
||||
|
||||
VADD qZ2,qY2,qY1
|
||||
VST2 {dZr1,dZi1},[pDst :128],outPointStep
|
||||
|
||||
VADD dZr3,dYr0,dYi3
|
||||
VST2 {dZr2,dZi2},[pDst :128],outPointStep
|
||||
VSUB dZi3,dYi0,dYr3
|
||||
|
||||
VST2 {dZr3,dZi3},[pDst :128],dstStep
|
||||
|
||||
|
||||
.endif
|
||||
|
||||
.endif
|
||||
|
||||
ADD pSrc,pSrc,pointStep @// increment to data[1] of the next set
|
||||
BGT setLoop\name
|
||||
|
||||
|
||||
@// Load the three twiddles for the next group. The two stepTwiddle
@// post-increments followed by twStep (= -3*stepTwiddle) leave pTwiddle one
@// stepTwiddle below where it was before these loads.
VLD1 dW1,[pTwiddle :64],stepTwiddle @//[wi | wr]
|
||||
SUBS grpCount,grpCount,#4 @// subtract 4 since grpCount multiplied by 4
|
||||
VLD1 dW2,[pTwiddle :64],stepTwiddle @//[wi | wr]
|
||||
ADD pSrc,pSrc,srcStep @// increment pSrc for the next grp
|
||||
VLD1 dW3,[pTwiddle :64],twStep @//[wi | wr]
|
||||
BGT grpLoop\name
|
||||
|
||||
|
||||
@// Reset and Swap pSrc and pDst for the next stage
@// (t1 shares r3 with grpCount, which is no longer needed at this point)
|
||||
MOV t1,pDst
|
||||
SUB pDst,pSrc,outPointStep,LSL #2 @// pDst -= 2*size; pSrc -= 8*size bytes
|
||||
SUB pSrc,t1,outPointStep
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
@// Forward, unscaled radix-4 stage: FFTSTAGE expanded with scaled="FALSE" and
@// inverse="FALSE"; "FWD" keeps the loop labels of this expansion unique.
@// M_START/M_END are not defined in this file (presumably they come from
@// dl/api/armCOMM_s.h, which is included above); the meaning of the r4
@// argument must be checked there.
M_START armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, unscaled radix-4 stage: FFTSTAGE expanded with scaled="FALSE" and
@// inverse="TRUE"; "INV" keeps the loop labels of this expansion unique.
M_START armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
@// Forward, scaled radix-4 stage: FFTSTAGE expanded with scaled="TRUE" (halving
@// butterflies) and inverse="FALSE"; "FWDSFS" keeps the loop labels unique.
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",FWDSFS
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, scaled radix-4 stage: FFTSTAGE expanded with scaled="TRUE" (halving
@// butterflies) and inverse="TRUE"; "INVSFS" keeps the loop labels unique.
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",INVSFS
|
||||
M_END
|
||||
|
||||
|
||||
.end
|
@ -0,0 +1,595 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7770
|
||||
@// Last Modified Date: Thu, 27 Sep 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a first stage Radix 8 FFT stage for a N point complex signal
|
||||
@//
|
||||
|
||||
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
@// Register aliases used by the first-stage radix-8 FFTSTAGE macro below.
@// Many aliases deliberately share a physical register; the sharing is noted
@// next to each group.
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r2
|
||||
#define pTwiddle r1
|
||||
#define subFFTNum r6
|
||||
#define subFFTSize r7
|
||||
@// dest buffer for the next stage (not pSrc for first stage)
|
||||
#define pPingPongBuf r5
|
||||
|
||||
|
||||
@//Output Registers
|
||||
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define grpSize r3
|
||||
@// Reuse grpSize as setCount
|
||||
#define setCount r3
|
||||
@// pointStep and outPointStep are the same register (r4)
#define pointStep r4
|
||||
#define outPointStep r4
|
||||
#define setStep r8
|
||||
#define step1 r9
|
||||
#define step2 r10
|
||||
#define t0 r11
|
||||
|
||||
|
||||
@// Neon Registers
@// Inputs: qXn (Qn) = {dXrn,dXin} for n = 0..7, occupying D0-D15.
|
||||
|
||||
#define dXr0 D0.S32
|
||||
#define dXi0 D1.S32
|
||||
#define dXr1 D2.S32
|
||||
#define dXi1 D3.S32
|
||||
#define dXr2 D4.S32
|
||||
#define dXi2 D5.S32
|
||||
#define dXr3 D6.S32
|
||||
#define dXi3 D7.S32
|
||||
#define dXr4 D8.S32
|
||||
#define dXi4 D9.S32
|
||||
#define dXr5 D10.S32
|
||||
#define dXi5 D11.S32
|
||||
#define dXr6 D12.S32
|
||||
#define dXi6 D13.S32
|
||||
#define dXr7 D14.S32
|
||||
#define dXi7 D15.S32
|
||||
#define qX0 Q0.S32
|
||||
#define qX1 Q1.S32
|
||||
#define qX2 Q2.S32
|
||||
#define qX3 Q3.S32
|
||||
#define qX4 Q4.S32
|
||||
#define qX5 Q5.S32
|
||||
#define qX6 Q6.S32
|
||||
#define qX7 Q7.S32
|
||||
|
||||
@// First-stage results: even indices in D16-D23, odd indices in D24-D31.
#define dUr0 D16.S32
|
||||
#define dUi0 D17.S32
|
||||
#define dUr2 D18.S32
|
||||
#define dUi2 D19.S32
|
||||
#define dUr4 D20.S32
|
||||
#define dUi4 D21.S32
|
||||
#define dUr6 D22.S32
|
||||
#define dUi6 D23.S32
|
||||
#define dUr1 D24.S32
|
||||
#define dUi1 D25.S32
|
||||
#define dUr3 D26.S32
|
||||
#define dUi3 D27.S32
|
||||
#define dUr5 D28.S32
|
||||
#define dUi5 D29.S32
|
||||
@// dUr7/dUi7 are D30/D31 and do not reuse dXr7/dXi7; the aliases that do reuse
@// D14/D15 (dXr7/dXi7) are dT0/dT1, defined further down.
|
||||
#define dUr7 D30.S32
|
||||
#define dUi7 D31.S32
|
||||
#define qU0 Q8.S32
|
||||
#define qU1 Q12.S32
|
||||
#define qU2 Q9.S32
|
||||
#define qU3 Q13.S32
|
||||
#define qU4 Q10.S32
|
||||
#define qU5 Q14.S32
|
||||
#define qU6 Q11.S32
|
||||
#define qU7 Q15.S32
|
||||
|
||||
|
||||
|
||||
@// Second-stage results: the halves are swapped relative to dU* (even indices
@// in D24-D31, odd indices in D16-D23), so each dV overwrites a dU of the
@// opposite parity.
#define dVr0 D24.S32
|
||||
#define dVi0 D25.S32
|
||||
#define dVr2 D26.S32
|
||||
#define dVi2 D27.S32
|
||||
#define dVr4 D28.S32
|
||||
#define dVi4 D29.S32
|
||||
#define dVr6 D30.S32
|
||||
#define dVi6 D31.S32
|
||||
#define dVr1 D16.S32
|
||||
#define dVi1 D17.S32
|
||||
#define dVr3 D18.S32
|
||||
#define dVi3 D19.S32
|
||||
#define dVr5 D20.S32
|
||||
#define dVi5 D21.S32
|
||||
#define dVr7 D22.S32
|
||||
#define dVi7 D23.S32
|
||||
#define qV0 Q12.S32
|
||||
#define qV1 Q8.S32
|
||||
#define qV2 Q13.S32
|
||||
#define qV3 Q9.S32
|
||||
#define qV4 Q14.S32
|
||||
#define qV5 Q10.S32
|
||||
#define qV6 Q15.S32
|
||||
#define qV7 Q11.S32
|
||||
|
||||
|
||||
|
||||
@// Third-stage (output) values: same register layout as dU*.
#define dYr0 D16.S32
|
||||
#define dYi0 D17.S32
|
||||
#define dYr2 D18.S32
|
||||
#define dYi2 D19.S32
|
||||
#define dYr4 D20.S32
|
||||
#define dYi4 D21.S32
|
||||
#define dYr6 D22.S32
|
||||
#define dYi6 D23.S32
|
||||
#define dYr1 D24.S32
|
||||
#define dYi1 D25.S32
|
||||
#define dYr3 D26.S32
|
||||
#define dYi3 D27.S32
|
||||
#define dYr5 D28.S32
|
||||
#define dYi5 D29.S32
|
||||
#define dYr7 D30.S32
|
||||
#define dYi7 D31.S32
|
||||
#define qY0 Q8.S32
|
||||
#define qY1 Q12.S32
|
||||
#define qY2 Q9.S32
|
||||
#define qY3 Q13.S32
|
||||
#define qY4 Q10.S32
|
||||
#define qY5 Q14.S32
|
||||
#define qY6 Q11.S32
|
||||
#define qY7 Q15.S32
|
||||
|
||||
|
||||
@// Scratch: dT0/dT1 share D14/D15 with dXr7/dXi7.
#define dT0 D14.S32
|
||||
#define dT1 D15.S32
|
||||
|
||||
@// Define constants
|
||||
.set ONEBYSQRT2, 0x5A82799A @// Q31 format
|
||||
|
||||
|
||||
@// FFTSTAGE scaled, inverse, name
@//   Emits the body of the first-stage radix-8 pass on SC32 data (out of place).
@//   scaled  : "TRUE" uses halving adds/subs (VHADD/VHSUB) in the butterflies;
@//             any other value uses VADD/VSUB.
@//   inverse : "TRUE" selects one sign pattern for the +/-j rotations; otherwise
@//             the opposite signs are used and the result registers are stored
@//             in swapped slots (the registers named dY6 go to the y2 slot, dY2
@//             to y6, dY7 to y1, dY5 to y3, dY3 to y5 and dY1 to y7).
@//   name    : suffix appended to the grpZeroSetLoop label so that every
@//             expansion of the macro gets a unique label.
@//   Reads pSrc, pDst, subFFTNum and pPingPongBuf. On exit subFFTNum has been
@//   divided by 8, subFFTSize is 8, pSrc = pDst - pointStep and pDst is
@//   pPingPongBuf.
@//   The loads for the next iteration are interleaved with the arithmetic of
@//   the current one, and dT0/dT1 share D14/D15 with dXr7/dXi7, so instruction
@//   order inside the loop is significant.
.MACRO FFTSTAGE scaled, inverse, name
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Update pSubFFTSize and pSubFFTNum regs
|
||||
MOV subFFTSize,#8 @// subFFTSize = 8 after this first (radix-8) stage
|
||||
LDR t0,=ONEBYSQRT2 @// t0=(1/sqrt(2)) as Q31 value
|
||||
|
||||
@// Note: setCount = subFFTNum/8 (reuse the grpSize reg for setCount)
|
||||
LSR grpSize,subFFTNum,#3
|
||||
MOV subFFTNum,grpSize
|
||||
|
||||
|
||||
@// pT0+1 increments pT0 by 8 bytes
|
||||
@// pT0+pointStep = increment of 8*pointStep bytes = grpSize bytes
|
||||
@// Note: outPointStep = pointStep for firststage
|
||||
|
||||
MOV pointStep,grpSize,LSL #3
|
||||
|
||||
|
||||
@// Calculate the step of input data for the next set
|
||||
@//MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
|
||||
MOV step1,grpSize,LSL #4
|
||||
|
||||
MOV step2,pointStep,LSL #3
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
SUB step2,step2,pointStep @// step2 = 7*pointStep
|
||||
RSB setStep,step2,#16 @// setStep = - 7*pointStep+16
|
||||
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7] & update pSrc for the next set
|
||||
@// setStep = -7*pointStep + 16
|
||||
@// grp = 0 a special case since all the twiddle factors are 1
|
||||
@// Loop on the sets
|
||||
|
||||
grpZeroSetLoop\name :
|
||||
|
||||
@// Decrement setcount
@// The flags set here are consumed by the BGT at the bottom of the loop;
@// nothing in between may modify the condition flags.
|
||||
SUBS setCount,setCount,#2 @// decrement the set loop counter
|
||||
|
||||
|
||||
.ifeqs "\scaled", "TRUE"
|
||||
@// finish first stage of 8 point FFT
|
||||
|
||||
VHADD qU0,qX0,qX4
|
||||
VHADD qU2,qX1,qX5
|
||||
VHADD qU4,qX2,qX6
|
||||
VHADD qU6,qX3,qX7
|
||||
|
||||
@// finish second stage of 8 point FFT
|
||||
|
||||
VHADD qV0,qU0,qU4
|
||||
VHSUB qV2,qU0,qU4
|
||||
VHADD qV4,qU2,qU6
|
||||
VHSUB qV6,qU2,qU6
|
||||
|
||||
@// finish third stage of 8 point FFT
|
||||
|
||||
VHADD qY0,qV0,qV4
|
||||
VHSUB qY4,qV0,qV4
|
||||
VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VHSUB dYr2,dVr2,dVi6
|
||||
VHADD dYi2,dVi2,dVr6
|
||||
|
||||
VHADD dYr6,dVr2,dVi6
|
||||
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
|
||||
VHSUB dYi6,dVi2,dVr6
|
||||
|
||||
VHSUB qU1,qX0,qX4
|
||||
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
|
||||
|
||||
VHSUB qU3,qX1,qX5
|
||||
VHSUB qU5,qX2,qX6
|
||||
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
|
||||
|
||||
.ELSE
|
||||
|
||||
VHADD dYr6,dVr2,dVi6
|
||||
VHSUB dYi6,dVi2,dVr6
|
||||
|
||||
VHSUB dYr2,dVr2,dVi6
|
||||
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2
|
||||
VHADD dYi2,dVi2,dVr6
|
||||
|
||||
|
||||
VHSUB qU1,qX0,qX4
|
||||
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
|
||||
VHSUB qU3,qX1,qX5
|
||||
VHSUB qU5,qX2,qX6
|
||||
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
@// finish first stage of 8 point FFT
|
||||
|
||||
VHSUB qU7,qX3,qX7
|
||||
VMOV dT0[0],t0
|
||||
|
||||
@// finish second stage of 8 point FFT
|
||||
|
||||
VHSUB dVr1,dUr1,dUi5
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration
|
||||
VHADD dVi1,dUi1,dUr5
|
||||
VHADD dVr3,dUr1,dUi5
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
VHSUB dVi3,dUi1,dUr5
|
||||
|
||||
VHSUB dVr5,dUr3,dUi7
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
VHADD dVi5,dUi3,dUr7
|
||||
VHADD dVr7,dUr3,dUi7
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
|
||||
VHSUB dVi7,dUi3,dUr7
|
||||
|
||||
@// finish third stage of 8 point FFT
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
@// calculate a*v5
|
||||
VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D15) is scratch for the product
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VQRDMULH dVi5,dVi5,dT0[0]
|
||||
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VSUB dVr5,dT1,dVi5 @// a * V5
|
||||
VADD dVi5,dT1,dVi5
|
||||
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
|
||||
@// calculate b*v7
|
||||
VQRDMULH dT1,dVr7,dT0[0]
|
||||
VQRDMULH dVi7,dVi7,dT0[0]
|
||||
|
||||
VHADD qY1,qV1,qV5
|
||||
VHSUB qY5,qV1,qV5
|
||||
|
||||
|
||||
VADD dVr7,dT1,dVi7 @// b * V7
|
||||
VSUB dVi7,dVi7,dT1
|
||||
SUB pDst, pDst, step2 @// set pDst to y1
|
||||
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
|
||||
|
||||
|
||||
VHSUB dYr3,dVr3,dVr7
|
||||
VHSUB dYi3,dVi3,dVi7
|
||||
VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
|
||||
VHADD dYr7,dVr3,dVr7
|
||||
VHADD dYi7,dVi3,dVi7
|
||||
|
||||
|
||||
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
|
||||
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
|
||||
VST2 {dYr7,dYi7},[pDst :128]! @// store y7
|
||||
|
||||
.ELSE
|
||||
|
||||
@// calculate b*v7
|
||||
VQRDMULH dT1,dVr7,dT0[0]
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VQRDMULH dVi7,dVi7,dT0[0]
|
||||
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VADD dVr7,dT1,dVi7 @// b * V7
|
||||
VSUB dVi7,dVi7,dT1
|
||||
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
|
||||
@// calculate a*v5
|
||||
VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D15) is scratch for the product
|
||||
VQRDMULH dVi5,dVi5,dT0[0]
|
||||
|
||||
VHADD dYr7,dVr3,dVr7
|
||||
VHADD dYi7,dVi3,dVi7
|
||||
SUB pDst, pDst, step2 @// set pDst to y1
|
||||
|
||||
VSUB dVr5,dT1,dVi5 @// a * V5
|
||||
VADD dVi5,dT1,dVi5
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
|
||||
|
||||
VHSUB qY5,qV1,qV5
|
||||
|
||||
VHSUB dYr3,dVr3,dVr7
|
||||
VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1
|
||||
VHSUB dYi3,dVi3,dVi7
|
||||
VHADD qY1,qV1,qV5
|
||||
|
||||
|
||||
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3
|
||||
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5
|
||||
VST2 {dYr1,dYi1},[pDst :128]! @// store y7
|
||||
|
||||
.ENDIF
|
||||
|
||||
|
||||
|
||||
.ELSE
|
||||
@// finish first stage of 8 point FFT
|
||||
|
||||
VADD qU0,qX0,qX4
|
||||
VADD qU2,qX1,qX5
|
||||
VADD qU4,qX2,qX6
|
||||
VADD qU6,qX3,qX7
|
||||
|
||||
@// finish second stage of 8 point FFT
|
||||
|
||||
VADD qV0,qU0,qU4
|
||||
VSUB qV2,qU0,qU4
|
||||
VADD qV4,qU2,qU6
|
||||
VSUB qV6,qU2,qU6
|
||||
|
||||
@// finish third stage of 8 point FFT
|
||||
|
||||
VADD qY0,qV0,qV4
|
||||
VSUB qY4,qV0,qV4
|
||||
VST2 {dYr0,dYi0},[pDst :128],step1 @// store y0
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
VSUB dYr2,dVr2,dVi6
|
||||
VADD dYi2,dVi2,dVr6
|
||||
|
||||
VADD dYr6,dVr2,dVi6
|
||||
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y2
|
||||
VSUB dYi6,dVi2,dVr6
|
||||
|
||||
VSUB qU1,qX0,qX4
|
||||
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
|
||||
|
||||
VSUB qU3,qX1,qX5
|
||||
VSUB qU5,qX2,qX6
|
||||
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y6
|
||||
|
||||
.ELSE
|
||||
|
||||
VADD dYr6,dVr2,dVi6
|
||||
VSUB dYi6,dVi2,dVr6
|
||||
|
||||
VSUB dYr2,dVr2,dVi6
|
||||
VST2 {dYr6,dYi6},[pDst :128],step1 @// store y2
|
||||
VADD dYi2,dVi2,dVr6
|
||||
|
||||
|
||||
VSUB qU1,qX0,qX4
|
||||
VST2 {dYr4,dYi4},[pDst :128],step1 @// store y4
|
||||
VSUB qU3,qX1,qX5
|
||||
VSUB qU5,qX2,qX6
|
||||
VST2 {dYr2,dYi2},[pDst :128],step1 @// store y6
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
@// finish first stage of 8 point FFT
|
||||
|
||||
VSUB qU7,qX3,qX7
|
||||
VMOV dT0[0],t0
|
||||
|
||||
@// finish second stage of 8 point FFT
|
||||
|
||||
VSUB dVr1,dUr1,dUi5
|
||||
VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0] for next iteration
|
||||
VADD dVi1,dUi1,dUr5
|
||||
VADD dVr3,dUr1,dUi5
|
||||
VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
|
||||
VSUB dVi3,dUi1,dUr5
|
||||
|
||||
VSUB dVr5,dUr3,dUi7
|
||||
VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
|
||||
VADD dVi5,dUi3,dUr7
|
||||
VADD dVr7,dUr3,dUi7
|
||||
VLD2 {dXr3,dXi3},[pSrc :128],pointStep @// data[3]
|
||||
VSUB dVi7,dUi3,dUr7
|
||||
|
||||
@// finish third stage of 8 point FFT
|
||||
|
||||
.ifeqs "\inverse", "TRUE"
|
||||
|
||||
@// calculate a*v5
|
||||
VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D15) is scratch for the product
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VQRDMULH dVi5,dVi5,dT0[0]
|
||||
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VSUB dVr5,dT1,dVi5 @// a * V5
|
||||
VADD dVi5,dT1,dVi5
|
||||
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
|
||||
@// calculate b*v7
|
||||
VQRDMULH dT1,dVr7,dT0[0]
|
||||
VQRDMULH dVi7,dVi7,dT0[0]
|
||||
|
||||
VADD qY1,qV1,qV5
|
||||
VSUB qY5,qV1,qV5
|
||||
|
||||
|
||||
VADD dVr7,dT1,dVi7 @// b * V7
|
||||
VSUB dVi7,dVi7,dT1
|
||||
SUB pDst, pDst, step2 @// set pDst to y1
|
||||
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
|
||||
|
||||
|
||||
VSUB dYr3,dVr3,dVr7
|
||||
VSUB dYi3,dVi3,dVi7
|
||||
VST2 {dYr1,dYi1},[pDst :128],step1 @// store y1
|
||||
VADD dYr7,dVr3,dVr7
|
||||
VADD dYi7,dVi3,dVi7
|
||||
|
||||
|
||||
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y3
|
||||
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y5
|
||||
VST2 {dYr7,dYi7},[pDst :128]! @// store y7
|
||||
|
||||
.ELSE
|
||||
|
||||
@// calculate b*v7
|
||||
VQRDMULH dT1,dVr7,dT0[0]
|
||||
VLD2 {dXr4,dXi4},[pSrc :128],pointStep @// data[4]
|
||||
VQRDMULH dVi7,dVi7,dT0[0]
|
||||
|
||||
VLD2 {dXr5,dXi5},[pSrc :128],pointStep @// data[5]
|
||||
VADD dVr7,dT1,dVi7 @// b * V7
|
||||
VSUB dVi7,dVi7,dT1
|
||||
|
||||
VLD2 {dXr6,dXi6},[pSrc :128],pointStep @// data[6]
|
||||
|
||||
@// calculate a*v5
|
||||
VQRDMULH dT1,dVr5,dT0[0] @// dT1 (D15) is scratch for the product
|
||||
VQRDMULH dVi5,dVi5,dT0[0]
|
||||
|
||||
VADD dYr7,dVr3,dVr7
|
||||
VADD dYi7,dVi3,dVi7
|
||||
SUB pDst, pDst, step2 @// set pDst to y1
|
||||
|
||||
VSUB dVr5,dT1,dVi5 @// a * V5
|
||||
VADD dVi5,dT1,dVi5
|
||||
VLD2 {dXr7,dXi7},[pSrc :128],setStep @// data[7]
|
||||
|
||||
VSUB qY5,qV1,qV5
|
||||
|
||||
VSUB dYr3,dVr3,dVr7
|
||||
VST2 {dYr7,dYi7},[pDst :128],step1 @// store y1
|
||||
VSUB dYi3,dVi3,dVi7
|
||||
VADD qY1,qV1,qV5
|
||||
|
||||
|
||||
VST2 {dYr5,dYi5},[pDst :128],step1 @// store y3
|
||||
VST2 {dYr3,dYi3},[pDst :128],step1 @// store y5
|
||||
VST2 {dYr1,dYi1},[pDst :128]! @// store y7
|
||||
|
||||
.ENDIF
|
||||
|
||||
|
||||
.ENDIF
|
||||
|
||||
SUB pDst, pDst, step2 @// update pDst for the next set
|
||||
BGT grpZeroSetLoop\name
|
||||
|
||||
|
||||
@// reset pSrc to pDst for the next stage
@// Each loop iteration leaves pDst 16 bytes further on, so subtracting
@// pointStep here rewinds to where this stage's output begins.
|
||||
SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
|
||||
MOV pDst,pPingPongBuf
|
||||
|
||||
|
||||
|
||||
.endm
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
|
||||
|
||||
@// Forward, unscaled first-stage radix-8: FFTSTAGE expanded with
@// scaled="FALSE" and inverse="FALSE"; "FWD" keeps the loop label unique.
@// M_START/M_END are not defined in this file (presumably they come from
@// dl/api/armCOMM_s.h, which is included above); the meaning of the r4
@// argument must be checked there.
M_START armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","FALSE",FWD
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, unscaled first-stage radix-8: FFTSTAGE expanded with
@// scaled="FALSE" and inverse="TRUE"; "INV" keeps the loop label unique.
M_START armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "FALSE","TRUE",INV
|
||||
M_END
|
||||
|
||||
|
||||
@// Forward, scaled first-stage radix-8: FFTSTAGE expanded with scaled="TRUE"
@// (halving butterflies) and inverse="FALSE"; "FWDSFS" keeps the label unique.
M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","FALSE",FWDSFS
|
||||
M_END
|
||||
|
||||
|
||||
@// Inverse, scaled first-stage radix-8: FFTSTAGE expanded with scaled="TRUE"
@// (halving butterflies) and inverse="TRUE"; "INVSFS" keeps the label unique.
M_START armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe,r4
|
||||
FFTSTAGE "TRUE","TRUE",INVSFS
|
||||
M_END
|
||||
|
||||
|
||||
.end
|
4643
media/openmax_dl/dl/sp/src/armSP_FFT_F32TwiddleTable.c
Normal file
4643
media/openmax_dl/dl/sp/src/armSP_FFT_F32TwiddleTable.c
Normal file
File diff suppressed because it is too large
Load Diff
556
media/openmax_dl/dl/sp/src/armSP_FFT_S32TwiddleTable.c
Normal file
556
media/openmax_dl/dl/sp/src/armSP_FFT_S32TwiddleTable.c
Normal file
@ -0,0 +1,556 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This file was originally licensed as follows. It has been
|
||||
* relicensed with permission from the copyright holders.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* File Name: armSP_FFT_S32TwiddleTable.c
|
||||
* OpenMAX DL: v1.0.2
|
||||
* Last Modified Revision: 6781
|
||||
* Last Modified Date: Wed, 25 Jul 2007
|
||||
*
|
||||
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
*
|
||||
*
|
||||
*
|
||||
* Description:
|
||||
* Twiddle table for Forward FFT in Q31 format.
|
||||
* It contains complex pairs [-cos (W * i), -sin (W * i)] where W = -2*PI/N
|
||||
* and 0<= i<= N/8. N is the max size of the FFT. Here N = 2^12.
|
||||
* Values for N/8 < i < N are generated in the FFTInit function using the
|
||||
* symmetries of cos and sine.
|
||||
*
|
||||
* NOTE: The values are stored negated. This is to represent '1' which cannot be otherwise
|
||||
* represented as Q31 in 32 bits.
|
||||
**/
|
||||
|
||||
#include "dl/api/omxtypes.h"
|
||||
|
||||
|
||||
const OMX_S32 armSP_FFT_S32TwiddleTable[1026] ={
|
||||
|
||||
0x80000000, 0x0,
|
||||
0x800009df, 0x3243f5,
|
||||
0x8000277a, 0x6487e3,
|
||||
0x800058d4, 0x96cbc1,
|
||||
0x80009dea, 0xc90f88,
|
||||
0x8000f6bd, 0xfb5330,
|
||||
0x8001634e, 0x12d96b1,
|
||||
0x8001e39b, 0x15fda03,
|
||||
0x800277a6, 0x1921d20,
|
||||
0x80031f6d, 0x1c45ffe,
|
||||
0x8003daf1, 0x1f6a297,
|
||||
0x8004aa32, 0x228e4e2,
|
||||
0x80058d2f, 0x25b26d7,
|
||||
0x800683e8, 0x28d6870,
|
||||
0x80078e5e, 0x2bfa9a4,
|
||||
0x8008ac90, 0x2f1ea6c,
|
||||
0x8009de7e, 0x3242abf,
|
||||
0x800b2427, 0x3566a96,
|
||||
0x800c7d8c, 0x388a9ea,
|
||||
0x800deaad, 0x3bae8b2,
|
||||
0x800f6b88, 0x3ed26e6,
|
||||
0x8011001f, 0x41f6480,
|
||||
0x8012a86f, 0x451a177,
|
||||
0x8014647b, 0x483ddc3,
|
||||
0x80163440, 0x4b6195d,
|
||||
0x801817bf, 0x4e8543e,
|
||||
0x801a0ef8, 0x51a8e5c,
|
||||
0x801c19ea, 0x54cc7b1,
|
||||
0x801e3895, 0x57f0035,
|
||||
0x80206af8, 0x5b137df,
|
||||
0x8022b114, 0x5e36ea9,
|
||||
0x80250ae7, 0x615a48b,
|
||||
0x80277872, 0x647d97c,
|
||||
0x8029f9b4, 0x67a0d76,
|
||||
0x802c8ead, 0x6ac406f,
|
||||
0x802f375d, 0x6de7262,
|
||||
0x8031f3c2, 0x710a345,
|
||||
0x8034c3dd, 0x742d311,
|
||||
0x8037a7ac, 0x77501be,
|
||||
0x803a9f31, 0x7a72f45,
|
||||
0x803daa6a, 0x7d95b9e,
|
||||
0x8040c956, 0x80b86c2,
|
||||
0x8043fbf6, 0x83db0a7,
|
||||
0x80474248, 0x86fd947,
|
||||
0x804a9c4d, 0x8a2009a,
|
||||
0x804e0a04, 0x8d42699,
|
||||
0x80518b6b, 0x9064b3a,
|
||||
0x80552084, 0x9386e78,
|
||||
0x8058c94c, 0x96a9049,
|
||||
0x805c85c4, 0x99cb0a7,
|
||||
0x806055eb, 0x9cecf89,
|
||||
0x806439c0, 0xa00ece8,
|
||||
0x80683143, 0xa3308bd,
|
||||
0x806c3c74, 0xa6522fe,
|
||||
0x80705b50, 0xa973ba5,
|
||||
0x80748dd9, 0xac952aa,
|
||||
0x8078d40d, 0xafb6805,
|
||||
0x807d2dec, 0xb2d7baf,
|
||||
0x80819b74, 0xb5f8d9f,
|
||||
0x80861ca6, 0xb919dcf,
|
||||
0x808ab180, 0xbc3ac35,
|
||||
0x808f5a02, 0xbf5b8cb,
|
||||
0x8094162c, 0xc27c389,
|
||||
0x8098e5fb, 0xc59cc68,
|
||||
0x809dc971, 0xc8bd35e,
|
||||
0x80a2c08b, 0xcbdd865,
|
||||
0x80a7cb49, 0xcefdb76,
|
||||
0x80ace9ab, 0xd21dc87,
|
||||
0x80b21baf, 0xd53db92,
|
||||
0x80b76156, 0xd85d88f,
|
||||
0x80bcba9d, 0xdb7d376,
|
||||
0x80c22784, 0xde9cc40,
|
||||
0x80c7a80a, 0xe1bc2e4,
|
||||
0x80cd3c2f, 0xe4db75b,
|
||||
0x80d2e3f2, 0xe7fa99e,
|
||||
0x80d89f51, 0xeb199a4,
|
||||
0x80de6e4c, 0xee38766,
|
||||
0x80e450e2, 0xf1572dc,
|
||||
0x80ea4712, 0xf475bff,
|
||||
0x80f050db, 0xf7942c7,
|
||||
0x80f66e3c, 0xfab272b,
|
||||
0x80fc9f35, 0xfdd0926,
|
||||
0x8102e3c4, 0x100ee8ad,
|
||||
0x81093be8, 0x1040c5bb,
|
||||
0x810fa7a0, 0x1072a048,
|
||||
0x811626ec, 0x10a4784b,
|
||||
0x811cb9ca, 0x10d64dbd,
|
||||
0x8123603a, 0x11082096,
|
||||
0x812a1a3a, 0x1139f0cf,
|
||||
0x8130e7c9, 0x116bbe60,
|
||||
0x8137c8e6, 0x119d8941,
|
||||
0x813ebd90, 0x11cf516a,
|
||||
0x8145c5c7, 0x120116d5,
|
||||
0x814ce188, 0x1232d979,
|
||||
0x815410d4, 0x1264994e,
|
||||
0x815b53a8, 0x1296564d,
|
||||
0x8162aa04, 0x12c8106f,
|
||||
0x816a13e6, 0x12f9c7aa,
|
||||
0x8171914e, 0x132b7bf9,
|
||||
0x8179223a, 0x135d2d53,
|
||||
0x8180c6a9, 0x138edbb1,
|
||||
0x81887e9a, 0x13c0870a,
|
||||
0x81904a0c, 0x13f22f58,
|
||||
0x819828fd, 0x1423d492,
|
||||
0x81a01b6d, 0x145576b1,
|
||||
0x81a82159, 0x148715ae,
|
||||
0x81b03ac2, 0x14b8b17f,
|
||||
0x81b867a5, 0x14ea4a1f,
|
||||
0x81c0a801, 0x151bdf86,
|
||||
0x81c8fbd6, 0x154d71aa,
|
||||
0x81d16321, 0x157f0086,
|
||||
0x81d9dde1, 0x15b08c12,
|
||||
0x81e26c16, 0x15e21445,
|
||||
0x81eb0dbe, 0x16139918,
|
||||
0x81f3c2d7, 0x16451a83,
|
||||
0x81fc8b60, 0x1676987f,
|
||||
0x82056758, 0x16a81305,
|
||||
0x820e56be, 0x16d98a0c,
|
||||
0x82175990, 0x170afd8d,
|
||||
0x82206fcc, 0x173c6d80,
|
||||
0x82299971, 0x176dd9de,
|
||||
0x8232d67f, 0x179f429f,
|
||||
0x823c26f3, 0x17d0a7bc,
|
||||
0x82458acc, 0x1802092c,
|
||||
0x824f0208, 0x183366e9,
|
||||
0x82588ca7, 0x1864c0ea,
|
||||
0x82622aa6, 0x18961728,
|
||||
0x826bdc04, 0x18c7699b,
|
||||
0x8275a0c0, 0x18f8b83c,
|
||||
0x827f78d8, 0x192a0304,
|
||||
0x8289644b, 0x195b49ea,
|
||||
0x82936317, 0x198c8ce7,
|
||||
0x829d753a, 0x19bdcbf3,
|
||||
0x82a79ab3, 0x19ef0707,
|
||||
0x82b1d381, 0x1a203e1b,
|
||||
0x82bc1fa2, 0x1a517128,
|
||||
0x82c67f14, 0x1a82a026,
|
||||
0x82d0f1d5, 0x1ab3cb0d,
|
||||
0x82db77e5, 0x1ae4f1d6,
|
||||
0x82e61141, 0x1b161479,
|
||||
0x82f0bde8, 0x1b4732ef,
|
||||
0x82fb7dd8, 0x1b784d30,
|
||||
0x83065110, 0x1ba96335,
|
||||
0x8311378d, 0x1bda74f6,
|
||||
0x831c314e, 0x1c0b826a,
|
||||
0x83273e52, 0x1c3c8b8c,
|
||||
0x83325e97, 0x1c6d9053,
|
||||
0x833d921b, 0x1c9e90b8,
|
||||
0x8348d8dc, 0x1ccf8cb3,
|
||||
0x835432d8, 0x1d00843d,
|
||||
0x835fa00f, 0x1d31774d,
|
||||
0x836b207d, 0x1d6265dd,
|
||||
0x8376b422, 0x1d934fe5,
|
||||
0x83825afb, 0x1dc4355e,
|
||||
0x838e1507, 0x1df5163f,
|
||||
0x8399e244, 0x1e25f282,
|
||||
0x83a5c2b0, 0x1e56ca1e,
|
||||
0x83b1b649, 0x1e879d0d,
|
||||
0x83bdbd0e, 0x1eb86b46,
|
||||
0x83c9d6fc, 0x1ee934c3,
|
||||
0x83d60412, 0x1f19f97b,
|
||||
0x83e2444d, 0x1f4ab968,
|
||||
0x83ee97ad, 0x1f7b7481,
|
||||
0x83fafe2e, 0x1fac2abf,
|
||||
0x840777d0, 0x1fdcdc1b,
|
||||
0x84140490, 0x200d888d,
|
||||
0x8420a46c, 0x203e300d,
|
||||
0x842d5762, 0x206ed295,
|
||||
0x843a1d70, 0x209f701c,
|
||||
0x8446f695, 0x20d0089c,
|
||||
0x8453e2cf, 0x21009c0c,
|
||||
0x8460e21a, 0x21312a65,
|
||||
0x846df477, 0x2161b3a0,
|
||||
0x847b19e1, 0x219237b5,
|
||||
0x84885258, 0x21c2b69c,
|
||||
0x84959dd9, 0x21f3304f,
|
||||
0x84a2fc62, 0x2223a4c5,
|
||||
0x84b06df2, 0x225413f8,
|
||||
0x84bdf286, 0x22847de0,
|
||||
0x84cb8a1b, 0x22b4e274,
|
||||
0x84d934b1, 0x22e541af,
|
||||
0x84e6f244, 0x23159b88,
|
||||
0x84f4c2d4, 0x2345eff8,
|
||||
0x8502a65c, 0x23763ef7,
|
||||
0x85109cdd, 0x23a6887f,
|
||||
0x851ea652, 0x23d6cc87,
|
||||
0x852cc2bb, 0x24070b08,
|
||||
0x853af214, 0x243743fa,
|
||||
0x8549345c, 0x24677758,
|
||||
0x85578991, 0x2497a517,
|
||||
0x8565f1b0, 0x24c7cd33,
|
||||
0x85746cb8, 0x24f7efa2,
|
||||
0x8582faa5, 0x25280c5e,
|
||||
0x85919b76, 0x2558235f,
|
||||
0x85a04f28, 0x2588349d,
|
||||
0x85af15b9, 0x25b84012,
|
||||
0x85bdef28, 0x25e845b6,
|
||||
0x85ccdb70, 0x26184581,
|
||||
0x85dbda91, 0x26483f6c,
|
||||
0x85eaec88, 0x26783370,
|
||||
0x85fa1153, 0x26a82186,
|
||||
0x860948ef, 0x26d809a5,
|
||||
0x86189359, 0x2707ebc7,
|
||||
0x8627f091, 0x2737c7e3,
|
||||
0x86376092, 0x27679df4,
|
||||
0x8646e35c, 0x27976df1,
|
||||
0x865678eb, 0x27c737d3,
|
||||
0x8666213c, 0x27f6fb92,
|
||||
0x8675dc4f, 0x2826b928,
|
||||
0x8685aa20, 0x2856708d,
|
||||
0x86958aac, 0x288621b9,
|
||||
0x86a57df2, 0x28b5cca5,
|
||||
0x86b583ee, 0x28e5714b,
|
||||
0x86c59c9f, 0x29150fa1,
|
||||
0x86d5c802, 0x2944a7a2,
|
||||
0x86e60614, 0x29743946,
|
||||
0x86f656d3, 0x29a3c485,
|
||||
0x8706ba3d, 0x29d34958,
|
||||
0x8717304e, 0x2a02c7b8,
|
||||
0x8727b905, 0x2a323f9e,
|
||||
0x8738545e, 0x2a61b101,
|
||||
0x87490258, 0x2a911bdc,
|
||||
0x8759c2ef, 0x2ac08026,
|
||||
0x876a9621, 0x2aefddd8,
|
||||
0x877b7bec, 0x2b1f34eb,
|
||||
0x878c744d, 0x2b4e8558,
|
||||
0x879d7f41, 0x2b7dcf17,
|
||||
0x87ae9cc5, 0x2bad1221,
|
||||
0x87bfccd7, 0x2bdc4e6f,
|
||||
0x87d10f75, 0x2c0b83fa,
|
||||
0x87e2649b, 0x2c3ab2b9,
|
||||
0x87f3cc48, 0x2c69daa6,
|
||||
0x88054677, 0x2c98fbba,
|
||||
0x8816d327, 0x2cc815ee,
|
||||
0x88287256, 0x2cf72939,
|
||||
0x883a23ff, 0x2d263596,
|
||||
0x884be821, 0x2d553afc,
|
||||
0x885dbeb8, 0x2d843964,
|
||||
0x886fa7c2, 0x2db330c7,
|
||||
0x8881a33d, 0x2de2211e,
|
||||
0x8893b125, 0x2e110a62,
|
||||
0x88a5d177, 0x2e3fec8b,
|
||||
0x88b80432, 0x2e6ec792,
|
||||
0x88ca4951, 0x2e9d9b70,
|
||||
0x88dca0d3, 0x2ecc681e,
|
||||
0x88ef0ab4, 0x2efb2d95,
|
||||
0x890186f2, 0x2f29ebcc,
|
||||
0x89141589, 0x2f58a2be,
|
||||
0x8926b677, 0x2f875262,
|
||||
0x893969b9, 0x2fb5fab2,
|
||||
0x894c2f4c, 0x2fe49ba7,
|
||||
0x895f072e, 0x30133539,
|
||||
0x8971f15a, 0x3041c761,
|
||||
0x8984edcf, 0x30705217,
|
||||
0x8997fc8a, 0x309ed556,
|
||||
0x89ab1d87, 0x30cd5115,
|
||||
0x89be50c3, 0x30fbc54d,
|
||||
0x89d1963c, 0x312a31f8,
|
||||
0x89e4edef, 0x3158970e,
|
||||
0x89f857d8, 0x3186f487,
|
||||
0x8a0bd3f5, 0x31b54a5e,
|
||||
0x8a1f6243, 0x31e39889,
|
||||
0x8a3302be, 0x3211df04,
|
||||
0x8a46b564, 0x32401dc6,
|
||||
0x8a5a7a31, 0x326e54c7,
|
||||
0x8a6e5123, 0x329c8402,
|
||||
0x8a823a36, 0x32caab6f,
|
||||
0x8a963567, 0x32f8cb07,
|
||||
0x8aaa42b4, 0x3326e2c3,
|
||||
0x8abe6219, 0x3354f29b,
|
||||
0x8ad29394, 0x3382fa88,
|
||||
0x8ae6d720, 0x33b0fa84,
|
||||
0x8afb2cbb, 0x33def287,
|
||||
0x8b0f9462, 0x340ce28b,
|
||||
0x8b240e11, 0x343aca87,
|
||||
0x8b3899c6, 0x3468aa76,
|
||||
0x8b4d377c, 0x34968250,
|
||||
0x8b61e733, 0x34c4520d,
|
||||
0x8b76a8e4, 0x34f219a8,
|
||||
0x8b8b7c8f, 0x351fd918,
|
||||
0x8ba0622f, 0x354d9057,
|
||||
0x8bb559c1, 0x357b3f5d,
|
||||
0x8bca6343, 0x35a8e625,
|
||||
0x8bdf7eb0, 0x35d684a6,
|
||||
0x8bf4ac05, 0x36041ad9,
|
||||
0x8c09eb40, 0x3631a8b8,
|
||||
0x8c1f3c5d, 0x365f2e3b,
|
||||
0x8c349f58, 0x368cab5c,
|
||||
0x8c4a142f, 0x36ba2014,
|
||||
0x8c5f9ade, 0x36e78c5b,
|
||||
0x8c753362, 0x3714f02a,
|
||||
0x8c8addb7, 0x37424b7b,
|
||||
0x8ca099da, 0x376f9e46,
|
||||
0x8cb667c8, 0x379ce885,
|
||||
0x8ccc477d, 0x37ca2a30,
|
||||
0x8ce238f6, 0x37f76341,
|
||||
0x8cf83c30, 0x382493b0,
|
||||
0x8d0e5127, 0x3851bb77,
|
||||
0x8d2477d8, 0x387eda8e,
|
||||
0x8d3ab03f, 0x38abf0ef,
|
||||
0x8d50fa59, 0x38d8fe93,
|
||||
0x8d675623, 0x39060373,
|
||||
0x8d7dc399, 0x3932ff87,
|
||||
0x8d9442b8, 0x395ff2c9,
|
||||
0x8daad37b, 0x398cdd32,
|
||||
0x8dc175e0, 0x39b9bebc,
|
||||
0x8dd829e4, 0x39e6975e,
|
||||
0x8deeef82, 0x3a136712,
|
||||
0x8e05c6b7, 0x3a402dd2,
|
||||
0x8e1caf80, 0x3a6ceb96,
|
||||
0x8e33a9da, 0x3a99a057,
|
||||
0x8e4ab5bf, 0x3ac64c0f,
|
||||
0x8e61d32e, 0x3af2eeb7,
|
||||
0x8e790222, 0x3b1f8848,
|
||||
0x8e904298, 0x3b4c18ba,
|
||||
0x8ea7948c, 0x3b78a007,
|
||||
0x8ebef7fb, 0x3ba51e29,
|
||||
0x8ed66ce1, 0x3bd19318,
|
||||
0x8eedf33b, 0x3bfdfecd,
|
||||
0x8f058b04, 0x3c2a6142,
|
||||
0x8f1d343a, 0x3c56ba70,
|
||||
0x8f34eed8, 0x3c830a50,
|
||||
0x8f4cbadb, 0x3caf50da,
|
||||
0x8f649840, 0x3cdb8e09,
|
||||
0x8f7c8701, 0x3d07c1d6,
|
||||
0x8f94871d, 0x3d33ec39,
|
||||
0x8fac988f, 0x3d600d2c,
|
||||
0x8fc4bb53, 0x3d8c24a8,
|
||||
0x8fdcef66, 0x3db832a6,
|
||||
0x8ff534c4, 0x3de4371f,
|
||||
0x900d8b69, 0x3e10320d,
|
||||
0x9025f352, 0x3e3c2369,
|
||||
0x903e6c7b, 0x3e680b2c,
|
||||
0x9056f6df, 0x3e93e950,
|
||||
0x906f927c, 0x3ebfbdcd,
|
||||
0x90883f4d, 0x3eeb889c,
|
||||
0x90a0fd4e, 0x3f1749b8,
|
||||
0x90b9cc7d, 0x3f430119,
|
||||
0x90d2acd4, 0x3f6eaeb8,
|
||||
0x90eb9e50, 0x3f9a5290,
|
||||
0x9104a0ee, 0x3fc5ec98,
|
||||
0x911db4a9, 0x3ff17cca,
|
||||
0x9136d97d, 0x401d0321,
|
||||
0x91500f67, 0x40487f94,
|
||||
0x91695663, 0x4073f21d,
|
||||
0x9182ae6d, 0x409f5ab6,
|
||||
0x919c1781, 0x40cab958,
|
||||
0x91b5919a, 0x40f60dfb,
|
||||
0x91cf1cb6, 0x4121589b,
|
||||
0x91e8b8d0, 0x414c992f,
|
||||
0x920265e4, 0x4177cfb1,
|
||||
0x921c23ef, 0x41a2fc1a,
|
||||
0x9235f2ec, 0x41ce1e65,
|
||||
0x924fd2d7, 0x41f93689,
|
||||
0x9269c3ac, 0x42244481,
|
||||
0x9283c568, 0x424f4845,
|
||||
0x929dd806, 0x427a41d0,
|
||||
0x92b7fb82, 0x42a5311b,
|
||||
0x92d22fd9, 0x42d0161e,
|
||||
0x92ec7505, 0x42faf0d4,
|
||||
0x9306cb04, 0x4325c135,
|
||||
0x932131d1, 0x4350873c,
|
||||
0x933ba968, 0x437b42e1,
|
||||
0x935631c5, 0x43a5f41e,
|
||||
0x9370cae4, 0x43d09aed,
|
||||
0x938b74c1, 0x43fb3746,
|
||||
0x93a62f57, 0x4425c923,
|
||||
0x93c0faa3, 0x4450507e,
|
||||
0x93dbd6a0, 0x447acd50,
|
||||
0x93f6c34a, 0x44a53f93,
|
||||
0x9411c09e, 0x44cfa740,
|
||||
0x942cce96, 0x44fa0450,
|
||||
0x9447ed2f, 0x452456bd,
|
||||
0x94631c65, 0x454e9e80,
|
||||
0x947e5c33, 0x4578db93,
|
||||
0x9499ac95, 0x45a30df0,
|
||||
0x94b50d87, 0x45cd358f,
|
||||
0x94d07f05, 0x45f7526b,
|
||||
0x94ec010b, 0x4621647d,
|
||||
0x95079394, 0x464b6bbe,
|
||||
0x9523369c, 0x46756828,
|
||||
0x953eea1e, 0x469f59b4,
|
||||
0x955aae17, 0x46c9405c,
|
||||
0x95768283, 0x46f31c1a,
|
||||
0x9592675c, 0x471cece7,
|
||||
0x95ae5c9f, 0x4746b2bc,
|
||||
0x95ca6247, 0x47706d93,
|
||||
0x95e67850, 0x479a1d67,
|
||||
0x96029eb6, 0x47c3c22f,
|
||||
0x961ed574, 0x47ed5be6,
|
||||
0x963b1c86, 0x4816ea86,
|
||||
0x965773e7, 0x48406e08,
|
||||
0x9673db94, 0x4869e665,
|
||||
0x96905388, 0x48935397,
|
||||
0x96acdbbe, 0x48bcb599,
|
||||
0x96c97432, 0x48e60c62,
|
||||
0x96e61ce0, 0x490f57ee,
|
||||
0x9702d5c3, 0x49389836,
|
||||
0x971f9ed7, 0x4961cd33,
|
||||
0x973c7817, 0x498af6df,
|
||||
0x9759617f, 0x49b41533,
|
||||
0x97765b0a, 0x49dd282a,
|
||||
0x979364b5, 0x4a062fbd,
|
||||
0x97b07e7a, 0x4a2f2be6,
|
||||
0x97cda855, 0x4a581c9e,
|
||||
0x97eae242, 0x4a8101de,
|
||||
0x98082c3b, 0x4aa9dba2,
|
||||
0x9825863d, 0x4ad2a9e2,
|
||||
0x9842f043, 0x4afb6c98,
|
||||
0x98606a49, 0x4b2423be,
|
||||
0x987df449, 0x4b4ccf4d,
|
||||
0x989b8e40, 0x4b756f40,
|
||||
0x98b93828, 0x4b9e0390,
|
||||
0x98d6f1fe, 0x4bc68c36,
|
||||
0x98f4bbbc, 0x4bef092d,
|
||||
0x9912955f, 0x4c177a6e,
|
||||
0x99307ee0, 0x4c3fdff4,
|
||||
0x994e783d, 0x4c6839b7,
|
||||
0x996c816f, 0x4c9087b1,
|
||||
0x998a9a74, 0x4cb8c9dd,
|
||||
0x99a8c345, 0x4ce10034,
|
||||
0x99c6fbde, 0x4d092ab0,
|
||||
0x99e5443b, 0x4d31494b,
|
||||
0x9a039c57, 0x4d595bfe,
|
||||
0x9a22042d, 0x4d8162c4,
|
||||
0x9a407bb9, 0x4da95d96,
|
||||
0x9a5f02f5, 0x4dd14c6e,
|
||||
0x9a7d99de, 0x4df92f46,
|
||||
0x9a9c406e, 0x4e210617,
|
||||
0x9abaf6a1, 0x4e48d0dd,
|
||||
0x9ad9bc71, 0x4e708f8f,
|
||||
0x9af891db, 0x4e984229,
|
||||
0x9b1776da, 0x4ebfe8a5,
|
||||
0x9b366b68, 0x4ee782fb,
|
||||
0x9b556f81, 0x4f0f1126,
|
||||
0x9b748320, 0x4f369320,
|
||||
0x9b93a641, 0x4f5e08e3,
|
||||
0x9bb2d8de, 0x4f857269,
|
||||
0x9bd21af3, 0x4faccfab,
|
||||
0x9bf16c7a, 0x4fd420a4,
|
||||
0x9c10cd70, 0x4ffb654d,
|
||||
0x9c303dcf, 0x50229da1,
|
||||
0x9c4fbd93, 0x5049c999,
|
||||
0x9c6f4cb6, 0x5070e92f,
|
||||
0x9c8eeb34, 0x5097fc5e,
|
||||
0x9cae9907, 0x50bf031f,
|
||||
0x9cce562c, 0x50e5fd6d,
|
||||
0x9cee229c, 0x510ceb40,
|
||||
0x9d0dfe54, 0x5133cc94,
|
||||
0x9d2de94d, 0x515aa162,
|
||||
0x9d4de385, 0x518169a5,
|
||||
0x9d6decf4, 0x51a82555,
|
||||
0x9d8e0597, 0x51ced46e,
|
||||
0x9dae2d68, 0x51f576ea,
|
||||
0x9dce6463, 0x521c0cc2,
|
||||
0x9deeaa82, 0x524295f0,
|
||||
0x9e0effc1, 0x5269126e,
|
||||
0x9e2f641b, 0x528f8238,
|
||||
0x9e4fd78a, 0x52b5e546,
|
||||
0x9e705a09, 0x52dc3b92,
|
||||
0x9e90eb94, 0x53028518,
|
||||
0x9eb18c26, 0x5328c1d0,
|
||||
0x9ed23bb9, 0x534ef1b5,
|
||||
0x9ef2fa49, 0x537514c2,
|
||||
0x9f13c7d0, 0x539b2af0,
|
||||
0x9f34a449, 0x53c13439,
|
||||
0x9f558fb0, 0x53e73097,
|
||||
0x9f7689ff, 0x540d2005,
|
||||
0x9f979331, 0x5433027d,
|
||||
0x9fb8ab41, 0x5458d7f9,
|
||||
0x9fd9d22a, 0x547ea073,
|
||||
0x9ffb07e7, 0x54a45be6,
|
||||
0xa01c4c73, 0x54ca0a4b,
|
||||
0xa03d9fc8, 0x54efab9c,
|
||||
0xa05f01e1, 0x55153fd4,
|
||||
0xa08072ba, 0x553ac6ee,
|
||||
0xa0a1f24d, 0x556040e2,
|
||||
0xa0c38095, 0x5585adad,
|
||||
0xa0e51d8c, 0x55ab0d46,
|
||||
0xa106c92f, 0x55d05faa,
|
||||
0xa1288376, 0x55f5a4d2,
|
||||
0xa14a4c5e, 0x561adcb9,
|
||||
0xa16c23e1, 0x56400758,
|
||||
0xa18e09fa, 0x566524aa,
|
||||
0xa1affea3, 0x568a34a9,
|
||||
0xa1d201d7, 0x56af3750,
|
||||
0xa1f41392, 0x56d42c99,
|
||||
0xa21633cd, 0x56f9147e,
|
||||
0xa2386284, 0x571deefa,
|
||||
0xa25a9fb1, 0x5742bc06,
|
||||
0xa27ceb4f, 0x57677b9d,
|
||||
0xa29f4559, 0x578c2dba,
|
||||
0xa2c1adc9, 0x57b0d256,
|
||||
0xa2e4249b, 0x57d5696d,
|
||||
0xa306a9c8, 0x57f9f2f8,
|
||||
0xa3293d4b, 0x581e6ef1,
|
||||
0xa34bdf20, 0x5842dd54,
|
||||
0xa36e8f41, 0x58673e1b,
|
||||
0xa3914da8, 0x588b9140,
|
||||
0xa3b41a50, 0x58afd6bd,
|
||||
0xa3d6f534, 0x58d40e8c,
|
||||
0xa3f9de4e, 0x58f838a9,
|
||||
0xa41cd599, 0x591c550e,
|
||||
0xa43fdb10, 0x594063b5,
|
||||
0xa462eeac, 0x59646498,
|
||||
0xa486106a, 0x598857b2,
|
||||
0xa4a94043, 0x59ac3cfd,
|
||||
0xa4cc7e32, 0x59d01475,
|
||||
0xa4efca31, 0x59f3de12,
|
||||
0xa513243b, 0x5a1799d1,
|
||||
0xa5368c4b, 0x5a3b47ab,
|
||||
0xa55a025b, 0x5a5ee79a,
|
||||
0xa57d8666, 0x5a82799a
|
||||
};
|
||||
|
||||
/*End of File*/
|
192
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_CToC_FC32_Sfs_s.S
Normal file
192
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_CToC_FC32_Sfs_s.S
Normal file
@ -0,0 +1,192 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This is a modification of omxSP_FFTFwd_CToC_SC32_Sfs_s.s
|
||||
@// to support float instead of SC32.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a forward FFT for a complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_FC32_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define tmpOrder r4
|
||||
#define pTwiddle r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total num of radix stages required to complete the FFT
|
||||
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.F32
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
|
||||
@// Write function header
|
||||
@// Top-level forward complex-float FFT driver: derives order from N in the
@// spec and chains the externally defined stage routines accordingly.
@// In:  r0 = pSrc, r1 = pDst, r2 = pFFTSpec (register #defines of this file).
@// Out: r0 = OMX_Sts_NoErr; this routine has no error path.
@// NOTE(review): the stage routines live in other files; which registers
@// they consume/update (pSrc, argDst, pOut, subFFTNum, subFFTSize, ...) is
@// assumed from how they are chained here - confirm against their sources.
M_START omxSP_FFTFwd_CToC_FC32_Sfs,r11,d15
|
||||
|
||||
@ Structure offsets for the FFTSpec
@// Byte offsets used by the LDRs below; ARMsFFTSpec_pBitRev is declared but
@// not read anywhere in this function.
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the size from structure and take log
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
CLZ order,N @// N = 2^order
|
||||
@// order = 31 - CLZ(N), i.e. the bit index of N's highest set bit
RSB order,order,#31
|
||||
MOV subFFTSize,#1
|
||||
@// subFFTNum and N are both #defined as r6, so N is already in subFFTNum;
@// presumably that is why the move below is commented out.
@//MOV subFFTNum,N
|
||||
|
||||
CMP order,#3
|
||||
BGT orderGreaterthan3 @// order > 3
|
||||
|
||||
CMP order,#1
|
||||
BGE orderGreaterthan0 @// order > 0
|
||||
@// order < 1: no stage routine is called; one D register (64 bits) is
@// copied from pSrc to pDst.
VLD1 dX0,[pSrc]
|
||||
VST1 dX0,[pDst]
|
||||
MOV pSrc,pDst
|
||||
@// Still the flags of CMP order,#1: VLD1/VST1/MOV above do not set flags,
@// so this branch is taken whenever execution reaches it.
BLT FFTEnd
|
||||
|
||||
orderGreaterthan0:
|
||||
@// set the buffers appropriately for various orders
|
||||
CMP order,#2
|
||||
@// argDst is r2 and argTwiddle is r1, so the incoming pFFTSpec and pDst
@// values are overwritten by the moves below.
@// order == 2: first stage writes to the spec's pBuf and pOut is repointed
@// at pDst; any other order here: first stage writes to pDst.
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
@// Pass the first stage destination in RN5
|
||||
MOVEQ pOut,pDst
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
CMP order,#1
|
||||
BGT orderGreaterthan1
|
||||
@// order = 1
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan1:
|
||||
CMP order,#2
|
||||
BGT orderGreaterthan2
|
||||
@// order = 2
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan2: @// order =3
|
||||
@// Three radix-2 calls: first stage, one middle stage, last stage.
BL armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan3:
|
||||
@// Set input args to fft stages
|
||||
@// Bit 1 of order picks the first-stage destination: set -> pDst,
@// clear -> the spec's pBuf (and pOut is repointed at pDst).
TST order, #2
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
@// Pass the first stage destination in RN5
|
||||
MOVEQ pOut,pDst
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though
|
||||
@// the first BL would corrupt the flags. This is because the end of
|
||||
@// the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
|
||||
@// to EQ
|
||||
|
||||
@// Even order -> radix-4 first stage, odd order -> radix-8 first stage.
TST order,#0x00000001
|
||||
BLEQ armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
@// NOTE(review): subFFTNum is not written in this function after the
@// initial load of N; it is presumably updated by each stage routine.
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
unscaledRadix4Loop:
|
||||
@// BEQ consumes the flags of the most recent CMP subFFTNum,#4 (either the
@// one before the loop or the one at the loop bottom; B leaves flags alone).
BEQ lastStageUnscaledRadix4
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B unscaledRadix4Loop
|
||||
|
||||
lastStageUnscaledRadix4:
|
||||
@// NOTE(review): this symbol has no .extern line in this file's import
@// list; GNU as treats undefined symbols as external, so it still resolves
@// at link time.
BL armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
FFTEnd:
|
||||
|
||||
@// Set return value
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
|
||||
.end
|
356
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_CToC_SC16_Sfs_s.S
Normal file
356
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_CToC_SC16_Sfs_s.S
Normal file
@ -0,0 +1,356 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
|
||||
@//
|
||||
@//
|
||||
@// File Name: omxSP_FFTFwd_CToC_SC16_Sfs_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 6729
|
||||
@// Last Modified Date: Tue, 17 Jul 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a forward FFT for a complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
.extern armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define pTwiddle r4
|
||||
#define tmpOrder r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total num of radix stages required to complete the FFT
|
||||
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
#define round r3
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.S16
|
||||
#define dShift D1.S16
|
||||
#define dX0S32 D0.S32
|
||||
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
M_ALLOC4 diffOnStack, 4
|
||||
|
||||
@// Write function header
|
||||
@// Top-level forward FFT driver for 16-bit complex data with a caller-supplied
@// scale factor: derives order from N in the spec and chains scaled (Sfs) and
@// unscaled stage routines.
@// In:  r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale (register #defines of
@//      this file).
@// Out: r0 = OMX_Sts_NoErr; this routine has no error path.
@// Scale handling as visible below: scale is clamped to order; on the paths
@// that finish at FFTEnd the value left in diffOnStack (scale - order, or
@// scale itself for order < 1) is applied as a rounding right shift over the
@// output when it is positive. Paths that finish at End skip that shift.
@// NOTE(review): the stage routines live in other files; which registers they
@// consume/update (pSrc, argDst, pOut, argScale, subFFTNum, subFFTSize) and
@// the flag state they return with are assumed from how they are chained here.
M_START omxSP_FFTFwd_CToC_SC16_Sfs,r11,d15
|
||||
|
||||
@ Structure offsets for the FFTSpec
@// Byte offsets used by the LDRs below; ARMsFFTSpec_pBitRev is declared but
@// not read anywhere in this function.
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the size from structure and take log
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
CLZ order,N @// N = 2^order
|
||||
@// order = 31 - CLZ(N), i.e. the bit index of N's highest set bit
RSB order,order,#31
|
||||
MOV subFFTSize,#1
|
||||
@// subFFTNum and N are both #defined as r6, so N is already in subFFTNum;
@// presumably that is why the move below is commented out.
@//MOV subFFTNum,N
|
||||
|
||||
CMP order,#3
|
||||
BGT orderGreaterthan3 @// order > 3
|
||||
|
||||
CMP order,#1
|
||||
BGE orderGreaterthan0 @// order > 0
|
||||
@// order < 1: the whole scale is stored as the pending shift, one 32-bit
@// word is copied from pSrc to pDst, and pSrc is repointed at pDst so the
@// FFTEnd scaling loop works on the destination.
M_STR scale, diffOnStack,LT @// order = 0
|
||||
LDRLT x0r,[pSrc]
|
||||
STRLT x0r,[pDst]
|
||||
MOVLT pSrc,pDst
|
||||
BLT FFTEnd
|
||||
|
||||
orderGreaterthan0:
|
||||
@// set the buffers appropriately for various orders
|
||||
CMP order,#2
|
||||
@// argDst is r2 and argTwiddle is r1, so the incoming pFFTSpec and pDst
@// values are overwritten by the moves below.
@// order == 2: first stage writes to the spec's pBuf and pOut is repointed
@// at pDst; any other order here: first stage writes to pDst.
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
@// diff = scale - order is saved for FFTEnd; a positive value is the part of
@// the requested scale that the stages themselves cannot absorb.
SUBS diff,scale,order
|
||||
M_STR diff,diffOnStack
|
||||
MOVGT scale,order
|
||||
@// Now scale <= order
|
||||
|
||||
CMP order,#1
|
||||
BGT orderGreaterthan1
|
||||
@// scale - 1 == 0 selects the scaled first stage, < 0 the unscaled one.
@// NOTE(review): in each conditional BL pair below, the second BL sees the
@// flags the first callee returns with - relies on the callee's flag state.
SUBS scale,scale,#1
|
||||
BLEQ armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// order = 1
|
||||
BLLT armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe @// order = 1
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan1:
|
||||
CMP order,#2
|
||||
@// argScale is r4, the same register as pTwiddle (already copied to
@// argTwiddle above). MOV does not set flags, so BGT still tests the CMP.
MOV argScale,scale
|
||||
BGT orderGreaterthan2
|
||||
@// argScale is decremented before each stage; the Sfs variant is chosen
@// while the decremented value is still non-negative.
SUBS argScale,argScale,#1
|
||||
BLGE armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// order =2
|
||||
BLLT armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1
|
||||
BLEQ armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
BLLT armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan2: @// order =3
|
||||
@// Same decrement-and-select scheme over three radix-2 stages (fs, ps, ls).
SUBS argScale,argScale,#1
|
||||
BLGE armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
|
||||
BLLT armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1
|
||||
BLGE armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
|
||||
BLLT armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1
|
||||
BLEQ armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
BLLT armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
orderGreaterthan3:
|
||||
@// check scale = 0 or scale = order
|
||||
@// Three-way dispatch: scale >= order or scale == 0 -> specialScaleCase,
@// anything in between -> generalScaleCase.
SUBS diff, scale, order @// scale > order
|
||||
MOVGT scale,order
|
||||
BGE specialScaleCase @// scale = 0 or scale = order
|
||||
CMP scale,#0
|
||||
BEQ specialScaleCase
|
||||
B generalScaleCase
|
||||
|
||||
specialScaleCase: @// scale = 0 or scale = order and order > 3
|
||||
|
||||
@// Bit 1 of order picks the first-stage destination: set -> pDst,
@// clear -> the spec's pBuf (and pOut is repointed at pDst).
TST order, #2 @// Set input args to fft stages
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
@// diff still holds scale - order from the dispatch above; it is saved for
@// FFTEnd. diff >= 0 means every stage runs scaled, diff < 0 (scale == 0)
@// falls through to the all-unscaled chain.
CMP diff,#0
|
||||
M_STR diff, diffOnStack
|
||||
BGE scaleEqualsOrder
|
||||
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though the first
|
||||
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
|
||||
|
||||
@// Even order -> radix-4 first stage, odd order -> radix-8 first stage.
TST order,#0x00000001
|
||||
BLEQ armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
@// NOTE(review): subFFTNum is not written in this function after the
@// initial load of N; it is presumably updated by each stage routine.
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
unscaledRadix4Loop:
|
||||
@// BEQ consumes the flags of the most recent CMP subFFTNum,#4.
BEQ lastStageUnscaledRadix4
|
||||
BL armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B unscaledRadix4Loop
|
||||
|
||||
lastStageUnscaledRadix4:
|
||||
BL armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
scaleEqualsOrder:
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though the first
|
||||
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
|
||||
|
||||
@// Same chain as the unscaled one above, using the Sfs variants throughout.
TST order,#0x00000001
|
||||
BLEQ armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
scaledRadix4Loop:
|
||||
BEQ lastStageScaledRadix4
|
||||
BL armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B scaledRadix4Loop
|
||||
|
||||
lastStageScaledRadix4:
|
||||
BL armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
|
||||
generalScaleCase: @// 0 < scale < order and order > 3
|
||||
@// Determine the correct destination buffer
|
||||
@// Here diff is redefined as order - scale (positive). count is the number
@// of stage calls this path will make; its parity picks the first-stage
@// destination so that, presumably, the last stage ends in pDst.
SUB diff,order,scale
|
||||
TST diff,#0x01
|
||||
ADDEQ count,scale,diff,LSR #1 @// count = scale + (order - scale)/2
|
||||
MOVNE count,order
|
||||
TST count,#0x01 @// Is count even or odd ?
|
||||
|
||||
MOVNE argDst,pDst @// Set input args to fft stages
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
@// diffOnStack now holds order - scale; it is reloaded at outScale. All
@// generalScaleCase paths finish at End, never at FFTEnd.
CMP diff,#1
|
||||
M_STR diff, diffOnStack
|
||||
BEQ scaleps @// scaling including a radix2_ps stage
|
||||
|
||||
MOV argScale,scale @// Put scale in RN4 so as to save and restore
|
||||
BL armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
|
||||
SUBS argScale,argScale,#1
|
||||
|
||||
scaledRadix2Loop:
|
||||
@// One scaled radix-2 stage per remaining unit of argScale.
BLGT armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
|
||||
BGT scaledRadix2Loop
|
||||
B outScale
|
||||
|
||||
scaleps:
|
||||
@// diff == 1: every stage except the last is scaled; the scaled chain ends
@// with the Sfs ps stage and is followed by a single unscaled ls stage.
SUB argScale,scale,#1 @// order>3 and diff=1 => scale >= 3
|
||||
BL armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
|
||||
SUBS argScale,argScale,#1
|
||||
|
||||
scaledRadix2psLoop:
|
||||
BEQ scaledRadix2psStage
|
||||
BLGT armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
|
||||
BGE scaledRadix2psLoop
|
||||
|
||||
scaledRadix2psStage:
|
||||
BL armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
|
||||
B generalLastStageUnscaledRadix2
|
||||
|
||||
|
||||
outScale:
|
||||
@// Reload order - scale: an even count of remaining stages is done with
@// radix-4 calls, an odd count with radix-2 calls.
M_LDR diff, diffOnStack
|
||||
@//check for even or odd order
|
||||
TST diff,#0x00000001
|
||||
BEQ generalUnscaledRadix4Loop
|
||||
B unscaledRadix2Loop
|
||||
|
||||
generalUnscaledRadix4Loop:
|
||||
CMP subFFTNum,#4
|
||||
BEQ generalLastStageUnscaledRadix4
|
||||
BL armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
|
||||
B generalUnscaledRadix4Loop
|
||||
|
||||
generalLastStageUnscaledRadix4:
|
||||
BL armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
|
||||
B End
|
||||
|
||||
unscaledRadix2Loop:
|
||||
CMP subFFTNum,#4
|
||||
BEQ generalLastTwoStagesUnscaledRadix2
|
||||
BL armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
|
||||
B unscaledRadix2Loop
|
||||
|
||||
generalLastTwoStagesUnscaledRadix2:
|
||||
@// Falls through into the ls stage below after the ps stage returns.
BL armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
|
||||
generalLastStageUnscaledRadix2:
|
||||
BL armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
|
||||
B End
|
||||
|
||||
|
||||
FFTEnd: @// Does only the scaling
|
||||
|
||||
@// Nothing left to shift when the saved value is zero or negative.
M_LDR diff, diffOnStack
|
||||
CMP diff,#0
|
||||
BLE End
|
||||
|
||||
RSB diff,diff,#0 @// to use VRSHL for right shift by a variable
|
||||
VDUP dShift,diff
|
||||
|
||||
scaleFFTData: @// N = subFFTSize ; dataptr = pDst ; scale = diff
|
||||
@// Per iteration: load one 32-bit lane, rounding-shift the S16 lanes by
@// the negative count in dShift, store it back and post-increment pSrc.
@// The SUBS flags survive the VRSHL/VST1 and drive the BGT below.
@// NOTE(review): relies on subFFTSize holding the element count here.
VLD1 {dX0S32[0]},[pSrc] @// pSrc contains pDst pointer
|
||||
SUBS subFFTSize,subFFTSize,#1
|
||||
VRSHL dX0,dShift
|
||||
VST1 {dX0S32[0]},[pSrc]!
|
||||
|
||||
BGT scaleFFTData
|
||||
|
||||
|
||||
|
||||
End:
|
||||
@// Set return value
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
|
||||
.END
|
335
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_CToC_SC32_Sfs_s.S
Normal file
335
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_CToC_SC32_Sfs_s.S
Normal file
@ -0,0 +1,335 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
@//
|
||||
@// File Name: omxSP_FFTFwd_CToC_SC32_Sfs_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 6684
|
||||
@// Last Modified Date: Mon, 09 Jul 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute a forward FFT for a complex signal
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define tmpOrder r4
|
||||
#define pTwiddle r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total num of radix stages required to complete the FFT
|
||||
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
#define round r3
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.S32
|
||||
#define dShift D1.S32
|
||||
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function: the diffOnStack slot, written with M_STR and read back with M_LDR below
|
||||
M_ALLOC4 diffOnStack, 4
|
||||
|
||||
@// Write function header for omxSP_FFTFwd_CToC_SC32_Sfs (on entry r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale)
|
||||
M_START omxSP_FFTFwd_CToC_SC32_Sfs,r11,d15
|
||||
|
||||
@ Structure offsets for the FFTSpec
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the size from structure and take log (N and subFFTNum are both r6)
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters (twiddle table pointer and the pBuf work buffer)
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
CLZ order,N @// N = 2^order
|
||||
RSB order,order,#31 @// order = 31 - CLZ(N)
|
||||
MOV subFFTSize,#1
|
||||
@//MOV subFFTNum,N
|
||||
|
||||
CMP order,#3
|
||||
BGT orderGreaterthan3 @// order > 3
|
||||
|
||||
CMP order,#1
|
||||
BGE orderGreaterthan0 @// order > 0
|
||||
M_STR scale, diffOnStack,LT @// order = 0: the whole scale becomes the shift applied at FFTEnd
|
||||
VLD1 dX0,[pSrc] @// order = 0: copy the single 64-bit element from pSrc ...
|
||||
VST1 dX0,[pDst] @// ... to pDst
|
||||
MOV pSrc,pDst @// FFTEnd scales in place through pSrc
|
||||
BLT FFTEnd @// still uses the flags of CMP order,#1 above
|
||||
|
||||
orderGreaterthan0:
|
||||
@// set the buffers appropriately for various orders
|
||||
CMP order,#2
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle @// argTwiddle is r1, so the pDst value is overwritten here
|
||||
|
||||
SUBS diff,scale,order @// diff = scale - order; a positive diff is the extra right shift done at FFTEnd
|
||||
M_STR diff,diffOnStack
|
||||
MOVGT scale,order
|
||||
@// Now scale <= order
|
||||
|
||||
CMP order,#1
|
||||
BGT orderGreaterthan1
|
||||
SUBS scale,scale,#1
|
||||
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// order = 1, taken when scale was 1 (scaled stage)
|
||||
BLLT armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe @// order = 1, meant for scale 0 (unscaled stage); NOTE(review): if the BLEQ above ran, this depends on the flags it returns with - confirm
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan1:
|
||||
CMP order,#2
|
||||
MOV argScale,scale @// argScale is r4, which held pTwiddle until now (already copied to argTwiddle)
|
||||
BGT orderGreaterthan2
|
||||
SUBS argScale,argScale,#1
|
||||
BLGE armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// order =2
|
||||
BLLT armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1
|
||||
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
BLLT armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan2: @// order =3
|
||||
SUBS argScale,argScale,#1
|
||||
BLGE armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe // "fs" means first stage
|
||||
BLLT armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1
|
||||
BLGE armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
|
||||
BLLT armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1
|
||||
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe // "ls" means last stage
|
||||
BLLT armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
|
||||
orderGreaterthan3:
|
||||
@// check scale = 0 or scale = order
|
||||
SUBS diff, scale, order @// scale > order
|
||||
MOVGT scale,order
|
||||
BGE specialScaleCase @// scale = 0 or scale = order
|
||||
CMP scale,#0
|
||||
BEQ specialScaleCase
|
||||
B generalScaleCase
|
||||
|
||||
specialScaleCase: @// scale = 0 or scale = order and order >= 2 (only branched to from orderGreaterthan3, i.e. with order > 3)
|
||||
|
||||
TST order, #2 @// Set input args to fft stages
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
CMP diff,#0
|
||||
M_STR diff, diffOnStack
|
||||
BGE scaleEqualsOrder
|
||||
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though the first
|
||||
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
|
||||
|
||||
TST order,#0x00000001
|
||||
BLEQ armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4 @// subFFTNum (r6) is not written in this file after the initial load; presumably updated by the stage routines - confirm
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
unscaledRadix4Loop:
|
||||
BEQ lastStageUnscaledRadix4
|
||||
BL armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B unscaledRadix4Loop
|
||||
|
||||
lastStageUnscaledRadix4:
|
||||
BL armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
scaleEqualsOrder:
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though the first
|
||||
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
|
||||
|
||||
TST order,#0x00000001
|
||||
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
scaledRadix4Loop:
|
||||
BEQ lastStageScaledRadix4
|
||||
BL armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B scaledRadix4Loop
|
||||
|
||||
lastStageScaledRadix4:
|
||||
BL armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
generalScaleCase: @// 0 < scale < order and order >= 2
|
||||
@// Determine the correct destination buffer
|
||||
SUB diff,order,scale @// here diff = order - scale (opposite sign to the other paths)
|
||||
TST diff,#0x01
|
||||
ADDEQ count,scale,diff,LSR #1 @// count = scale + (order - scale)/2
|
||||
MOVNE count,order
|
||||
TST count,#0x01 @// Is count even or odd ?
|
||||
|
||||
MOVNE argDst,pDst @// Set input args to fft stages
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
M_STR diff, diffOnStack
|
||||
|
||||
MOV argScale,scale @// Put scale in RN4 so as to save and restore
|
||||
BL armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
|
||||
SUBS argScale,argScale,#1
|
||||
|
||||
scaledRadix2Loop:
|
||||
BLGT armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
|
||||
BGT scaledRadix2Loop
|
||||
|
||||
|
||||
M_LDR diff, diffOnStack
|
||||
@//check whether diff (= order - scale, reloaded from the stack) is even or odd
|
||||
TST diff,#0x00000001
|
||||
BEQ generalUnscaledRadix4Loop
|
||||
B unscaledRadix2Loop
|
||||
|
||||
generalUnscaledRadix4Loop:
|
||||
CMP subFFTNum,#4
|
||||
BEQ generalLastStageUnscaledRadix4
|
||||
BL armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
|
||||
B generalUnscaledRadix4Loop
|
||||
|
||||
generalLastStageUnscaledRadix4:
|
||||
BL armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
|
||||
B End @// skips FFTEnd: diffOnStack holds order - scale on this path, not a shift count
|
||||
|
||||
|
||||
unscaledRadix2Loop:
|
||||
CMP subFFTNum,#2
|
||||
BEQ generalLastStageUnscaledRadix2
|
||||
BL armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
|
||||
B unscaledRadix2Loop
|
||||
|
||||
generalLastStageUnscaledRadix2:
|
||||
BL armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B End
|
||||
|
||||
|
||||
FFTEnd: @// Does only the scaling
|
||||
|
||||
M_LDR diff, diffOnStack
|
||||
CMP diff,#0
|
||||
BLE End @// nothing left to shift when diff <= 0
|
||||
|
||||
RSB diff,diff,#0 @// to use VRSHL for right shift by a variable
|
||||
VDUP dShift,diff
|
||||
|
||||
scaleFFTData: @// N = subFFTSize ; dataptr = pDst ; scale = diff
|
||||
VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer
|
||||
SUBS subFFTSize,subFFTSize,#1 @// loop counter; flags are consumed by the BGT below
|
||||
VRSHL dX0,dShift @// negative shift count = rounding right shift of both 32-bit lanes
|
||||
VST1 {dX0},[pSrc]! @// write back in place and advance to the next 64-bit element
|
||||
|
||||
BGT scaleFFTData
|
||||
|
||||
|
||||
|
||||
End:
|
||||
@// Set return value
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
|
||||
.end
|
406
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S
Normal file
406
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S
Normal file
@ -0,0 +1,406 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s
|
||||
@// to support float instead of SC32.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute FFT for a real signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define tmpOrder r4
|
||||
#define pTwiddle r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total num of radix stages required to complete the FFT
|
||||
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
#define subFFTSizeTmp r6
|
||||
#define step r3
|
||||
#define step1 r4
|
||||
#define twStep r8
|
||||
#define zero r9
|
||||
#define pTwiddleTmp r5
|
||||
#define t0 r10
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 d0.f32
|
||||
#define dzero d1.f32
|
||||
#define dZero d2.f32
|
||||
#define dShift d3.f32
|
||||
#define dX0r d2.f32
|
||||
#define dX0i d3.f32
|
||||
#define dX1r d4.f32
|
||||
#define dX1i d5.f32
|
||||
#define dT0 d6.f32
|
||||
#define dT1 d7.f32
|
||||
#define dT2 d8.f32
|
||||
#define dT3 d9.f32
|
||||
#define qT0 d10.f32
|
||||
#define qT1 d12.f32
|
||||
#define dW0r d14.f32
|
||||
#define dW0i d15.f32
|
||||
#define dW1r d16.f32
|
||||
#define dW1i d17.f32
|
||||
#define dY0r d14.f32
|
||||
#define dY0i d15.f32
|
||||
#define dY1r d16.f32
|
||||
#define dY1i d17.f32
|
||||
#define dY0rS64 d14.s64
|
||||
#define dY0iS64 d15.s64
|
||||
#define qT2 d18.f32
|
||||
#define qT3 d20.f32
|
||||
@// lastThreeelements
|
||||
#define dX1 d3.f32
|
||||
#define dW0 d4.f32
|
||||
#define dW1 d5.f32
|
||||
#define dY0 d10.f32
|
||||
#define dY1 d11.f32
|
||||
#define dY2 d12.f32
|
||||
#define dY3 d13.f32
|
||||
|
||||
#define half d0.f32
|
||||
|
||||
HALF: .float 0.5 @// constant loaded into half[0] just before evenOddButterflyLoop
|
||||
|
||||
@// Allocate stack memory required by the function (nothing is allocated in this variant)
|
||||
|
||||
@// Write function header for omxSP_FFTFwd_RToCCS_F32_Sfs (on entry r0 = pSrc, r1 = pDst, r2 = pFFTSpec)
|
||||
M_START omxSP_FFTFwd_RToCCS_F32_Sfs,r11,d15
|
||||
|
||||
@ Structure offsets for the FFTSpec
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the size from structure and take log (N and subFFTNum are both r6)
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters (twiddle table pointer and the pBuf work buffer)
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
@// N=1 Treat separately: output is the input sample followed by two zero words
|
||||
CMP N,#1
|
||||
BGT sizeGreaterThanOne
|
||||
VLD1 dX0[0],[pSrc]
|
||||
MOV zero,#0
|
||||
VMOV dzero[0],zero
|
||||
VMOV dZero[0],zero
|
||||
VST3 {dX0[0],dzero[0],dZero[0]},[pDst] @// stores three 32-bit words: x, 0, 0
|
||||
|
||||
B End
|
||||
|
||||
|
||||
|
||||
sizeGreaterThanOne:
|
||||
@// Do a N/2 point complex FFT (scale is never referenced in this float variant; r3 is reused as step)
|
||||
|
||||
MOV N,N,ASR #1 @// N/2 point complex FFT
|
||||
|
||||
CLZ order,N @// N = 2^order
|
||||
RSB order,order,#31 @// order = 31 - CLZ(N), with N already halved
|
||||
MOV subFFTSize,#1
|
||||
@//MOV subFFTNum,N
|
||||
|
||||
CMP order,#3
|
||||
BGT orderGreaterthan3 @// order > 3
|
||||
|
||||
CMP order,#1
|
||||
BGE orderGreaterthan0 @// order > 0
|
||||
VLD1 dX0,[pSrc] @// order = 0: copy the single 64-bit element to the work buffer
|
||||
VST1 dX0,[pOut]
|
||||
MOV pSrc,pOut @// the fixup at FFTEnd reads from pSrc ...
|
||||
MOV argDst,pDst @// ... and writes through argDst
|
||||
BLT FFTEnd @// still uses the flags of CMP order,#1 above
|
||||
|
||||
orderGreaterthan0:
|
||||
@// set the buffers appropriately for various orders
|
||||
CMP order,#2
|
||||
MOVEQ argDst,pDst
|
||||
MOVNE argDst,pOut
|
||||
@// Pass the first stage destination in RN5
|
||||
MOVNE pOut,pDst
|
||||
MOV argTwiddle,pTwiddle @// argTwiddle is r1, so the pDst value is overwritten here
|
||||
|
||||
CMP order,#1
|
||||
BGT orderGreaterthan1
|
||||
@// order = 1
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan1:
|
||||
CMP order,#2
|
||||
BGT orderGreaterthan2
|
||||
@// order =2
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan2:@// order =3
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
|
||||
|
||||
B FFTEnd
|
||||
|
||||
|
||||
|
||||
orderGreaterthan3:
|
||||
specialScaleCase:
|
||||
|
||||
@// Set input args to fft stages
|
||||
TST order, #2
|
||||
MOVEQ argDst,pDst
|
||||
MOVNE argDst,pOut
|
||||
@// Pass the first stage destination in RN5
|
||||
MOVNE pOut,pDst
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though
|
||||
@// the first BL would corrupt the flags. This is because the end of
|
||||
@// the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
|
||||
@// to EQ
|
||||
|
||||
TST order,#0x00000001
|
||||
BLEQ armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4 @// subFFTNum (r6) is not written in this file after the halving above; presumably updated by the stage routines - confirm
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
unscaledRadix4Loop:
|
||||
BEQ lastStageUnscaledRadix4
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B unscaledRadix4Loop
|
||||
|
||||
lastStageUnscaledRadix4:
|
||||
BL armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
FFTEnd:
|
||||
finalComplexToRealFixup:
|
||||
|
||||
|
||||
@// F(0) = 1/2[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]
|
||||
@// 1/2[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]
|
||||
@// 1/2[2a+j0] - j [0+j2b]
|
||||
@// (a+b, 0)
|
||||
|
||||
@// F(N/2) = 1/2[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]
|
||||
@// 1/2[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]
|
||||
@// 1/2[2a+j0] + j [0+j2b]
|
||||
@// (a-b, 0)
|
||||
|
||||
@// F(0) and F(N/2)
|
||||
VLD2 {dX0r[0],dX0i[0]},[pSrc]! @// lane 0 of dX0r/dX0i = first two words at pSrc (Z(0) real, imaginary)
|
||||
MOV zero,#0
|
||||
VMOV dX0r[1],zero
|
||||
MOV step,subFFTSize,LSL #3 @// step = N/2 * 8 bytes
|
||||
VMOV dX0i[1],zero
|
||||
@// twStep = 3N/8 * 8 bytes pointing to W^1
|
||||
SUB twStep,step,subFFTSize,LSL #1
|
||||
|
||||
VADD dY0r,dX0r,dX0i @// F(0) = ((Z0.r+Z0.i) , 0)
|
||||
MOV step1,subFFTSize,LSL #2 @// step1 = N/2 * 4 bytes
|
||||
VSUB dY0i,dX0r,dX0i @// F(N/2) = ((Z0.r-Z0.i) , 0)
|
||||
SUBS subFFTSize,subFFTSize,#2 @// flags are consumed by the BLT / BEQ further down
|
||||
|
||||
VST1 dY0r,[argDst],step
|
||||
ADD pTwiddleTmp,argTwiddle,#8 @// W^2
|
||||
VST1 dY0i,[argDst]!
|
||||
ADD argTwiddle,argTwiddle,twStep @// W^1
|
||||
|
||||
VDUP dzero,zero
|
||||
SUB argDst,argDst,step
|
||||
|
||||
BLT End @// subFFTSize was 1: only F(0) and F(N/2) exist
|
||||
BEQ lastElement @// subFFTSize was 2: only the middle element remains
|
||||
SUB step,step,#24
|
||||
SUB step1,step1,#8 @// (N/4-1)*8 bytes
|
||||
|
||||
@// F(k) = 1/2[Z(k) + Z'(N/2-k)] -j*W^(k) [Z(k) - Z'(N/2-k)]
|
||||
@// Note: W^k is stored as negative values in the table
|
||||
@// Process 4 elements at a time. E.g: F(1),F(2) and F(N/2-2),F(N/2-1)
|
||||
@// since both of them require Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
|
||||
|
||||
|
||||
LDR t0, =HALF
|
||||
VLD1 half[0], [t0] @// half is d0 (same register as dX0), lane 0 = 0.5
|
||||
|
||||
evenOddButterflyLoop:
|
||||
|
||||
|
||||
VLD1 dW0r,[argTwiddle],step1
|
||||
VLD1 dW1r,[argTwiddle]!
|
||||
|
||||
VLD2 {dX0r,dX0i},[pSrc],step
|
||||
SUB argTwiddle,argTwiddle,step1
|
||||
VLD2 {dX1r,dX1i},[pSrc]!
|
||||
|
||||
|
||||
|
||||
SUB step1,step1,#8 @// (N/4-2)*8 bytes
|
||||
VLD1 dW0i,[pTwiddleTmp],step1
|
||||
VLD1 dW1i,[pTwiddleTmp]!
|
||||
SUB pSrc,pSrc,step
|
||||
|
||||
SUB pTwiddleTmp,pTwiddleTmp,step1
|
||||
VREV64 dX1r,dX1r
|
||||
VREV64 dX1i,dX1i
|
||||
SUBS subFFTSize,subFFTSize,#4 @// loop counter; flags are consumed by the BGT at the bottom of the loop
|
||||
|
||||
|
||||
|
||||
VSUB dT2,dX0r,dX1r @// a-c
|
||||
SUB step1,step1,#8
|
||||
VADD dT0,dX0r,dX1r @// a+c
|
||||
VSUB dT1,dX0i,dX1i @// b-d
|
||||
VADD dT3,dX0i,dX1i @// b+d
|
||||
VMUL dT0,dT0,half[0]
|
||||
VMUL dT1,dT1,half[0]
|
||||
VZIP dW1r,dW1i
|
||||
VZIP dW0r,dW0i
|
||||
|
||||
|
||||
VMUL qT0,dW1r,dT2
|
||||
VMUL qT1,dW1r,dT3
|
||||
VMUL qT2,dW0r,dT2
|
||||
VMUL qT3,dW0r,dT3
|
||||
|
||||
VMLA qT0,dW1i,dT3
|
||||
VMLS qT1,dW1i,dT2
|
||||
|
||||
VMLS qT2,dW0i,dT3
|
||||
VMLA qT3,dW0i,dT2
|
||||
|
||||
|
||||
VMUL dX1r,qT0,half[0]
|
||||
VMUL dX1i,qT1,half[0]
|
||||
|
||||
VSUB dY1r,dT0,dX1i @// F(N/2 -1)
|
||||
VADD dY1i,dT1,dX1r
|
||||
VNEG dY1i,dY1i
|
||||
|
||||
VREV64 dY1r,dY1r
|
||||
VREV64 dY1i,dY1i
|
||||
|
||||
|
||||
VMUL dX0r,qT2,half[0]
|
||||
VMUL dX0i,qT3,half[0]
|
||||
|
||||
VSUB dY0r,dT0,dX0i @// F(1)
|
||||
VADD dY0i,dT1,dX0r
|
||||
|
||||
|
||||
VST2 {dY0r,dY0i},[argDst],step
|
||||
VST2 {dY1r,dY1i},[argDst]!
|
||||
SUB argDst,argDst,step
|
||||
SUB step,step,#32 @// (N/2-4)*8 bytes
|
||||
|
||||
|
||||
BGT evenOddButterflyLoop
|
||||
|
||||
@// set both the ptrs to the last element
|
||||
SUB pSrc,pSrc,#8
|
||||
SUB argDst,argDst,#8
|
||||
|
||||
|
||||
|
||||
@// Last element can be expanded as follows
|
||||
@// 1/2[Z(k) + Z'(k)] + j w^k [Z(k) - Z'(k)]
|
||||
@// 1/2[(a+jb) + (a-jb)] + j w^k [(a+jb) - (a-jb)]
|
||||
@// 1/2[2a+j0] + j (c+jd) [0+j2b]
|
||||
@// (a-bc, -bd)
|
||||
@// Since (c,d) = (0,1) for the last element, result is just (a,-b)
|
||||
|
||||
lastElement:
|
||||
VLD1 dX0r,[pSrc]
|
||||
|
||||
VST1 dX0r[0],[argDst]! @// store a unchanged
|
||||
VNEG dX0r,dX0r
|
||||
VST1 dX0r[1],[argDst]! @// store -b
|
||||
|
||||
End:
|
||||
@// Set return value
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
|
||||
.end
|
158
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.S
Normal file
158
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.S
Normal file
@ -0,0 +1,158 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7403
|
||||
@// Last Modified Date: Mon, 17 Sep 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute FFT for a real signal
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
.extern omxSP_FFTFwd_RToCCS_S32_Sfs
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
#define pTmpDst r4
|
||||
#define pTmpSrc r5
|
||||
#define N r6
|
||||
#define order r7
|
||||
#define pOut r8
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.S16
|
||||
#define qY0 Q1.S32
|
||||
#define dY0S32 D2.S32
|
||||
#define qX0 Q1.S32
|
||||
#define dY1S32 D3.S32
|
||||
#define dX0S32 D0.S32
|
||||
|
||||
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
|
||||
@// Write function header for omxSP_FFTFwd_RToCCS_S16S32_Sfs: widens the S16 input to S32, then calls omxSP_FFTFwd_RToCCS_S32_Sfs
|
||||
M_START omxSP_FFTFwd_RToCCS_S16S32_Sfs,r11,d15
|
||||
|
||||
@ Structure offsets for the FFTSpec
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the size from structure and take log
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters (only pBuf is needed by this wrapper)
|
||||
@//LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
|
||||
@// N=1 Treat separately: widen the single sample straight into pDst
|
||||
CMP N,#1
|
||||
BGT sizeGreaterThanOne
|
||||
VLD1 dX0[0],[pSrc] @// load one 16-bit lane
|
||||
VMOVL qY0,dX0 @// widen S16 lanes to S32
|
||||
VST1 dY0S32[0],[pDst] @// store one 32-bit lane
|
||||
|
||||
MOV pSrc,pDst @// the S32 routine reads its input from pDst
|
||||
B realS32FFT
|
||||
|
||||
sizeGreaterThanOne:
|
||||
MOV N,N,ASR #1 @// N/2, the loop below counts in pairs of samples
|
||||
|
||||
CLZ order,N @// N = 2^order
|
||||
RSB order,order,#31 @// order = 31 - CLZ(N), with N already halved
|
||||
|
||||
TST order,#1 @// choose the buffer that receives the widened copy
|
||||
MOVEQ pTmpDst,pOut @// even order: widen into pBuf
|
||||
MOVNE pTmpDst,pDst @// odd order: widen into pDst
|
||||
MOV pTmpSrc,pTmpDst @// remember the start; pTmpDst is post-incremented by the copy loop
|
||||
|
||||
CMP N,#1
|
||||
BGT copyS16ToS32
|
||||
VLD1 dX0S32[0],[pSrc] @// halved N is 1: load 32 bits = two S16 samples
|
||||
VMOVL qX0,dX0
|
||||
VST1 dY0S32,[pTmpDst] @// store the two widened S32 values
|
||||
B setpSrc
|
||||
|
||||
|
||||
copyS16ToS32:
|
||||
|
||||
VLD1 dX0,[pSrc]! @// four S16 samples per pass
|
||||
SUBS N,N,#2 @// four samples = two units of the halved N; flags are consumed by the BGT below
|
||||
VMOVL qX0,dX0
|
||||
VST1 {dY0S32,dY1S32},[pTmpDst]! @// four widened S32 values
|
||||
BGT copyS16ToS32
|
||||
|
||||
setpSrc:
|
||||
MOV pSrc,pTmpSrc @// the S32 routine now reads the widened data
|
||||
|
||||
|
||||
|
||||
realS32FFT:
|
||||
BL omxSP_FFTFwd_RToCCS_S32_Sfs @// r1 (pDst), r2 (pFFTSpec) and r3 (scale) are not written above, so they pass through unchanged
|
||||
|
||||
|
||||
|
||||
End:
|
||||
@// Set return value (NOTE(review): r0 is overwritten here regardless of what the BL above left in it)
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
.end
|
||||
|
549
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_S32_Sfs_s.S
Normal file
549
media/openmax_dl/dl/sp/src/omxSP_FFTFwd_RToCCS_S32_Sfs_s.S
Normal file
@ -0,0 +1,549 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: omxSP_FFTFwd_RToCCS_S32_Sfs_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7810
|
||||
@// Last Modified Date: Thu, 04 Oct 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute FFT for a real signal
|
||||
@//
|
||||
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define tmpOrder r4
|
||||
#define pTwiddle r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total num of radix stages required to complete the FFT
|
||||
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
#define subFFTSizeTmp r6
|
||||
#define step r3
|
||||
#define step1 r4
|
||||
#define twStep r8
|
||||
#define zero r9
|
||||
#define pTwiddleTmp r5
|
||||
#define t0 r10
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 d0.s32
|
||||
#define dzero d1.s32
|
||||
#define dZero d2.s32
|
||||
#define dShift d3.s32
|
||||
#define dX0r d2.s32
|
||||
#define dX0i d3.s32
|
||||
#define dX1r d4.s32
|
||||
#define dX1i d5.s32
|
||||
#define dT0 d6.s32
|
||||
#define dT1 d7.s32
|
||||
#define dT2 d8.s32
|
||||
#define dT3 d9.s32
|
||||
#define qT0 q5.s64
|
||||
#define qT1 q6.s64
|
||||
#define dW0r d14.s32
|
||||
#define dW0i d15.s32
|
||||
#define dW1r d16.s32
|
||||
#define dW1i d17.s32
|
||||
#define dY0r d14.s32
|
||||
#define dY0i d15.s32
|
||||
#define dY1r d16.s32
|
||||
#define dY1i d17.s32
|
||||
#define dY0rS64 d14.s64
|
||||
#define dY0iS64 d15.s64
|
||||
#define qT2 q9.s64
|
||||
#define qT3 q10.s64
|
||||
@// lastThreeelements
|
||||
#define dX1 d3.s32
|
||||
#define dW0 d4.s32
|
||||
#define dW1 d5.s32
|
||||
#define dY0 d10.s32
|
||||
#define dY1 d11.s32
|
||||
#define dY2 d12.s32
|
||||
#define dY3 d13.s32
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
|
||||
M_ALLOC4 diffOnStack, 4
|
||||
|
||||
@// Write function header
|
||||
M_START omxSP_FFTFwd_RToCCS_S32_Sfs,r11,d15
|
||||
|
||||
@ Structure offsets for the FFTSpec
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the size from structure and take log
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
@// N=1: treat separately
|
||||
CMP N,#1
|
||||
BGT sizeGreaterThanOne
|
||||
VLD1 dX0[0],[pSrc]
|
||||
RSB scale,scale,#0 @// to use VRSHL for right shift by a variable
|
||||
MOV zero,#0
|
||||
VMOV dShift[0],scale
|
||||
VMOV dzero[0],zero
|
||||
VRSHL dX0,dShift
|
||||
VMOV dZero[0],zero
|
||||
VST3 {dX0[0],dzero[0],dZero[0]},[pDst]
|
||||
|
||||
B End
|
||||
|
||||
|
||||
|
||||
sizeGreaterThanOne:
|
||||
@// Do a N/2 point complex FFT including the scaling
|
||||
|
||||
MOV N,N,ASR #1 @// N/2 point complex FFT
|
||||
|
||||
CLZ order,N @// N = 2^order
|
||||
RSB order,order,#31
|
||||
MOV subFFTSize,#1
|
||||
@//MOV subFFTNum,N
|
||||
|
||||
CMP order,#3
|
||||
BGT orderGreaterthan3 @// order > 3
|
||||
|
||||
CMP order,#1
|
||||
BGE orderGreaterthan0 @// order > 0
|
||||
M_STR scale, diffOnStack,LT @// order = 0
|
||||
VLD1 dX0,[pSrc]
|
||||
VST1 dX0,[pOut]
|
||||
MOV pSrc,pOut
|
||||
MOV argDst,pDst
|
||||
BLT FFTEnd
|
||||
|
||||
orderGreaterthan0:
|
||||
@// set the buffers appropriately for various orders
|
||||
CMP order,#2
|
||||
MOVEQ argDst,pDst
|
||||
MOVNE argDst,pOut
|
||||
MOVNE pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
SUBS diff,scale,order
|
||||
M_STR diff,diffOnStack
|
||||
MOVGT scale,order
|
||||
@// Now scale <= order
|
||||
|
||||
CMP order,#1
|
||||
BGT orderGreaterthan1
|
||||
SUBS scale,scale,#1
|
||||
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// order = 1
|
||||
BLLT armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe @// order = 1
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan1:
|
||||
CMP order,#2
|
||||
MOV argScale,scale
|
||||
BGT orderGreaterthan2
|
||||
SUBS argScale,argScale,#1
|
||||
BLGE armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// order =2
|
||||
BLLT armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1
|
||||
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
BLLT armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan2:@// order =3
|
||||
SUBS argScale,argScale,#1
|
||||
BLGE armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
|
||||
BLLT armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1
|
||||
BLGE armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
|
||||
BLLT armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1
|
||||
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
BLLT armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
|
||||
orderGreaterthan3:
|
||||
@// check scale = 0 or scale = order
|
||||
SUBS diff, scale, order @// scale > order
|
||||
MOVGT scale,order
|
||||
BGE specialScaleCase @// scale = 0 or scale = order
|
||||
CMP scale,#0
|
||||
BEQ specialScaleCase
|
||||
B generalScaleCase
|
||||
|
||||
specialScaleCase:@// scale = 0 or scale = order and order >= 2
|
||||
|
||||
TST order, #2 @// Set input args to fft stages
|
||||
MOVEQ argDst,pDst
|
||||
MOVNE argDst,pOut
|
||||
MOVNE pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
CMP diff,#0
|
||||
M_STR diff, diffOnStack
|
||||
BGE scaleEqualsOrder
|
||||
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though the first
|
||||
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
|
||||
|
||||
TST order,#0x00000001
|
||||
BLEQ armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
unscaledRadix4Loop:
|
||||
BEQ lastStageUnscaledRadix4
|
||||
BL armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B unscaledRadix4Loop
|
||||
|
||||
lastStageUnscaledRadix4:
|
||||
BL armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
scaleEqualsOrder:
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though the first
|
||||
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
|
||||
|
||||
TST order,#0x00000001
|
||||
BLEQ armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
scaledRadix4Loop:
|
||||
BEQ lastStageScaledRadix4
|
||||
BL armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B scaledRadix4Loop
|
||||
|
||||
lastStageScaledRadix4:
|
||||
BL armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
generalScaleCase:@// 0 < scale < order and order >= 2
|
||||
@// Determine the correct destination buffer
|
||||
SUB diff,order,scale
|
||||
TST diff,#0x01
|
||||
ADDEQ count, scale,diff,lsr #1 @// count = scale + (order - scale)/2
|
||||
MOVNE count, order
|
||||
TST count, #0x01 @// Is count even or odd ?
|
||||
|
||||
MOVEQ argDst,pDst @// Set input args to fft stages
|
||||
MOVNE argDst,pOut
|
||||
MOVNE pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
M_STR diff, diffOnStack
|
||||
|
||||
MOV argScale,scale @// Put scale in RN4 so as to save and restore
|
||||
BL armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
|
||||
SUBS argScale,argScale,#1
|
||||
|
||||
scaledRadix2Loop:
|
||||
BLGT armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
|
||||
BGT scaledRadix2Loop
|
||||
|
||||
|
||||
M_LDR diff, diffOnStack
|
||||
@//check for even or odd order
|
||||
TST diff,#0x00000001
|
||||
BEQ generalUnscaledRadix4Loop
|
||||
B unscaledRadix2Loop
|
||||
|
||||
generalUnscaledRadix4Loop:
|
||||
CMP subFFTNum,#4
|
||||
BEQ generalLastStageUnscaledRadix4
|
||||
BL armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
|
||||
B generalUnscaledRadix4Loop
|
||||
|
||||
generalLastStageUnscaledRadix4:
|
||||
BL armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
|
||||
B finalComplexToRealFixup
|
||||
|
||||
|
||||
unscaledRadix2Loop:
|
||||
CMP subFFTNum,#2
|
||||
BEQ generalLastStageUnscaledRadix2
|
||||
BL armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
|
||||
B unscaledRadix2Loop
|
||||
|
||||
generalLastStageUnscaledRadix2:
|
||||
BL armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B finalComplexToRealFixup
|
||||
|
||||
|
||||
FFTEnd:@// Does only the scaling
|
||||
|
||||
M_LDR diff, diffOnStack
|
||||
CMP diff,#0
|
||||
BLE finalComplexToRealFixup
|
||||
|
||||
RSB diff,diff,#0 @// to use VRSHL for right shift by a variable
|
||||
VDUP dShift,diff
|
||||
|
||||
@// save subFFTSize and use subFFTSizeTmp in the following loop
|
||||
MOV subFFTSizeTmp,subFFTSize @// subFFTSizeTmp same reg as subFFTNum
|
||||
|
||||
scaleFFTData:@// N = subFFTSize ; dataptr = pDst ; scale = diff
|
||||
VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer
|
||||
SUBS subFFTSizeTmp,subFFTSizeTmp,#1
|
||||
VRSHL dX0,dShift
|
||||
VST1 {dX0},[pSrc]!
|
||||
|
||||
BGT scaleFFTData
|
||||
|
||||
SUB pSrc,pSrc,subFFTSize,LSL #3 @// reset pSrc for final fixup
|
||||
|
||||
@// change the logic so that output after scaling is in pOut and not in pDst
|
||||
@// finally store from pOut to pDst
|
||||
@// change branch "End" to branch "finalComplexToRealFixup" in the above
|
||||
@// chk the code below for multiplication by j factor
|
||||
|
||||
finalComplexToRealFixup:
|
||||
|
||||
|
||||
@// F(0) = 1/2[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]
|
||||
@// 1/2[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]
|
||||
@// 1/2[2a+j0] - j [0+j2b]
|
||||
@// (a+b, 0)
|
||||
|
||||
@// F(N/2) = 1/2[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]
|
||||
@// 1/2[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]
|
||||
@// 1/2[2a+j0] + j [0+j2b]
|
||||
@// (a-b, 0)
|
||||
|
||||
@// F(0) and F(N/2)
|
||||
VLD2 {dX0r[0],dX0i[0]},[pSrc]!
|
||||
MOV zero,#0
|
||||
VMOV dX0r[1],zero
|
||||
MOV step,subFFTSize,LSL #3 @// step = N/2 * 8 bytes
|
||||
VMOV dX0i[1],zero
|
||||
SUB twStep,step,subFFTSize,LSL #1 @// twStep = 3N/8 * 8 bytes pointing to W^1
|
||||
|
||||
VADD dY0r,dX0r,dX0i @// F(0) = ((Z0.r+Z0.i) , 0)
|
||||
MOV step1,subFFTSize,LSL #2 @// step1 = N/2 * 4 bytes
|
||||
VSUB dY0i,dX0r,dX0i @// F(N/2) = ((Z0.r-Z0.i) , 0)
|
||||
SUBS subFFTSize,subFFTSize,#2
|
||||
|
||||
VST1 dY0r,[argDst],step
|
||||
ADD pTwiddleTmp,argTwiddle,#8 @// W^2
|
||||
VST1 dY0i,[argDst]!
|
||||
ADD argTwiddle,argTwiddle,twStep @// W^1
|
||||
|
||||
VDUP dzero,zero
|
||||
SUB argDst,argDst,step
|
||||
|
||||
BLT End
|
||||
BEQ lastElement
|
||||
SUB step,step,#24
|
||||
SUB step1,step1,#8 @// (N/4-1)*8 bytes
|
||||
|
||||
@// F(k) = 1/2[Z(k) + Z'(N/2-k)] -j*W^(k) [Z(k) - Z'(N/2-k)]
|
||||
@// Note: W^k is stored as negative values in the table
|
||||
@// Process 4 elements at a time. E.g: F(1),F(2) and F(N/2-2),F(N/2-1) since both of them
|
||||
@// require Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
|
||||
|
||||
|
||||
evenOddButterflyLoop:
|
||||
|
||||
|
||||
VLD1 dW0r,[argTwiddle],step1
|
||||
VLD1 dW1r,[argTwiddle]!
|
||||
|
||||
VLD2 {dX0r,dX0i},[pSrc],step
|
||||
SUB argTwiddle,argTwiddle,step1
|
||||
VLD2 {dX1r,dX1i},[pSrc]!
|
||||
|
||||
|
||||
|
||||
SUB step1,step1,#8 @// (N/4-2)*8 bytes
|
||||
VLD1 dW0i,[pTwiddleTmp],step1
|
||||
VLD1 dW1i,[pTwiddleTmp]!
|
||||
SUB pSrc,pSrc,step
|
||||
|
||||
SUB pTwiddleTmp,pTwiddleTmp,step1
|
||||
VREV64 dX1r,dX1r
|
||||
VREV64 dX1i,dX1i
|
||||
SUBS subFFTSize,subFFTSize,#4
|
||||
|
||||
|
||||
|
||||
VSUB dT2,dX0r,dX1r @// a-c
|
||||
SUB step1,step1,#8
|
||||
VADD dT3,dX0i,dX1i @// b+d
|
||||
VADD dT0,dX0r,dX1r @// a+c
|
||||
VSUB dT1,dX0i,dX1i @// b-d
|
||||
VHADD dT0,dT0,dzero
|
||||
VHADD dT1,dT1,dzero
|
||||
|
||||
VZIP dW1r,dW1i
|
||||
vzip dW0r,dW0i
|
||||
|
||||
|
||||
VMULL qT0,dW1r,dT2
|
||||
VMLAL qT0,dW1i,dT3
|
||||
VMULL qT1,dW1r,dT3
|
||||
VMLSL qT1,dW1i,dT2
|
||||
|
||||
VMULL qT2,dW0r,dT2
|
||||
VMLSL qT2,dW0i,dT3
|
||||
VMULL qT3,dW0r,dT3
|
||||
VMLAL qT3,dW0i,dT2
|
||||
|
||||
|
||||
VRSHRN dX1r,qT0,#32
|
||||
VRSHRN dX1i,qT1,#32
|
||||
|
||||
VSUB dY1r,dT0,dX1i @// F(N/2 -1)
|
||||
VADD dY1i,dT1,dX1r
|
||||
VNEG dY1i,dY1i
|
||||
|
||||
VREV64 dY1r,dY1r
|
||||
VREV64 dY1i,dY1i
|
||||
|
||||
|
||||
VRSHRN dX0r,qT2,#32
|
||||
VRSHRN dX0i,qT3,#32
|
||||
|
||||
|
||||
VSUB dY0r,dT0,dX0i @// F(1)
|
||||
VADD dY0i,dT1,dX0r
|
||||
|
||||
|
||||
VST2 {dY0r,dY0i},[argDst],step
|
||||
VST2 {dY1r,dY1i},[argDst]!
|
||||
SUB argDst,argDst,step
|
||||
SUB step,step,#32 @// (N/2-4)*8 bytes
|
||||
|
||||
|
||||
BGT evenOddButterflyLoop
|
||||
|
||||
SUB pSrc,pSrc,#8 @// set both the ptrs to the last element
|
||||
SUB argDst,argDst,#8
|
||||
|
||||
|
||||
|
||||
@// Last element can be expanded as follows
|
||||
@// 1/2[Z(k) + Z'(k)] + j w^k [Z(k) - Z'(k)]
|
||||
@// 1/2[(a+jb) + (a-jb)] + j w^k [(a+jb) - (a-jb)]
|
||||
@// 1/2[2a+j0] + j (c+jd) [0+j2b]
|
||||
@// (a-bc, -bd)
|
||||
@// Since (c,d) = (0,1) for the last element, result is just (a,-b)
|
||||
|
||||
lastElement:
|
||||
VLD1 dX0r,[pSrc]
|
||||
|
||||
VST1 dX0r[0],[argDst]!
|
||||
VNEG dX0r,dX0r
|
||||
VST1 dX0r[1],[argDst]!
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
End:
|
||||
@// Set return value
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
|
||||
.end
|
||||
|
52
media/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_C_FC32.c
Normal file
52
media/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_C_FC32.c
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "dl/api/armOMX.h"
|
||||
#include "dl/api/omxtypes.h"
|
||||
#include "dl/sp/api/armSP.h"
|
||||
#include "dl/sp/api/omxSP.h"
|
||||
|
||||
|
||||
/**
|
||||
* Function: omxSP_FFTGetBufSize_C_FC32
|
||||
*
|
||||
* Description:
|
||||
* These functions compute the size of the specification structure
|
||||
* required for the length 2^order complex FFT and IFFT functions. The function
|
||||
* <FFTGetBufSize_C_FC32> is used in conjunction with the 32-bit functions
|
||||
* <FFTFwd_CToC_FC32_Sfs> and <FFTInv_CToC_FC32_Sfs>.
|
||||
*
|
||||
* Input Arguments:
|
||||
*
|
||||
* order - base-2 logarithm of the desired block length; valid in the range
|
||||
* [1,12] ([1,15] if BIG_FFT_TABLE is defined.)
|
||||
*
|
||||
* Output Arguments:
|
||||
*
|
||||
* pSize - pointer to the number of bytes required for the specification
|
||||
* structure
|
||||
*
|
||||
* Return Value:
|
||||
*
|
||||
* OMX_Sts_NoErr - no error
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
OMXResult omxSP_FFTGetBufSize_C_FC32(OMX_INT order, OMX_INT *pSize) {
|
||||
if (!pSize || (order < 1) || (order > TWIDDLE_TABLE_ORDER))
|
||||
return OMX_Sts_BadArgErr;
|
||||
/*
|
||||
* The required size is the same as for C_SC32, because the
|
||||
* elements are the same size and because ARMsFFTSpec_SC32 is
|
||||
* the same size as ARMsFFTSpec_FC32.
|
||||
*/
|
||||
return omxSP_FFTGetBufSize_C_SC32(order, pSize);
|
||||
}
|
96
media/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_C_SC16.c
Normal file
96
media/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_C_SC16.c
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This file was originally licensed as follows. It has been
|
||||
* relicensed with permission from the copyright holders.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* File Name: omxSP_FFTGetBufSize_C_SC16.c
|
||||
* OpenMAX DL: v1.0.2
|
||||
* Last Modified Revision: 9468
|
||||
* Last Modified Date: Thu, 03 Jan 2008
|
||||
*
|
||||
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
*
|
||||
*
|
||||
* Description:
|
||||
* Compute the size of the specification structure required
|
||||
*/
|
||||
|
||||
#include "dl/api/armOMX.h"
|
||||
#include "dl/api/omxtypes.h"
|
||||
#include "dl/sp/api/armSP.h"
|
||||
#include "dl/sp/api/omxSP.h"
|
||||
|
||||
|
||||
/**
|
||||
* Function: omxSP_FFTGetBufSize_C_SC16 (2.2.4.1.6)
|
||||
*
|
||||
* Description:
|
||||
* These functions compute the size of the specification structure
|
||||
* required for the length 2^order complex FFT and IFFT functions. The function
|
||||
* <FFTGetBufSize_C_SC16> is used in conjunction with the 16-bit functions
|
||||
* <FFTFwd_CToC_SC16_Sfs> and <FFTInv_CToC_SC16_Sfs>.
|
||||
*
|
||||
* Input Arguments:
|
||||
*
|
||||
* order - base-2 logarithm of the desired block length; valid in the range
|
||||
* [0,12]
|
||||
*
|
||||
* Output Arguments:
|
||||
*
|
||||
* pSize - pointer to the number of bytes required for the specification
|
||||
* structure
|
||||
*
|
||||
* Return Value:
|
||||
*
|
||||
* OMX_Sts_NoErr - no error
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
OMXResult omxSP_FFTGetBufSize_C_SC16(
|
||||
OMX_INT order,
|
||||
OMX_INT *pSize)
|
||||
{
|
||||
|
||||
OMX_INT N,twiddleSize;
|
||||
|
||||
/* Check for order zero */
|
||||
if (order == 0)
|
||||
{
|
||||
*pSize = sizeof(ARMsFFTSpec_SC16);
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
|
||||
N = 1 << order;
|
||||
|
||||
/*The max size of the twiddle table needed is 3N/4 for a radix-4 stage*/
|
||||
twiddleSize = 3*N/4;
|
||||
|
||||
/* 2 pointers to store bitreversed array and twiddle factor array */
|
||||
*pSize = sizeof(ARMsFFTSpec_SC16)
|
||||
/* Twiddle factors */
|
||||
+ sizeof(OMX_SC16) * twiddleSize
|
||||
/* Ping Pong buffer */
|
||||
+ sizeof(OMX_SC16) * N
|
||||
+ 62 ; /* Extra bytes to get 32 byte alignment of ptwiddle and pBuf */
|
||||
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
* END OF FILE
|
||||
*****************************************************************************/
|
||||
|
94
media/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_C_SC32.c
Normal file
94
media/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_C_SC32.c
Normal file
@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This file was originally licensed as follows. It has been
|
||||
* relicensed with permission from the copyright holders.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* File Name: omxSP_FFTGetBufSize_C_SC32.c
|
||||
* OpenMAX DL: v1.0.2
|
||||
* Last Modified Revision: 9468
|
||||
* Last Modified Date: Thu, 03 Jan 2008
|
||||
*
|
||||
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
*
|
||||
*
|
||||
* Description:
|
||||
* Compute the size of the specification structure required
|
||||
*/
|
||||
|
||||
#include "dl/api/armOMX.h"
|
||||
#include "dl/api/omxtypes.h"
|
||||
#include "dl/sp/api/armSP.h"
|
||||
#include "dl/sp/api/omxSP.h"
|
||||
|
||||
|
||||
/**
|
||||
* Function: omxSP_FFTGetBufSize_C_SC32 (2.2.4.1.6)
|
||||
*
|
||||
* Description:
|
||||
* These functions compute the size of the specification structure
|
||||
* required for the length 2^order complex FFT and IFFT functions. The function
|
||||
* <FFTGetBufSize_C_SC32> is used in conjunction with the 32-bit functions
|
||||
* <FFTFwd_CToC_SC32_Sfs> and <FFTInv_CToC_SC32_Sfs>.
|
||||
*
|
||||
* Input Arguments:
|
||||
*
|
||||
* order - base-2 logarithm of the desired block length; valid in the range
|
||||
* [0,12]
|
||||
*
|
||||
* Output Arguments:
|
||||
*
|
||||
* pSize - pointer to the number of bytes required for the specification
|
||||
* structure
|
||||
*
|
||||
* Return Value:
|
||||
*
|
||||
* OMX_Sts_NoErr - no error
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
OMXResult omxSP_FFTGetBufSize_C_SC32(
|
||||
OMX_INT order,
|
||||
OMX_INT *pSize)
|
||||
{
|
||||
|
||||
OMX_INT N,twiddleSize;
|
||||
|
||||
/* Check for order zero */
|
||||
if (order == 0)
|
||||
{
|
||||
*pSize = sizeof(ARMsFFTSpec_SC32);
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
|
||||
N = 1 << order;
|
||||
|
||||
/*The max size of the twiddle table needed is 3N/4 for a radix-4 stage*/
|
||||
twiddleSize = 3*N/4;
|
||||
|
||||
*pSize = sizeof(ARMsFFTSpec_SC32)
|
||||
/* N Twiddle factors */
|
||||
+ sizeof(OMX_SC32) * twiddleSize
|
||||
/* Ping Pong buffer */
|
||||
+ sizeof(OMX_SC32) * N
|
||||
+ 62 ; /* Extra bytes to get 32 byte alignment of ptwiddle and pBuf */
|
||||
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
* END OF FILE
|
||||
*****************************************************************************/
|
||||
|
49
media/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_R_F32.c
Normal file
49
media/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_R_F32.c
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "dl/api/armOMX.h"
|
||||
#include "dl/api/omxtypes.h"
|
||||
#include "dl/sp/api/armSP.h"
|
||||
#include "dl/sp/api/omxSP.h"
|
||||
|
||||
/**
|
||||
* Function: omxSP_FFTGetBufSize_R_F32
|
||||
*
|
||||
* Description:
|
||||
* Computes the size of the specification structure required for the length
|
||||
* 2^order real FFT and IFFT functions.
|
||||
*
|
||||
* Remarks:
|
||||
* This function is used in conjunction with the 32-bit functions
|
||||
* <FFTFwd_RToCCS_F32_Sfs> and <FFTInv_CCSToR_F32_Sfs>.
|
||||
*
|
||||
* Parameters:
|
||||
* [in] order base-2 logarithm of the length; valid in the range
|
||||
* [1,12]. ([1,15] if BIG_FFT_TABLE is defined.)
|
||||
* [out] pSize pointer to the number of bytes required for the
|
||||
* specification structure.
|
||||
*
|
||||
* Return Value:
|
||||
* Standard omxError result. See enumeration for possible result codes.
|
||||
*
|
||||
*/
|
||||
|
||||
OMXResult omxSP_FFTGetBufSize_R_F32(OMX_INT order, OMX_INT *pSize) {
|
||||
if (!pSize || (order < 1) || (order > TWIDDLE_TABLE_ORDER))
|
||||
return OMX_Sts_BadArgErr;
|
||||
|
||||
/*
|
||||
* The required size is the same as for R_S32, because the
|
||||
* elements are the same size and because ARMsFFTSpec_R_SC32 is
|
||||
* the same size as ARMsFFTSpec_R_FC32.
|
||||
*/
|
||||
return omxSP_FFTGetBufSize_R_S32(order, pSize);
|
||||
}
|
91
media/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_R_S16S32.c
Normal file
91
media/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_R_S16S32.c
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This file was originally licensed as follows. It has been
|
||||
* relicensed with permission from the copyright holders.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* File Name: omxSP_FFTGetBufSize_R_S16S32.c
|
||||
* OpenMAX DL: v1.0.2
|
||||
* Last Modified Revision: 7777
|
||||
* Last Modified Date: Thu, 27 Sep 2007
|
||||
*
|
||||
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
*
|
||||
*
|
||||
* Description:
|
||||
* Computes the size of the specification structure required.
|
||||
*/
|
||||
|
||||
#include "dl/api/armOMX.h"
|
||||
#include "dl/api/omxtypes.h"
|
||||
#include "dl/sp/api/armSP.h"
|
||||
#include "dl/sp/api/omxSP.h"
|
||||
|
||||
/**
|
||||
* Function: omxSP_FFTGetBufSize_R_S16S32
|
||||
*
|
||||
* Description:
|
||||
* Computes the size of the specification structure required for the length
|
||||
* 2^order real FFT and IFFT functions.
|
||||
*
|
||||
* Remarks:
|
||||
* This function is used in conjunction with the 16-bit functions
|
||||
* <FFTFwd_RToCCS_S16_S32_Sfs> and <FFTInv_CCSToR_S32_S16_Sfs>.
|
||||
*
|
||||
* Parameters:
|
||||
* [in] order base-2 logarithm of the length; valid in the range
|
||||
* [0,12].
|
||||
* [out] pSize pointer to the number of bytes required for the
|
||||
* specification structure.
|
||||
*
|
||||
* Return Value:
|
||||
* Standard omxError result. See enumeration for possible result codes.
|
||||
*
|
||||
*/
|
||||
|
||||
OMXResult omxSP_FFTGetBufSize_R_S16S32(
|
||||
OMX_INT order,
|
||||
OMX_INT *pSize
|
||||
)
|
||||
{
|
||||
OMX_INT NBy2,N,twiddleSize;
|
||||
|
||||
|
||||
/* Check for order zero */
|
||||
if (order == 0)
|
||||
{
|
||||
*pSize = sizeof(ARMsFFTSpec_R_SC32)
|
||||
+ sizeof(OMX_S32) * (2); /* Extra size 'N' is used in FFTInv_CCSToR_S32S16_Sfs as a temporary buf */
|
||||
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
NBy2 = 1 << (order - 1);
|
||||
N = NBy2<<1;
|
||||
twiddleSize = 5*N/8; /* 3/4(N/2) + N/4 */
|
||||
|
||||
/* 2 pointers to store bitreversed array and twiddle factor array */
|
||||
*pSize = sizeof(ARMsFFTSpec_R_SC32)
|
||||
/* Twiddle factors */
|
||||
+ sizeof(OMX_SC32) * twiddleSize
|
||||
/* Ping Pong buffer for doing the N/2 point complex FFT */
|
||||
+ sizeof(OMX_S32) * (N<<1) /* Extra size 'N' is used in FFTInv_CCSToR_S32S16_Sfs as a temporary buf */
|
||||
+ 62 ; /* Extra bytes to get 32 byte alignment of ptwiddle and pBuf */
|
||||
|
||||
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
* END OF FILE
|
||||
*****************************************************************************/
|
||||
|
91
media/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_R_S32.c
Normal file
91
media/openmax_dl/dl/sp/src/omxSP_FFTGetBufSize_R_S32.c
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This file was originally licensed as follows. It has been
|
||||
* relicensed with permission from the copyright holders.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* File Name: omxSP_FFTGetBufSize_R_S32.c
|
||||
* OpenMAX DL: v1.0.2
|
||||
* Last Modified Revision: 7777
|
||||
* Last Modified Date: Thu, 27 Sep 2007
|
||||
*
|
||||
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
*
|
||||
*
|
||||
* Description:
|
||||
* Computes the size of the specification structure required.
|
||||
*/
|
||||
|
||||
#include "dl/api/armOMX.h"
|
||||
#include "dl/api/omxtypes.h"
|
||||
#include "dl/sp/api/armSP.h"
|
||||
#include "dl/sp/api/omxSP.h"
|
||||
|
||||
/**
|
||||
* Function: omxSP_FFTGetBufSize_R_S32
|
||||
*
|
||||
* Description:
|
||||
* Computes the size of the specification structure required for the length
|
||||
* 2^order real FFT and IFFT functions.
|
||||
*
|
||||
* Remarks:
|
||||
* This function is used in conjunction with the 32-bit functions
|
||||
* <FFTFwd_RToCCS_S32_Sfs> and <FFTInv_CCSToR_S32_Sfs>.
|
||||
*
|
||||
* Parameters:
|
||||
* [in] order base-2 logarithm of the length; valid in the range
|
||||
* [0,12].
|
||||
* [out] pSize pointer to the number of bytes required for the
|
||||
* specification structure.
|
||||
*
|
||||
* Return Value:
|
||||
* Standard omxError result. See enumeration for possible result codes.
|
||||
*
|
||||
*/
|
||||
|
||||
OMXResult omxSP_FFTGetBufSize_R_S32(
|
||||
OMX_INT order,
|
||||
OMX_INT *pSize
|
||||
)
|
||||
{
|
||||
OMX_INT NBy2,N,twiddleSize;
|
||||
|
||||
|
||||
/* Check for order zero */
|
||||
if (order == 0)
|
||||
{
|
||||
*pSize = sizeof(ARMsFFTSpec_R_SC32)
|
||||
+ sizeof(OMX_S32) * (2); /* Extra size 'N' is used in FFTInv_CCSToR_S32S16_Sfs as a temporary buf */
|
||||
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
NBy2 = 1 << (order - 1);
|
||||
N = NBy2<<1;
|
||||
twiddleSize = 5*N/8; /* 3/4(N/2) + N/4 */
|
||||
|
||||
/* 2 pointers to store bitreversed array and twiddle factor array */
|
||||
*pSize = sizeof(ARMsFFTSpec_R_SC32)
|
||||
/* Twiddle factors */
|
||||
+ sizeof(OMX_SC32) * twiddleSize
|
||||
/* Ping Pong buffer for doing the N/2 point complex FFT */
|
||||
+ sizeof(OMX_S32) * (N<<1) /* Extra size 'N' is used in FFTInv_CCSToR_S32_Sfs as a temporary buf */
|
||||
+ 62 ; /* Extra bytes to get 32 byte alignment of ptwiddle and pBuf */
|
||||
|
||||
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
* END OF FILE
|
||||
*****************************************************************************/
|
||||
|
162
media/openmax_dl/dl/sp/src/omxSP_FFTInit_C_FC32.c
Normal file
162
media/openmax_dl/dl/sp/src/omxSP_FFTInit_C_FC32.c
Normal file
@ -0,0 +1,162 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This is a modification of omxSP_FFTInit_C_SC32.c to support
|
||||
* complex float instead of SC32.
|
||||
*/
|
||||
|
||||
#include "dl/api/armOMX.h"
|
||||
#include "dl/api/omxtypes.h"
|
||||
#include "dl/sp/api/armSP.h"
|
||||
#include "dl/sp/api/omxSP.h"
|
||||
|
||||
|
||||
/**
|
||||
* Function: omxSP_FFTInit_C_FC32
|
||||
*
|
||||
* Description:
|
||||
* Initializes the specification structures required for the
|
||||
* complex FFT and IFFT functions.
|
||||
*
|
||||
* Remarks:
|
||||
* Desired block length is specified as an input. The function is used to
|
||||
* initialize the specification structures for functions <FFTFwd_CToC_FC32_Sfs>
|
||||
* and <FFTInv_CToC_FC32_Sfs>. Memory for the specification structure *pFFTSpec
|
||||
* must be allocated prior to calling this function. The space required for
|
||||
* *pFFTSpec, in bytes, can be determined using <FFTGetBufSize_C_FC32>.
|
||||
*
|
||||
* Parameters:
|
||||
* [in] order base-2 logarithm of the desired block length;
|
||||
* valid in the range [1,12]. ([1,15] if
|
||||
* BIG_FFT_TABLE is defined.)
|
||||
* [out] pFFTSpec pointer to initialized specification structure.
|
||||
*
|
||||
* Return Value:
|
||||
* Standard omxError result. See enumeration for possible result codes.
|
||||
*
|
||||
*/
|
||||
|
||||
OMXResult omxSP_FFTInit_C_FC32(OMXFFTSpec_C_FC32* pFFTSpec, OMX_INT order) {
  /*
   * Lays out and fills the complex-float FFT specification inside the
   * caller-supplied block. The ARMsFFTSpec_FC32 header sits at the start
   * of the block, followed by the twiddle table and then the work buffer,
   * each rounded up to a 32-byte boundary.
   *
   * Returns OMX_Sts_BadArgErr when pFFTSpec is NULL or order is outside
   * [1, TWIDDLE_TABLE_ORDER]; OMX_Sts_NoErr otherwise.
   */
  ARMsFFTSpec_FC32* spec = (ARMsFFTSpec_FC32*) pFFTSpec;
  OMX_FC32* twiddle;
  OMX_FC32* workBuf;
  OMX_U32 misalign;
  OMX_INT length;
  OMX_INT quarter;
  OMX_INT eighth;
  OMX_INT stride;
  OMX_INT k;
  OMX_INT src;
  OMX_F32 re;
  OMX_F32 im;
  OMX_F32 one = 1;

  /* Validate args */
  if (!pFFTSpec || (order < 1) || (order > TWIDDLE_TABLE_ORDER))
    return OMX_Sts_BadArgErr;

  /* Block length and the fractions of it used as table indices */
  length = (1 << (order - 1)) << 1;
  quarter = length >> 2;
  eighth = length >> 3;

  /* Twiddle table: first 32-byte aligned address after the header */
  twiddle = (OMX_FC32*) ((OMX_S8*) pFFTSpec + sizeof(ARMsFFTSpec_FC32));
  misalign = ((OMX_U32) twiddle) & 31;
  twiddle = (OMX_FC32*) ((OMX_S8*) twiddle + ((32 - misalign) & 31));

  /* Work buffer: first 32-byte aligned address after 3N/4 twiddles */
  workBuf = (OMX_FC32*) ((OMX_S8*) twiddle +
                         sizeof(OMX_FC32) * (3 * length / 4));
  misalign = ((OMX_U32) workBuf) & 31;
  workBuf = (OMX_FC32*) ((OMX_S8*) workBuf + ((32 - misalign) & 31));

  /*
   * Twiddle factors are taken from armSP_FFT_F32TwiddleTable, reading
   * every "stride"-th (Re, Im) pair for the requested order. Only the
   * entries for indices 0..N/8 are read; the remaining entries up to
   * 3N/4 are produced by sign changes and Re/Im swaps of those values.
   */
  stride = 1 << (TWIDDLE_TABLE_ORDER - order);

  re = armSP_FFT_F32TwiddleTable[0];
  im = armSP_FFT_F32TwiddleTable[1];

  /* Index 0 is needed for every order */
  twiddle[0].Re = re;
  twiddle[0].Im = im;

  /* Indices N/4 and N/2 exist from order 2 upwards */
  if (order >= 2) {
    twiddle[quarter].Re = -im;
    twiddle[quarter].Im = one;
    twiddle[2 * quarter].Re = one;
    twiddle[2 * quarter].Im = im;
  }

  /*
   * eighth is zero for orders 1 and 2, so this loop only runs for
   * order >= 3. The statement order is significant: when k == eighth the
   * mirrored indices coincide pairwise and the later store must win.
   */
  for (k = 1; k <= eighth; k++) {
    src = k * stride;

    re = armSP_FFT_F32TwiddleTable[2 * src];
    im = armSP_FFT_F32TwiddleTable[2 * src + 1];

    twiddle[k].Re = re;
    twiddle[k].Im = im;
    twiddle[2 * eighth - k].Re = -im;
    twiddle[2 * eighth - k].Im = -re;
    twiddle[2 * eighth + k].Re = im;
    twiddle[2 * eighth + k].Im = -re;
    twiddle[4 * eighth - k].Re = -re;
    twiddle[4 * eighth - k].Im = im;
    twiddle[4 * eighth + k].Re = -re;
    twiddle[4 * eighth + k].Im = -im;
    twiddle[6 * eighth - k].Re = im;
    twiddle[6 * eighth - k].Im = re;
  }

  /* Publish the layout; optimized implementations don't use bitreversal */
  spec->N = length;
  spec->pTwiddle = twiddle;
  spec->pBitRev = NULL;
  spec->pBuf = workBuf;

  return OMX_Sts_NoErr;
}
|
||||
|
||||
/*****************************************************************************
|
||||
* END OF FILE
|
||||
*****************************************************************************/
|
201
media/openmax_dl/dl/sp/src/omxSP_FFTInit_C_SC16.c
Normal file
201
media/openmax_dl/dl/sp/src/omxSP_FFTInit_C_SC16.c
Normal file
@ -0,0 +1,201 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This file was originally licensed as follows. It has been
|
||||
* relicensed with permission from the copyright holders.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* File Name: omxSP_FFTInit_C_SC16.c
|
||||
* OpenMAX DL: v1.0.2
|
||||
* Last Modified Revision: 15322
|
||||
* Last Modified Date: Wed, 15 Oct 2008
|
||||
*
|
||||
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
*
|
||||
*
|
||||
* Description:
|
||||
* Initializes the specification structures required
|
||||
*/
|
||||
|
||||
#include "dl/api/armOMX.h"
|
||||
#include "dl/api/omxtypes.h"
|
||||
#include "dl/sp/api/armSP.h"
|
||||
#include "dl/sp/api/omxSP.h"
|
||||
|
||||
|
||||
/**
|
||||
* Function: omxSP_FFTInit_C_SC16
|
||||
*
|
||||
* Description:
|
||||
* These functions initialize the specification structures required for the
|
||||
* complex FFT and IFFT functions.
|
||||
*
|
||||
* Remarks:
|
||||
* Desired block length is specified as an input. The function is used to
|
||||
* initialize the specification structures for functions <FFTFwd_CToC_SC16_Sfs>
|
||||
* and <FFTInv_CToC_SC16_Sfs>. Memory for the specification structure *pFFTSpec
|
||||
* must be allocated prior to calling this function. The space required for
|
||||
* *pFFTSpec, in bytes, can be determined using <FFTGetBufSize_C_SC16>.
|
||||
*
|
||||
* Parameters:
|
||||
* [in] order base-2 logarithm of the desired block length;
|
||||
* valid in the range [0,12].
|
||||
* [out] pFFTSpec pointer to initialized specification structure.
|
||||
*
|
||||
* Return Value:
|
||||
* Standard omxError result. See enumeration for possible result codes.
|
||||
*
|
||||
*/
|
||||
|
||||
OMXResult omxSP_FFTInit_C_SC16(
|
||||
OMXFFTSpec_C_SC16* pFFTSpec,
|
||||
OMX_INT order
|
||||
)
|
||||
{
|
||||
OMX_INT i,j;
|
||||
OMX_SC16 *pTwiddle, *pBuf;
|
||||
OMX_U16 *pBitRev;
|
||||
OMX_INT Nby2,N,M,diff,step;
|
||||
OMX_U32 pTmp;
|
||||
ARMsFFTSpec_SC16 *pFFTStruct = 0;
|
||||
OMX_S16 x,y,xNeg;
|
||||
OMX_S32 xS32,yS32;
|
||||
|
||||
|
||||
pFFTStruct = (ARMsFFTSpec_SC16 *) pFFTSpec;
|
||||
|
||||
/* if order zero no init is needed */
|
||||
if (order == 0)
|
||||
{
|
||||
pFFTStruct->N = 1;
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
/* Do the initializations */
|
||||
Nby2 = 1 << (order - 1);
|
||||
N = Nby2 << 1;
|
||||
M = N>>3;
|
||||
|
||||
pBitRev = NULL ;
|
||||
|
||||
pTwiddle = (OMX_SC16 *)
|
||||
(sizeof(ARMsFFTSpec_SC16) + (OMX_S8*) pFFTSpec);
|
||||
|
||||
/* Align to 32 byte boundary */
|
||||
pTmp = ((OMX_U32)pTwiddle)&31; /* (OMX_U32)pTwiddle % 32 */
|
||||
if(pTmp != 0)
|
||||
pTwiddle = (OMX_SC16*) ((OMX_S8*)pTwiddle + (32-pTmp));
|
||||
|
||||
pBuf = (OMX_SC16 *)
|
||||
(sizeof(OMX_SC16) * (3*N/4) + (OMX_S8*) pTwiddle);
|
||||
|
||||
/* Align to 32 byte boundary */
|
||||
pTmp = ((OMX_U32)pBuf)&31; /* (OMX_U32)pBuf % 32 */
|
||||
if(pTmp != 0)
|
||||
pBuf = (OMX_SC16*) ((OMX_S8*)pBuf + (32-pTmp));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Filling Twiddle factors :
|
||||
* The original twiddle table "armSP_FFT_S16TwiddleTable" is of size (MaxSize/8 + 1)
|
||||
* Rest of the values i.e., upto MaxSize are calculated using the symmetries of sin and cos
|
||||
* The max size of the twiddle table needed is 3N/4 for a radix-4 stage
|
||||
*
|
||||
* W = (-2 * PI) / N
|
||||
* N = 1 << order
|
||||
* W = -PI >> (order - 1)
|
||||
*/
|
||||
|
||||
|
||||
|
||||
diff = 12 - order;
|
||||
step = 1<<diff; /* step into the twiddle table for the current order */
|
||||
|
||||
xS32 = armSP_FFT_S32TwiddleTable[0];
|
||||
yS32 = armSP_FFT_S32TwiddleTable[1];
|
||||
x = (xS32+0x8000)>>16;
|
||||
y = (yS32+0x8000)>>16;
|
||||
|
||||
xNeg = 0x7FFF;
|
||||
|
||||
if(order >=3)
|
||||
{
|
||||
/* i = 0 case */
|
||||
pTwiddle[0].Re = x;
|
||||
pTwiddle[0].Im = y;
|
||||
pTwiddle[2*M].Re = -y;
|
||||
pTwiddle[2*M].Im = xNeg;
|
||||
pTwiddle[4*M].Re = xNeg;
|
||||
pTwiddle[4*M].Im = y;
|
||||
|
||||
|
||||
for (i=1; i<=M; i++)
|
||||
{
|
||||
j = i*step;
|
||||
|
||||
xS32 = armSP_FFT_S32TwiddleTable[2*j];
|
||||
yS32 = armSP_FFT_S32TwiddleTable[2*j+1];
|
||||
x = (xS32+0x8000)>>16;
|
||||
y = (yS32+0x8000)>>16;
|
||||
|
||||
pTwiddle[i].Re = x;
|
||||
pTwiddle[i].Im = y;
|
||||
pTwiddle[2*M-i].Re = -y;
|
||||
pTwiddle[2*M-i].Im = -x;
|
||||
pTwiddle[2*M+i].Re = y;
|
||||
pTwiddle[2*M+i].Im = -x;
|
||||
pTwiddle[4*M-i].Re = -x;
|
||||
pTwiddle[4*M-i].Im = y;
|
||||
pTwiddle[4*M+i].Re = -x;
|
||||
pTwiddle[4*M+i].Im = -y;
|
||||
pTwiddle[6*M-i].Re = y;
|
||||
pTwiddle[6*M-i].Im = x;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
if (order == 2)
|
||||
{
|
||||
pTwiddle[0].Re = x;
|
||||
pTwiddle[0].Im = y;
|
||||
pTwiddle[1].Re = -y;
|
||||
pTwiddle[1].Im = xNeg;
|
||||
pTwiddle[2].Re = xNeg;
|
||||
pTwiddle[2].Im = y;
|
||||
|
||||
}
|
||||
if (order == 1)
|
||||
{
|
||||
pTwiddle[0].Re = x;
|
||||
pTwiddle[0].Im = y;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Update the structure */
|
||||
pFFTStruct->N = N;
|
||||
pFFTStruct->pTwiddle = pTwiddle;
|
||||
pFFTStruct->pBitRev = pBitRev;
|
||||
pFFTStruct->pBuf = pBuf;
|
||||
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
* END OF FILE
|
||||
*****************************************************************************/
|
||||
|
196
media/openmax_dl/dl/sp/src/omxSP_FFTInit_C_SC32.c
Normal file
196
media/openmax_dl/dl/sp/src/omxSP_FFTInit_C_SC32.c
Normal file
@ -0,0 +1,196 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This file was originally licensed as follows. It has been
|
||||
* relicensed with permission from the copyright holders.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* File Name: omxSP_FFTInit_C_SC32.c
|
||||
* OpenMAX DL: v1.0.2
|
||||
* Last Modified Revision: 7769
|
||||
* Last Modified Date: Thu, 27 Sep 2007
|
||||
*
|
||||
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
*
|
||||
*
|
||||
* Description:
|
||||
* Initializes the specification structures required
|
||||
*/
|
||||
|
||||
#include "dl/api/armOMX.h"
|
||||
#include "dl/api/omxtypes.h"
|
||||
#include "dl/sp/api/armSP.h"
|
||||
#include "dl/sp/api/omxSP.h"
|
||||
|
||||
|
||||
/**
|
||||
* Function: omxSP_FFTInit_C_SC32
|
||||
*
|
||||
* Description:
|
||||
* Initializes the specification structures required for the
|
||||
* complex FFT and IFFT functions.
|
||||
*
|
||||
* Remarks:
|
||||
* Desired block length is specified as an input. The function is used to
|
||||
* initialize the specification structures for functions <FFTFwd_CToC_SC32_Sfs>
|
||||
* and <FFTInv_CToC_SC32_Sfs>. Memory for the specification structure *pFFTSpec
|
||||
* must be allocated prior to calling this function. The space required for
|
||||
* *pFFTSpec, in bytes, can be determined using <FFTGetBufSize_C_SC32>.
|
||||
*
|
||||
* Parameters:
|
||||
* [in] order base-2 logarithm of the desired block length;
|
||||
* valid in the range [0,12].
|
||||
* [out] pFFTSpec pointer to initialized specification structure.
|
||||
*
|
||||
* Return Value:
|
||||
* Standard omxError result. See enumeration for possible result codes.
|
||||
*
|
||||
*/
|
||||
|
||||
OMXResult omxSP_FFTInit_C_SC32(
|
||||
OMXFFTSpec_C_SC32* pFFTSpec,
|
||||
OMX_INT order
|
||||
)
|
||||
{
|
||||
OMX_INT i,j;
|
||||
OMX_SC32 *pTwiddle, *pBuf;
|
||||
OMX_U16 *pBitRev;
|
||||
OMX_U32 pTmp;
|
||||
OMX_INT Nby2,N,M,diff, step;
|
||||
ARMsFFTSpec_SC32 *pFFTStruct = 0;
|
||||
OMX_S32 x,y,xNeg;
|
||||
|
||||
pFFTStruct = (ARMsFFTSpec_SC32 *) pFFTSpec;
|
||||
|
||||
/* if order zero no init is needed */
|
||||
if (order == 0)
|
||||
{
|
||||
pFFTStruct->N = 1;
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
/* Do the initializations */
|
||||
Nby2 = 1 << (order - 1);
|
||||
N = Nby2 << 1;
|
||||
M = N>>3;
|
||||
|
||||
|
||||
pBitRev = NULL ; /* optimized implementations don't use bitreversal */
|
||||
|
||||
pTwiddle = (OMX_SC32 *)
|
||||
(sizeof(ARMsFFTSpec_SC32) + (OMX_S8*) pFFTSpec);
|
||||
|
||||
/* Align to 32 byte boundary */
|
||||
pTmp = ((OMX_U32)pTwiddle)&31; /* (OMX_U32)pTwiddle % 32 */
|
||||
if(pTmp != 0)
|
||||
pTwiddle = (OMX_SC32*) ((OMX_S8*)pTwiddle + (32-pTmp));
|
||||
|
||||
pBuf = (OMX_SC32*)
|
||||
(sizeof(OMX_SC32) * (3*N/4) + (OMX_S8*) pTwiddle);
|
||||
|
||||
/* Align to 32 byte boundary */
|
||||
pTmp = ((OMX_U32)pBuf)&31; /* (OMX_U32)pBuf % 32 */
|
||||
if(pTmp != 0)
|
||||
pBuf = (OMX_SC32*) ((OMX_S8*)pBuf + (32-pTmp));
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Filling Twiddle factors :
|
||||
* The original twiddle table "armSP_FFT_S32TwiddleTable" is of size (MaxSize/8 + 1)
|
||||
* Rest of the values i.e., upto MaxSize are calculated using the symmetries of sin and cos
|
||||
* The max size of the twiddle table needed is 3N/4 for a radix-4 stage
|
||||
*
|
||||
* W = (-2 * PI) / N
|
||||
* N = 1 << order
|
||||
* W = -PI >> (order - 1)
|
||||
*/
|
||||
|
||||
|
||||
diff = 12 - order;
|
||||
step = 1<<diff; /* step into the twiddle table for the current order */
|
||||
|
||||
x = armSP_FFT_S32TwiddleTable[0];
|
||||
y = armSP_FFT_S32TwiddleTable[1];
|
||||
xNeg = 0x7FFFFFFF;
|
||||
|
||||
if(order >=3)
|
||||
{
|
||||
/* i = 0 case */
|
||||
pTwiddle[0].Re = x;
|
||||
pTwiddle[0].Im = y;
|
||||
pTwiddle[2*M].Re = -y;
|
||||
pTwiddle[2*M].Im = xNeg;
|
||||
pTwiddle[4*M].Re = xNeg;
|
||||
pTwiddle[4*M].Im = y;
|
||||
|
||||
|
||||
for (i=1; i<=M; i++)
|
||||
{
|
||||
j = i*step;
|
||||
|
||||
x = armSP_FFT_S32TwiddleTable[2*j];
|
||||
y = armSP_FFT_S32TwiddleTable[2*j+1];
|
||||
|
||||
pTwiddle[i].Re = x;
|
||||
pTwiddle[i].Im = y;
|
||||
pTwiddle[2*M-i].Re = -y;
|
||||
pTwiddle[2*M-i].Im = -x;
|
||||
pTwiddle[2*M+i].Re = y;
|
||||
pTwiddle[2*M+i].Im = -x;
|
||||
pTwiddle[4*M-i].Re = -x;
|
||||
pTwiddle[4*M-i].Im = y;
|
||||
pTwiddle[4*M+i].Re = -x;
|
||||
pTwiddle[4*M+i].Im = -y;
|
||||
pTwiddle[6*M-i].Re = y;
|
||||
pTwiddle[6*M-i].Im = x;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
if (order == 2)
|
||||
{
|
||||
pTwiddle[0].Re = x;
|
||||
pTwiddle[0].Im = y;
|
||||
pTwiddle[1].Re = -y;
|
||||
pTwiddle[1].Im = xNeg;
|
||||
pTwiddle[2].Re = xNeg;
|
||||
pTwiddle[2].Im = y;
|
||||
|
||||
}
|
||||
if (order == 1)
|
||||
{
|
||||
pTwiddle[0].Re = x;
|
||||
pTwiddle[0].Im = y;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Update the structure */
|
||||
pFFTStruct->N = N;
|
||||
pFFTStruct->pTwiddle = pTwiddle;
|
||||
pFFTStruct->pBitRev = pBitRev;
|
||||
pFFTStruct->pBuf = pBuf;
|
||||
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
* END OF FILE
|
||||
*****************************************************************************/
|
||||
|
210
media/openmax_dl/dl/sp/src/omxSP_FFTInit_R_F32.c
Normal file
210
media/openmax_dl/dl/sp/src/omxSP_FFTInit_R_F32.c
Normal file
@ -0,0 +1,210 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This is a modification of omxSP_FFTInit_R_S32.c to support float
|
||||
* instead of S32.
|
||||
*/
|
||||
|
||||
#include "dl/api/armOMX.h"
|
||||
#include "dl/api/omxtypes.h"
|
||||
#include "dl/sp/api/armSP.h"
|
||||
#include "dl/sp/api/omxSP.h"
|
||||
|
||||
/**
|
||||
* Function: omxSP_FFTInit_R_F32
|
||||
*
|
||||
* Description:
|
||||
* Initialize the real forward-FFT specification information struct.
|
||||
*
|
||||
* Remarks:
|
||||
* This function is used to initialize the specification structures
|
||||
* for functions <ippsFFTFwd_RToCCS_F32_Sfs> and
|
||||
* <ippsFFTInv_CCSToR_F32_Sfs>. Memory for *pFFTSpec must be
|
||||
* allocated prior to calling this function. The number of bytes
|
||||
* required for *pFFTSpec can be determined using
|
||||
* <FFTGetBufSize_R_F32>.
|
||||
*
|
||||
* Parameters:
|
||||
* [in] order base-2 logarithm of the desired block length;
|
||||
* valid in the range [1,12]. ([1,15] if
|
||||
* BIG_FFT_TABLE is defined.)
|
||||
* [out] pFFTSpec pointer to the initialized specification structure.
|
||||
*
|
||||
* Return Value:
|
||||
* Standard omxError result. See enumeration for possible result codes.
|
||||
*
|
||||
*/
|
||||
OMXResult omxSP_FFTInit_R_F32(OMXFFTSpec_R_F32* pFFTSpec, OMX_INT order) {
  /*
   * Fills the real-float FFT specification stored in the caller's block:
   * the ARMsFFTSpec_R_FC32 header first, then the twiddle table (5N/8
   * complex entries), then the work buffer, the latter two rounded up to
   * 32-byte boundaries.
   *
   * Returns OMX_Sts_BadArgErr when pFFTSpec is NULL or order is outside
   * [1, TWIDDLE_TABLE_ORDER]; OMX_Sts_NoErr otherwise.
   */
  OMX_INT i;
  OMX_INT j;
  OMX_FC32* pTwiddle;
  OMX_FC32* pTwiddle1;
  OMX_FC32* pTwiddle2;
  OMX_FC32* pTwiddle3;
  OMX_FC32* pTwiddle4;
  OMX_F32* pBuf;
  OMX_U16* pBitRev;
  OMX_U32 pTmp;
  OMX_INT Nby2;
  OMX_INT N;
  OMX_INT M;
  OMX_INT diff;
  OMX_INT step;
  OMX_F32 x;
  OMX_F32 y;
  OMX_F32 xNeg;
  ARMsFFTSpec_R_FC32* pFFTStruct = 0;

  pFFTStruct = (ARMsFFTSpec_R_FC32 *) pFFTSpec;

  /* Validate args */
  if (!pFFTSpec || (order < 1) || (order > TWIDDLE_TABLE_ORDER))
    return OMX_Sts_BadArgErr;

  /* Do the initializations */
  Nby2 = 1 << (order - 1);
  N = Nby2 << 1;

  /* optimized implementations don't use bitreversal */
  pBitRev = NULL;

  /*
   * The header stored at the start of the block is an ARMsFFTSpec_R_FC32
   * (see the cast above), so that is the size to skip over.
   */
  pTwiddle = (OMX_FC32 *) (sizeof(ARMsFFTSpec_R_FC32) + (OMX_S8*) pFFTSpec);

  /* Align to 32 byte boundary */
  pTmp = ((OMX_U32)pTwiddle) & 31;
  if (pTmp)
    pTwiddle = (OMX_FC32*) ((OMX_S8*)pTwiddle + (32 - pTmp));

  pBuf = (OMX_F32*) (sizeof(OMX_FC32)*(5*N/8) + (OMX_S8*) pTwiddle);

  /* Align to 32 byte boundary */
  pTmp = ((OMX_U32)pBuf)&31;                 /* (OMX_U32)pBuf % 32 */
  if (pTmp)
    pBuf = (OMX_F32*) ((OMX_S8*)pBuf + (32 - pTmp));

  /*
   * Filling Twiddle factors :
   *
   * exp^(-j*2*PI*k/ (N/2) ) ; k=0,1,2,...,3/4(N/2)
   *
   * N/2 point complex FFT is used to compute N point real FFT. Only
   * the entries for i = 0..(N/2)/8 are read from
   * "armSP_FFT_F32TwiddleTable"; the rest of the values are derived
   * using the symmetries of sin and cos. The max size of the twiddle
   * table needed is 3/4(N/2) for a radix-4 stage
   *
   * W = (-2 * PI) / N
   * N = 1 << order
   * W = -PI >> (order - 1)
   */

  M = Nby2 >> 3;
  diff = TWIDDLE_TABLE_ORDER - (order - 1);
  /* step into the twiddle table for the current order */
  step = 1 << diff;

  x = armSP_FFT_F32TwiddleTable[0];
  y = armSP_FFT_F32TwiddleTable[1];
  xNeg = 1;

  if ((order - 1) >= 3) {
    /* i = 0 case */
    pTwiddle[0].Re = x;
    pTwiddle[0].Im = y;
    pTwiddle[2*M].Re = -y;
    pTwiddle[2*M].Im = xNeg;
    pTwiddle[4*M].Re = xNeg;
    pTwiddle[4*M].Im = y;

    /*
     * Store order matters: for i == M the mirrored indices coincide
     * pairwise and the later store is the one that is kept.
     */
    for (i = 1; i <= M; i++) {
      j = i*step;

      x = armSP_FFT_F32TwiddleTable[2*j];
      y = armSP_FFT_F32TwiddleTable[2*j+1];

      pTwiddle[i].Re = x;
      pTwiddle[i].Im = y;
      pTwiddle[2*M-i].Re = -y;
      pTwiddle[2*M-i].Im = -x;
      pTwiddle[2*M+i].Re = y;
      pTwiddle[2*M+i].Im = -x;
      pTwiddle[4*M-i].Re = -x;
      pTwiddle[4*M-i].Im = y;
      pTwiddle[4*M+i].Re = -x;
      pTwiddle[4*M+i].Im = -y;
      pTwiddle[6*M-i].Re = y;
      pTwiddle[6*M-i].Im = x;
    }
  } else if ((order - 1) == 2) {
    pTwiddle[0].Re = x;
    pTwiddle[0].Im = y;
    pTwiddle[1].Re = -y;
    pTwiddle[1].Im = xNeg;
    pTwiddle[2].Re = xNeg;
    pTwiddle[2].Im = y;
  } else if ((order-1) == 1) {
    pTwiddle[0].Re = x;
    pTwiddle[0].Im = y;
  }

  /*
   * Now fill the last N/4 values : exp^(-j*2*PI*k/N) ;
   * k=1,3,5,...,N/2-1 These are used for the final twiddle fix-up for
   * converting complex to real FFT
   */

  M = N >> 3;
  diff = TWIDDLE_TABLE_ORDER - order;
  step = 1 << diff;

  /*
   * The fix-up values start 3N/8 entries into the table. Four cursors
   * walk that N/4-entry region: two forwards (1 and 3) and two
   * backwards (2 and 4).
   */
  pTwiddle1 = pTwiddle + 3*N/8;
  pTwiddle4 = pTwiddle1 + (N/4 - 1);
  pTwiddle3 = pTwiddle1 + N/8;
  pTwiddle2 = pTwiddle1 + (N/8 - 1);

  x = armSP_FFT_F32TwiddleTable[0];
  y = armSP_FFT_F32TwiddleTable[1];
  xNeg = 1;

  if (order >=3) {
    /*
     * Store order matters here as well: the cursors can refer to the
     * same entry (e.g. cursors 1 and 2 when N == 8), in which case the
     * later store is kept.
     */
    for (i = 1; i <= M; i += 2) {
      j = i*step;

      x = armSP_FFT_F32TwiddleTable[2*j];
      y = armSP_FFT_F32TwiddleTable[2*j+1];

      pTwiddle1[0].Re = x;
      pTwiddle1[0].Im = y;
      pTwiddle1 += 1;
      pTwiddle2[0].Re = -y;
      pTwiddle2[0].Im = -x;
      pTwiddle2 -= 1;
      pTwiddle3[0].Re = y;
      pTwiddle3[0].Im = -x;
      pTwiddle3 += 1;
      pTwiddle4[0].Re = -x;
      pTwiddle4[0].Im = y;
      pTwiddle4 -= 1;
    }
  } else {
    if (order == 2) {
      pTwiddle1[0].Re = -y;
      pTwiddle1[0].Im = xNeg;
    }
  }

  /* Update the structure */
  pFFTStruct->N = N;
  pFFTStruct->pTwiddle = pTwiddle;
  pFFTStruct->pBitRev = pBitRev;
  pFFTStruct->pBuf = pBuf;

  return OMX_Sts_NoErr;
}
|
263
media/openmax_dl/dl/sp/src/omxSP_FFTInit_R_S16S32.c
Normal file
263
media/openmax_dl/dl/sp/src/omxSP_FFTInit_R_S16S32.c
Normal file
@ -0,0 +1,263 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This file was originally licensed as follows. It has been
|
||||
* relicensed with permission from the copyright holders.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* File Name: omxSP_FFTInit_R_S16S32.c
|
||||
* OpenMAX DL: v1.0.2
|
||||
* Last Modified Revision: 7777
|
||||
* Last Modified Date: Thu, 27 Sep 2007
|
||||
*
|
||||
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
*
|
||||
*
|
||||
* Description:
|
||||
* Initialize the real forward-FFT specification information struct.
|
||||
*/
|
||||
|
||||
#include "dl/api/armOMX.h"
|
||||
#include "dl/api/omxtypes.h"
|
||||
#include "dl/sp/api/armSP.h"
|
||||
#include "dl/sp/api/omxSP.h"
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Function: omxSP_FFTInit_R_S16S32
|
||||
*
|
||||
* Description:
|
||||
* Initialize the real forward-FFT specification information struct.
|
||||
*
|
||||
* Remarks:
|
||||
* This function is used to initialize the specification structures
|
||||
* for functions <ippsFFTFwd_RToCCS_S16_S32_Sfs> and
|
||||
* <ippsFFTInv_CCSToR_S32_S16_Sfs>. Memory for *pFFTSpec must be
|
||||
* allocated prior to calling this function. The number of bytes
|
||||
* required for *pFFTSpec can be determined using
|
||||
* <FFTGetBufSize_R_S16_S32>.
|
||||
*
|
||||
* Parameters:
|
||||
* [in] order base-2 logarithm of the desired block length;
|
||||
* valid in the range [0,12].
|
||||
* [out] pFFTSpec pointer to the initialized specification structure.
|
||||
*
|
||||
* Return Value:
|
||||
* Standard omxError result. See enumeration for possible result codes.
|
||||
*
|
||||
*/
|
||||
|
||||
OMXResult omxSP_FFTInit_R_S16S32(
|
||||
OMXFFTSpec_R_S16S32* pFFTSpec,
|
||||
OMX_INT order
|
||||
)
|
||||
{
|
||||
OMX_INT i,j;
|
||||
OMX_SC32 *pTwiddle,*pTwiddle1,*pTwiddle2,*pTwiddle3,*pTwiddle4;
|
||||
OMX_S32 *pBuf;
|
||||
OMX_U16 *pBitRev;
|
||||
OMX_U32 pTmp;
|
||||
OMX_INT Nby2,N,M,diff, step;
|
||||
OMX_S32 x,y,xNeg;
|
||||
ARMsFFTSpec_R_SC32 *pFFTStruct = 0;
|
||||
|
||||
|
||||
pFFTStruct = (ARMsFFTSpec_R_SC32 *) pFFTSpec;
|
||||
|
||||
/* if order zero no init is needed */
|
||||
if (order == 0)
|
||||
{
|
||||
pFFTStruct->N = 1;
|
||||
pFFTStruct->pTwiddle = NULL;
|
||||
pFFTStruct->pBuf = (OMX_S32 *)
|
||||
(sizeof(ARMsFFTSpec_R_SC32) + (OMX_S8*) pFFTSpec);
|
||||
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
/* Do the initializations */
|
||||
Nby2 = 1 << (order - 1);
|
||||
N = Nby2 << 1;
|
||||
|
||||
|
||||
|
||||
pBitRev = NULL ; /* optimized implementations don't use bitreversal */
|
||||
|
||||
pTwiddle = (OMX_SC32 *)
|
||||
(sizeof(ARMsFFTSpec_R_SC32) + (OMX_S8*) pFFTSpec);
|
||||
|
||||
/* Align to 32 byte boundary */
|
||||
pTmp = ((OMX_U32)pTwiddle)&31; /* (OMX_U32)pTwiddle % 32 */
|
||||
if(pTmp != 0)
|
||||
pTwiddle = (OMX_SC32*) ((OMX_S8*)pTwiddle + (32-pTmp));
|
||||
|
||||
|
||||
pBuf = (OMX_S32*)
|
||||
(sizeof(OMX_SC32) * (5*N/8) + (OMX_S8*) pTwiddle);
|
||||
|
||||
/* Align to 32 byte boundary */
|
||||
pTmp = ((OMX_U32)pBuf)&31; /* (OMX_U32)pBuf % 32 */
|
||||
if(pTmp != 0)
|
||||
pBuf = (OMX_S32*) ((OMX_S8*)pBuf + (32-pTmp));
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Filling Twiddle factors : exp^(-j*2*PI*k/ (N/2) ) ; k=0,1,2,...,3/4(N/2)
|
||||
* N/2 point complex FFT is used to compute N point real FFT
|
||||
* The original twiddle table "armSP_FFT_S32TwiddleTable" is of size (MaxSize/8 + 1)
|
||||
* Rest of the values i.e., upto MaxSize are calculated using the symmetries of sin and cos
|
||||
* The max size of the twiddle table needed is 3/4(N/2) for a radix-4 stage
|
||||
*
|
||||
* W = (-2 * PI) / N
|
||||
* N = 1 << order
|
||||
* W = -PI >> (order - 1)
|
||||
*/
|
||||
|
||||
M = Nby2>>3;
|
||||
diff = 12 - (order-1);
|
||||
step = 1<<diff; /* step into the twiddle table for the current order */
|
||||
|
||||
x = armSP_FFT_S32TwiddleTable[0];
|
||||
y = armSP_FFT_S32TwiddleTable[1];
|
||||
xNeg = 0x7FFFFFFF;
|
||||
|
||||
if((order-1) >=3)
|
||||
{
|
||||
/* i = 0 case */
|
||||
pTwiddle[0].Re = x;
|
||||
pTwiddle[0].Im = y;
|
||||
pTwiddle[2*M].Re = -y;
|
||||
pTwiddle[2*M].Im = xNeg;
|
||||
pTwiddle[4*M].Re = xNeg;
|
||||
pTwiddle[4*M].Im = y;
|
||||
|
||||
|
||||
for (i=1; i<=M; i++)
|
||||
{
|
||||
j = i*step;
|
||||
|
||||
x = armSP_FFT_S32TwiddleTable[2*j];
|
||||
y = armSP_FFT_S32TwiddleTable[2*j+1];
|
||||
|
||||
pTwiddle[i].Re = x;
|
||||
pTwiddle[i].Im = y;
|
||||
pTwiddle[2*M-i].Re = -y;
|
||||
pTwiddle[2*M-i].Im = -x;
|
||||
pTwiddle[2*M+i].Re = y;
|
||||
pTwiddle[2*M+i].Im = -x;
|
||||
pTwiddle[4*M-i].Re = -x;
|
||||
pTwiddle[4*M-i].Im = y;
|
||||
pTwiddle[4*M+i].Re = -x;
|
||||
pTwiddle[4*M+i].Im = -y;
|
||||
pTwiddle[6*M-i].Re = y;
|
||||
pTwiddle[6*M-i].Im = x;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((order-1) == 2)
|
||||
{
|
||||
pTwiddle[0].Re = x;
|
||||
pTwiddle[0].Im = y;
|
||||
pTwiddle[1].Re = -y;
|
||||
pTwiddle[1].Im = xNeg;
|
||||
pTwiddle[2].Re = xNeg;
|
||||
pTwiddle[2].Im = y;
|
||||
|
||||
}
|
||||
if ((order-1) == 1)
|
||||
{
|
||||
pTwiddle[0].Re = x;
|
||||
pTwiddle[0].Im = y;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Now fill the last N/4 values : exp^(-j*2*PI*k/N) ; k=1,3,5,...,N/2-1
|
||||
* These are used for the final twiddle fix-up for converting complex to real FFT
|
||||
*/
|
||||
|
||||
M = N>>3;
|
||||
diff = 12 - order;
|
||||
step = 1<<diff;
|
||||
|
||||
pTwiddle1 = pTwiddle + 3*N/8;
|
||||
pTwiddle4 = pTwiddle1 + (N/4-1);
|
||||
pTwiddle3 = pTwiddle1 + N/8;
|
||||
pTwiddle2 = pTwiddle1 + (N/8-1);
|
||||
|
||||
x = armSP_FFT_S32TwiddleTable[0];
|
||||
y = armSP_FFT_S32TwiddleTable[1];
|
||||
xNeg = 0x7FFFFFFF;
|
||||
|
||||
if((order) >=3)
|
||||
{
|
||||
|
||||
|
||||
for (i=1; i<=M; i+=2 )
|
||||
{
|
||||
j = i*step;
|
||||
|
||||
x = armSP_FFT_S32TwiddleTable[2*j];
|
||||
y = armSP_FFT_S32TwiddleTable[2*j+1];
|
||||
|
||||
pTwiddle1[0].Re = x;
|
||||
pTwiddle1[0].Im = y;
|
||||
pTwiddle1 += 1;
|
||||
pTwiddle2[0].Re = -y;
|
||||
pTwiddle2[0].Im = -x;
|
||||
pTwiddle2 -= 1;
|
||||
pTwiddle3[0].Re = y;
|
||||
pTwiddle3[0].Im = -x;
|
||||
pTwiddle3 += 1;
|
||||
pTwiddle4[0].Re = -x;
|
||||
pTwiddle4[0].Im = y;
|
||||
pTwiddle4 -= 1;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
if (order == 2)
|
||||
{
|
||||
|
||||
pTwiddle1[0].Re = -y;
|
||||
pTwiddle1[0].Im = xNeg;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Update the structure */
|
||||
pFFTStruct->N = N;
|
||||
pFFTStruct->pTwiddle = pTwiddle;
|
||||
pFFTStruct->pBitRev = pBitRev;
|
||||
pFFTStruct->pBuf = pBuf;
|
||||
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
/*****************************************************************************
|
||||
* END OF FILE
|
||||
*****************************************************************************/
|
||||
|
261
media/openmax_dl/dl/sp/src/omxSP_FFTInit_R_S32.c
Normal file
261
media/openmax_dl/dl/sp/src/omxSP_FFTInit_R_S32.c
Normal file
@ -0,0 +1,261 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*
|
||||
* This file was originally licensed as follows. It has been
|
||||
* relicensed with permission from the copyright holders.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* File Name: omxSP_FFTInit_R_S32.c
|
||||
* OpenMAX DL: v1.0.2
|
||||
* Last Modified Revision: 7777
|
||||
* Last Modified Date: Thu, 27 Sep 2007
|
||||
*
|
||||
* (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
*
|
||||
*
|
||||
* Description:
|
||||
* Initialize the real forward-FFT specification information struct.
|
||||
*/
|
||||
|
||||
#include "dl/api/armOMX.h"
|
||||
#include "dl/api/omxtypes.h"
|
||||
#include "dl/sp/api/armSP.h"
|
||||
#include "dl/sp/api/omxSP.h"
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Function: omxSP_FFTInit_R_S32
|
||||
*
|
||||
* Description:
|
||||
* Initialize the real forward-FFT specification information struct.
|
||||
*
|
||||
* Remarks:
|
||||
* This function is used to initialize the specification structures
|
||||
* for functions <ippsFFTFwd_RToCCS_S32_Sfs> and
|
||||
* <ippsFFTInv_CCSToR_S32_Sfs>. Memory for *pFFTSpec must be
|
||||
* allocated prior to calling this function. The number of bytes
|
||||
* required for *pFFTSpec can be determined using
|
||||
* <FFTGetBufSize_R_S32>.
|
||||
*
|
||||
* Parameters:
|
||||
* [in] order base-2 logarithm of the desired block length;
|
||||
* valid in the range [0,12].
|
||||
* [out] pFFTFwdSpec pointer to the initialized specification structure.
|
||||
*
|
||||
* Return Value:
|
||||
* Standard omxError result. See enumeration for possible result codes.
|
||||
*
|
||||
*/
|
||||
OMXResult omxSP_FFTInit_R_S32(
|
||||
OMXFFTSpec_R_S32* pFFTSpec,
|
||||
OMX_INT order
|
||||
)
|
||||
{
|
||||
OMX_INT i,j;
|
||||
OMX_SC32 *pTwiddle,*pTwiddle1,*pTwiddle2,*pTwiddle3,*pTwiddle4;
|
||||
OMX_S32 *pBuf;
|
||||
OMX_U16 *pBitRev;
|
||||
OMX_U32 pTmp;
|
||||
OMX_INT Nby2,N,M,diff, step;
|
||||
OMX_S32 x,y,xNeg;
|
||||
ARMsFFTSpec_R_SC32 *pFFTStruct = 0;
|
||||
|
||||
|
||||
pFFTStruct = (ARMsFFTSpec_R_SC32 *) pFFTSpec;
|
||||
|
||||
/* if order zero no init is needed */
|
||||
if (order == 0)
|
||||
{
|
||||
pFFTStruct->N = 1;
|
||||
pFFTStruct->pTwiddle = NULL;
|
||||
pFFTStruct->pBuf = (OMX_S32 *)
|
||||
(sizeof(ARMsFFTSpec_R_SC32) + (OMX_S8*) pFFTSpec);
|
||||
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
|
||||
/* Do the initializations */
|
||||
Nby2 = 1 << (order - 1);
|
||||
N = Nby2 << 1;
|
||||
|
||||
|
||||
|
||||
pBitRev = NULL ; /* optimized implementations don't use bitreversal */
|
||||
|
||||
pTwiddle = (OMX_SC32 *)
|
||||
(sizeof(ARMsFFTSpec_R_SC32) + (OMX_S8*) pFFTSpec);
|
||||
|
||||
/* Align to 32 byte boundary */
|
||||
pTmp = ((OMX_U32)pTwiddle)&31; /* (OMX_U32)pTwiddle % 32 */
|
||||
if(pTmp != 0)
|
||||
pTwiddle = (OMX_SC32*) ((OMX_S8*)pTwiddle + (32-pTmp));
|
||||
|
||||
pBuf = (OMX_S32*)
|
||||
(sizeof(OMX_SC32) * (5*N/8) + (OMX_S8*) pTwiddle);
|
||||
|
||||
/* Align to 32 byte boundary */
|
||||
pTmp = ((OMX_U32)pBuf)&31; /* (OMX_U32)pBuf % 32 */
|
||||
if(pTmp != 0)
|
||||
pBuf = (OMX_S32*) ((OMX_S8*)pBuf + (32-pTmp));
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Filling Twiddle factors : exp^(-j*2*PI*k/ (N/2) ) ; k=0,1,2,...,3/4(N/2)
|
||||
* N/2 point complex FFT is used to compute N point real FFT
|
||||
* The original twiddle table "armSP_FFT_S32TwiddleTable" is of size (MaxSize/8 + 1)
|
||||
* Rest of the values i.e., upto MaxSize are calculated using the symmetries of sin and cos
|
||||
* The max size of the twiddle table needed is 3/4(N/2) for a radix-4 stage
|
||||
*
|
||||
* W = (-2 * PI) / N
|
||||
* N = 1 << order
|
||||
* W = -PI >> (order - 1)
|
||||
*/
|
||||
|
||||
M = Nby2>>3;
|
||||
diff = 12 - (order-1);
|
||||
step = 1<<diff; /* step into the twiddle table for the current order */
|
||||
|
||||
x = armSP_FFT_S32TwiddleTable[0];
|
||||
y = armSP_FFT_S32TwiddleTable[1];
|
||||
xNeg = 0x7FFFFFFF;
|
||||
|
||||
if((order-1) >=3)
|
||||
{
|
||||
/* i = 0 case */
|
||||
pTwiddle[0].Re = x;
|
||||
pTwiddle[0].Im = y;
|
||||
pTwiddle[2*M].Re = -y;
|
||||
pTwiddle[2*M].Im = xNeg;
|
||||
pTwiddle[4*M].Re = xNeg;
|
||||
pTwiddle[4*M].Im = y;
|
||||
|
||||
|
||||
for (i=1; i<=M; i++)
|
||||
{
|
||||
j = i*step;
|
||||
|
||||
x = armSP_FFT_S32TwiddleTable[2*j];
|
||||
y = armSP_FFT_S32TwiddleTable[2*j+1];
|
||||
|
||||
pTwiddle[i].Re = x;
|
||||
pTwiddle[i].Im = y;
|
||||
pTwiddle[2*M-i].Re = -y;
|
||||
pTwiddle[2*M-i].Im = -x;
|
||||
pTwiddle[2*M+i].Re = y;
|
||||
pTwiddle[2*M+i].Im = -x;
|
||||
pTwiddle[4*M-i].Re = -x;
|
||||
pTwiddle[4*M-i].Im = y;
|
||||
pTwiddle[4*M+i].Re = -x;
|
||||
pTwiddle[4*M+i].Im = -y;
|
||||
pTwiddle[6*M-i].Re = y;
|
||||
pTwiddle[6*M-i].Im = x;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((order-1) == 2)
|
||||
{
|
||||
pTwiddle[0].Re = x;
|
||||
pTwiddle[0].Im = y;
|
||||
pTwiddle[1].Re = -y;
|
||||
pTwiddle[1].Im = xNeg;
|
||||
pTwiddle[2].Re = xNeg;
|
||||
pTwiddle[2].Im = y;
|
||||
|
||||
}
|
||||
if ((order-1) == 1)
|
||||
{
|
||||
pTwiddle[0].Re = x;
|
||||
pTwiddle[0].Im = y;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Now fill the last N/4 values : exp^(-j*2*PI*k/N) ; k=1,3,5,...,N/2-1
|
||||
* These are used for the final twiddle fix-up for converting complex to real FFT
|
||||
*/
|
||||
|
||||
M = N>>3;
|
||||
diff = 12 - order;
|
||||
step = 1<<diff;
|
||||
|
||||
pTwiddle1 = pTwiddle + 3*N/8;
|
||||
pTwiddle4 = pTwiddle1 + (N/4-1);
|
||||
pTwiddle3 = pTwiddle1 + N/8;
|
||||
pTwiddle2 = pTwiddle1 + (N/8-1);
|
||||
|
||||
x = armSP_FFT_S32TwiddleTable[0];
|
||||
y = armSP_FFT_S32TwiddleTable[1];
|
||||
xNeg = 0x7FFFFFFF;
|
||||
|
||||
if((order) >=3)
|
||||
{
|
||||
|
||||
|
||||
for (i=1; i<=M; i+=2 )
|
||||
{
|
||||
j = i*step;
|
||||
|
||||
x = armSP_FFT_S32TwiddleTable[2*j];
|
||||
y = armSP_FFT_S32TwiddleTable[2*j+1];
|
||||
|
||||
pTwiddle1[0].Re = x;
|
||||
pTwiddle1[0].Im = y;
|
||||
pTwiddle1 += 1;
|
||||
pTwiddle2[0].Re = -y;
|
||||
pTwiddle2[0].Im = -x;
|
||||
pTwiddle2 -= 1;
|
||||
pTwiddle3[0].Re = y;
|
||||
pTwiddle3[0].Im = -x;
|
||||
pTwiddle3 += 1;
|
||||
pTwiddle4[0].Re = -x;
|
||||
pTwiddle4[0].Im = y;
|
||||
pTwiddle4 -= 1;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
if (order == 2)
|
||||
{
|
||||
|
||||
pTwiddle1[0].Re = -y;
|
||||
pTwiddle1[0].Im = xNeg;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Update the structure */
|
||||
pFFTStruct->N = N;
|
||||
pFFTStruct->pTwiddle = pTwiddle;
|
||||
pFFTStruct->pBitRev = pBitRev;
|
||||
pFFTStruct->pBuf = pBuf;
|
||||
|
||||
return OMX_Sts_NoErr;
|
||||
}
|
||||
/*****************************************************************************
|
||||
* END OF FILE
|
||||
*****************************************************************************/
|
||||
|
283
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S
Normal file
283
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_F32_Sfs_s.S
Normal file
@ -0,0 +1,283 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This is a modification of omxSP_FFTInv_CCSToR_S32_Sfs_s.s
|
||||
@// to support float instead of SC32.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute an inverse FFT from a conjugate-symmetric (CCS) spectrum to a real F32 signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define tmpOrder r4
|
||||
#define pTwiddle r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total num of radix stages required to complete the FFT
|
||||
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
#define round r3
|
||||
|
||||
#define pOut1 r2
|
||||
#define size r7
|
||||
#define step r8
|
||||
#define step1 r9
|
||||
#define twStep r10
|
||||
#define pTwiddleTmp r11
|
||||
#define argTwiddle1 r12
|
||||
#define zero r14
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.F32
|
||||
#define dShift D1.F32
|
||||
#define dX1 D1.F32
|
||||
#define dY0 D2.F32
|
||||
#define dY1 D3.F32
|
||||
#define dX0r D0.F32
|
||||
#define dX0i D1.F32
|
||||
#define dX1r D2.F32
|
||||
#define dX1i D3.F32
|
||||
#define dW0r D4.F32
|
||||
#define dW0i D5.F32
|
||||
#define dW1r D6.F32
|
||||
#define dW1i D7.F32
|
||||
#define dT0 D8.F32
|
||||
#define dT1 D9.F32
|
||||
#define dT2 D10.F32
|
||||
#define dT3 D11.F32
|
||||
#define qT0 d12.F32
|
||||
#define qT1 d14.F32
|
||||
#define qT2 d16.F32
|
||||
#define qT3 d18.F32
|
||||
#define dY0r D4.F32
|
||||
#define dY0i D5.F32
|
||||
#define dY1r D6.F32
|
||||
#define dY1i D7.F32
|
||||
#define dzero D20.F32
|
||||
|
||||
#define dY2 D4.F32
|
||||
#define dY3 D5.F32
|
||||
#define dW0 D6.F32
|
||||
#define dW1 D7.F32
|
||||
#define dW0Tmp D10.F32
|
||||
#define dW1Neg D11.F32
|
||||
|
||||
#define sN S0.S32
|
||||
#define fN S1.F32
|
||||
@// one must be the same as dScale[0]!
|
||||
#define dScale D2.F32
|
||||
#define one S4.F32
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
M_ALLOC4 complexFFTSize, 4
|
||||
|
||||
@// Write function header
|
||||
@// omxSP_FFTInv_CCSToR_F32_Sfs
@// In:   r0 = pSrc, r1 = pDst, r2 = pFFTSpec.  r3 ("scale") is #defined in
@//       this file but is never read by this routine.
@// Out:  r0 = OMX_Sts_NoErr on every path (all paths end at the End label).
@// Flow: N <= 1 copies a single F32 from pSrc to pDst.  Otherwise the
@//       preTwiddle helper is called, then an N/2-point complex inverse FFT
@//       is assembled from radix-2/4/8 stage helpers selected by
@//       order = log2(N/2), and FFTEnd multiplies the data by 1/(N/2).
@// NOTE(review): the register contracts of the *_unsafe helpers (which of
@//       pSrc/pOut/subFFTNum/subFFTSize they consume and update) are not
@//       visible in this file -- confirm against their sources.
@// NOTE(review): the #defines of this file alias several registers
@//       (order = r14 = lr, argTwiddle = pDst = r1, argDst = pFFTSpec = r2,
@//       tmpOrder = pTwiddle = r4), so the order of the argument set-up
@//       instructions below matters.
M_START omxSP_FFTInv_CCSToR_F32_Sfs,r11,d15
|
||||
|
||||
@ Structure offsets for the FFTSpec
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the transform size N from the structure (the log is taken
@// further down, after N has been halved)
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
@// N=1 Treat separately: copy one 32-bit lane from pSrc to pDst
|
||||
CMP N,#1
|
||||
BGT sizeGreaterThanOne
|
||||
VLD1 dX0[0],[pSrc]
|
||||
VST1 dX0[0],[pDst]
|
||||
|
||||
B End
|
||||
|
||||
sizeGreaterThanOne:
|
||||
|
||||
@// Call the preTwiddle Radix2 stage before doing the complex IFFT
|
||||
|
||||
|
||||
BL armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe
|
||||
|
||||
|
||||
complexIFFT:
|
||||
|
||||
ASR N,N,#1 @// N/2 point complex IFFT
|
||||
M_STR N, complexFFTSize @ Save N for scaling later
|
||||
@// pSrc = pOut + 8 * (N/2) bytes
ADD pSrc,pOut,N,LSL #3 @// set pSrc as pOut1
|
||||
|
||||
@// order = 31 - clz(N), i.e. log2 of the halved size
CLZ order,N @// N = 2^order
|
||||
RSB order,order,#31
|
||||
MOV subFFTSize,#1
|
||||
@//MOV subFFTNum,N
|
||||
|
||||
CMP order,#3
|
||||
BGT orderGreaterthan3 @// order > 3
|
||||
|
||||
CMP order,#1
|
||||
BGE orderGreaterthan0 @// order > 0
|
||||
|
||||
@// order = 0: copy one D register (two F32) to pDst and only scale.
@// None of the three instructions below alters the flags, so the BLT
@// still sees LT from the CMP order,#1 above and is always taken.
VLD1 dX0,[pSrc]
|
||||
VST1 dX0,[pDst]
|
||||
MOV pSrc,pDst
|
||||
BLT FFTEnd
|
||||
|
||||
orderGreaterthan0:
|
||||
@// set the buffers appropriately for various orders
|
||||
@// order = 2: first stage writes to pOut and r5 receives pDst;
@// otherwise the first stage writes straight to pDst
CMP order,#2
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
@// Pass the first stage destination in RN5
|
||||
MOVEQ pOut,pDst
|
||||
@// argTwiddle shares r1 with pDst, so pDst had to be consumed first
MOV argTwiddle,pTwiddle
|
||||
|
||||
@// flags are still those of CMP order,#2 (MOV does not set them)
BGE orderGreaterthan1
|
||||
BLLT armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe @// order = 1
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan1:
|
||||
@// order lives in r14 (lr), which BL overwrites, so keep a copy in r4.
@// NOTE(review): presumably the stage helper preserves r4 -- confirm.
MOV tmpOrder,order @// tmpOrder = RN 4
|
||||
BL armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
@// order = 3 runs one extra middle radix-2 stage before the last stage
CMP tmpOrder,#2
|
||||
BLGT armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
|
||||
BL armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
orderGreaterthan3:
|
||||
specialScaleCase:
|
||||
|
||||
@// Set input args to fft stages
|
||||
@// bit 1 of order selects which buffer the first stage writes to
TST order, #2
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
@// Pass the first stage destination in RN5
|
||||
MOVEQ pOut,pDst
|
||||
@// argTwiddle shares r1 with pDst, so pDst had to be consumed first
MOV argTwiddle,pTwiddle
|
||||
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though
|
||||
@// the first BL would corrupt the flags. This is because the end of
|
||||
@// the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
|
||||
@// to EQ
|
||||
|
||||
@// even order starts with a radix-4 first stage, odd order with radix-8
TST order,#0x00000001
|
||||
BLEQ armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
@// NOTE(review): subFFTNum (r6) is compared here without being written
@// in this routine since N was halved; presumably the stage helpers
@// update it -- confirm.
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
unscaledRadix4Loop:
|
||||
@// EQ comes from the CMP subFFTNum,#4 executed just before each entry
BEQ lastStageUnscaledRadix4
|
||||
BL armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B unscaledRadix4Loop
|
||||
|
||||
lastStageUnscaledRadix4:
|
||||
BL armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
FFTEnd: @// Does only the scaling
|
||||
@ Scale inverse FFT result by 1/N
|
||||
|
||||
@// the value reloaded here is the halved size saved at complexIFFT
M_LDR N, complexFFTSize
|
||||
VMOV sN,N
|
||||
VCVT fN, sN @ fn = fftSize, as a float
|
||||
VMOV one, 1.0
|
||||
VDIV one, one, fN @ one = dScale[0] = 1 / fftSize
|
||||
|
||||
|
||||
@// N = subFFTSize ; dataptr = pDst
|
||||
@// each iteration scales one D register (two F32 values) in place and
@// advances pSrc; the loop runs while the decremented subFFTSize > 0
scaleFFTData:
|
||||
VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer
|
||||
SUBS subFFTSize,subFFTSize,#1
|
||||
VMUL dX0, dX0, dScale[0]
|
||||
VST1 {dX0},[pSrc]!
|
||||
|
||||
BGT scaleFFTData
|
||||
|
||||
End:
|
||||
@// Set return value
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
.end
|
146
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S
Normal file
146
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S
Normal file
@ -0,0 +1,146 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: omxSP_FFTInv_CCSToR_S32S16_Sfs_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7098
|
||||
@// Last Modified Date: Thu, 16 Aug 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute an inverse FFT from a conjugate-symmetric (CCS) S32 spectrum to a real signal, narrowed to S16 with saturation
|
||||
@//
|
||||
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
.extern omxSP_FFTInv_CCSToR_S32_Sfs
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
|
||||
#define N r6
|
||||
#define pOut r5
|
||||
#define pTmpDst r4
|
||||
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.S32
|
||||
#define dX01 D1.S32
|
||||
#define qX0 Q0.S32
|
||||
#define dY0 D2.S16
|
||||
#define dY0S32 D2.S32
|
||||
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
|
||||
@// Write function header
|
||||
@// omxSP_FFTInv_CCSToR_S32S16_Sfs
@// In:   r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale.
@// Out:  r0 = OMX_Sts_NoErr on every path.
@// Flow: wrapper around omxSP_FFTInv_CCSToR_S32_Sfs.  The S32 routine is
@//       pointed at a temporary area inside the spec's pBuf
@//       (pBuf + 4*N bytes); its S32 output is then narrowed with
@//       saturation (VQMOVN) to S16 and written to the caller's pDst.
@//       r0, r2 and r3 are not modified before the call, so they are passed
@//       through unchanged.
M_START omxSP_FFTInv_CCSToR_S32S16_Sfs,r11,d15
|
||||
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the transform size N from the structure
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters
|
||||
@//LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
|
||||
@// remember the S16 destination in r4, then redirect pDst to the
@// temporary S32 area at pBuf + 4*N
MOV pTmpDst,pDst
|
||||
ADD pDst,pOut,N, LSL #2
|
||||
|
||||
|
||||
@// NOTE(review): N (r6), pOut (r5) and pTmpDst (r4) are used again after
@// this call, so it relies on the callee preserving r4-r6 -- presumably
@// guaranteed by its M_START ...,r11 prologue; confirm.
BL omxSP_FFTInv_CCSToR_S32_Sfs
|
||||
|
||||
@// recompute the temporary S32 address rather than relying on r1
ADD pDst,pOut,N, LSL #2
|
||||
|
||||
@// N > 2: bulk loop;  N = 2: one pair;  otherwise: single element
CMP N,#2
|
||||
BGT copyLoop
|
||||
BEQ copyS32ToS16
|
||||
@// single element: load one S32 lane, narrow, store one S16 lane
VLD1 dX0[0],[pDst]
|
||||
VQMOVN dY0,qX0
|
||||
VST1 dY0[0],[pTmpDst]
|
||||
|
||||
B End
|
||||
|
||||
copyS32ToS16:
|
||||
|
||||
@// two elements: load two S32, narrow, store the two resulting S16
@// values as one 32-bit lane
VLD1 dX0,[pDst]
|
||||
VQMOVN dY0,qX0
|
||||
VST1 dY0S32[0],[pTmpDst]
|
||||
B End
|
||||
|
||||
copyLoop:
|
||||
|
||||
@// four elements per iteration: load four S32 (D0,D1 = Q0), narrow to
@// four S16 in D2, store, and loop while the decremented N is > 0
VLD1 {dX0,dX01},[pDst]!
|
||||
SUBS N,N,#4
|
||||
VQMOVN dY0,qX0
|
||||
VST1 dY0,[pTmpDst]!
|
||||
|
||||
BGT copyLoop
|
||||
|
||||
|
||||
End:
|
||||
@// Set return value
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
|
||||
.end
|
390
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_S32_Sfs_s.S
Normal file
390
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CCSToR_S32_Sfs_s.S
Normal file
@ -0,0 +1,390 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: omxSP_FFTInv_CCSToR_S32_Sfs_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 7469
|
||||
@// Last Modified Date: Thu, 20 Sep 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute an inverse FFT from a conjugate-symmetric (CCS) S32 spectrum to a real S32 signal
|
||||
@//
|
||||
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CCSToR_S32_Sfs_preTwiddleRadix2_unsafe
|
||||
.extern armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe
|
||||
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define tmpOrder r4
|
||||
#define pTwiddle r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total num of radix stages required to complete the FFT
|
||||
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
#define round r3
|
||||
|
||||
#define pOut1 r2
|
||||
#define size r7
|
||||
#define step r8
|
||||
#define step1 r9
|
||||
#define twStep r10
|
||||
#define pTwiddleTmp r11
|
||||
#define argTwiddle1 r12
|
||||
#define zero r14
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.S32
|
||||
#define dShift D1.S32
|
||||
#define dX1 D1.S32
|
||||
#define dY0 D2.S32
|
||||
#define dY1 D3.S32
|
||||
#define dX0r D0.S32
|
||||
#define dX0i D1.S32
|
||||
#define dX1r D2.S32
|
||||
#define dX1i D3.S32
|
||||
#define dW0r D4.S32
|
||||
#define dW0i D5.S32
|
||||
#define dW1r D6.S32
|
||||
#define dW1i D7.S32
|
||||
#define dT0 D8.S32
|
||||
#define dT1 D9.S32
|
||||
#define dT2 D10.S32
|
||||
#define dT3 D11.S32
|
||||
#define qT0 Q6.S64
|
||||
#define qT1 Q7.S64
|
||||
#define qT2 Q8.S64
|
||||
#define qT3 Q9.S64
|
||||
#define dY0r D4.S32
|
||||
#define dY0i D5.S32
|
||||
#define dY1r D6.S32
|
||||
#define dY1i D7.S32
|
||||
#define dzero D20.S32
|
||||
|
||||
#define dY2 D4.S32
|
||||
#define dY3 D5.S32
|
||||
#define dW0 D6.S32
|
||||
#define dW1 D7.S32
|
||||
#define dW0Tmp D10.S32
|
||||
#define dW1Neg D11.S32
|
||||
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
M_ALLOC4 diffOnStack, 4
|
||||
|
||||
@// Write function header
|
||||
@// omxSP_FFTInv_CCSToR_S32_Sfs
@// In:   r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale.
@// Out:  r0 = OMX_Sts_NoErr on every path (all paths end at the End label).
@// Flow: N <= 1 applies a rounding right shift by scale (VRSHL with a
@//       negated count) to the single S32 input and stores it.  Otherwise a
@//       preTwiddle helper is called, then an N/2-point complex inverse FFT
@//       is built from scaled ("Sfs") and/or unscaled stage helpers,
@//       depending on how scale compares with order = log2(N/2).  The paths
@//       that end at FFTEnd apply any shift still owed from the value parked
@//       in diffOnStack; the generalScaleCase paths branch to End directly.
@// NOTE(review): the register contracts of the *_unsafe helpers (which of
@//       pSrc/pOut/subFFTNum/subFFTSize/r3/r4 they consume, update or
@//       preserve) are not visible in this file -- confirm against their
@//       sources.
@// NOTE(review): the #defines of this file alias several registers
@//       (order = r14 = lr, argTwiddle = pDst = r1, argDst = pFFTSpec = r2,
@//       argScale = tmpOrder = pTwiddle = r4, count = r8, diff = r9), so the
@//       order of the argument set-up instructions below matters.
M_START omxSP_FFTInv_CCSToR_S32_Sfs,r11,d15
|
||||
|
||||
@ Structure offsets for the FFTSpec
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the transform size N from the structure (the log is taken
@// further down, after N has been halved)
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
@// N=1 Treat separately: shift one S32 lane right by scale and store it
|
||||
CMP N,#1
|
||||
BGT sizeGreaterThanOne
|
||||
VLD1 dX0[0],[pSrc]
|
||||
RSB scale,scale,#0 @// to use VRSHL for right shift by a variable
|
||||
VMOV dShift[0],scale
|
||||
VRSHL dX0,dShift
|
||||
VST1 dX0[0],[pDst]
|
||||
|
||||
B End
|
||||
|
||||
sizeGreaterThanOne:
|
||||
|
||||
@// Call the preTwiddle Radix2 stage before doing the complex IFFT
|
||||
|
||||
@// The following conditional BL combination would work since
|
||||
@// evenOddButterflyLoop in the first call would set Z flag to zero
|
||||
|
||||
@// scale = 0 selects the unscaled preTwiddle, scale > 0 the scaled one
CMP scale,#0
|
||||
BLEQ armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe
|
||||
BLGT armSP_FFTInv_CCSToR_S32_Sfs_preTwiddleRadix2_unsafe
|
||||
|
||||
|
||||
|
||||
complexIFFT:
|
||||
|
||||
ASR N,N,#1 @// N/2 point complex IFFT
|
||||
@// pSrc = pOut + 8 * (N/2) bytes
ADD pSrc,pOut,N,LSL #3 @// set pSrc as pOut1
|
||||
|
||||
@// order = 31 - clz(N), i.e. log2 of the halved size
CLZ order,N @// N = 2^order
|
||||
RSB order,order,#31
|
||||
MOV subFFTSize,#1
|
||||
@//MOV subFFTNum,N
|
||||
|
||||
@// from here on scale holds (incoming scale + order)
ADD scale,scale,order @// FFTInverse has a final scaling factor by N
|
||||
|
||||
CMP order,#3
|
||||
BGT orderGreaterthan3 @// order > 3
|
||||
|
||||
CMP order,#1
|
||||
BGE orderGreaterthan0 @// order > 0
|
||||
@// order = 0: park scale as the shift for FFTEnd, copy one D register to
@// pDst and only scale.  The VLD1/VST1/MOV below do not alter the flags,
@// so the BLT still sees LT from the CMP order,#1 above.
M_STR scale, diffOnStack,LT @// order = 0
|
||||
VLD1 dX0,[pSrc]
|
||||
VST1 dX0,[pDst]
|
||||
MOV pSrc,pDst
|
||||
BLT FFTEnd
|
||||
|
||||
orderGreaterthan0:
|
||||
@// set the buffers appropriately for various orders
|
||||
@// order = 2: first stage writes to pOut and r5 receives pDst;
@// otherwise the first stage writes straight to pDst
CMP order,#2
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
@// argTwiddle shares r1 with pDst, so pDst had to be consumed first
MOV argTwiddle,pTwiddle
|
||||
@// Store the scale factor and scale at the end
|
||||
@// SUB (not SUBS) leaves the flags of CMP order,#2 in place for the
@// BGE/BLLT below; diff = scale - order undoes the ADD at complexIFFT
SUB diff,scale,order
|
||||
M_STR diff, diffOnStack
|
||||
BGE orderGreaterthan1
|
||||
BLLT armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// order = 1
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan1:
|
||||
@// order lives in r14 (lr), which BL overwrites, so keep a copy in r4.
@// NOTE(review): presumably the stage helper preserves r4 -- confirm.
MOV tmpOrder,order @// tmpOrder = RN 4
|
||||
BL armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
|
||||
@// order = 3 runs one extra middle radix-2 stage before the last stage
CMP tmpOrder,#2
|
||||
BLGT armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
|
||||
BL armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
orderGreaterthan3:
|
||||
@// check scale = 0 or scale = order
|
||||
@// diff = scale - order; when positive, scale is clamped to order and
@// diff keeps the excess for FFTEnd
SUBS diff, scale, order @// scale > order
|
||||
MOVGT scale,order
|
||||
BGE specialScaleCase @// scale = 0 or scale = order
|
||||
CMP scale,#0
|
||||
BEQ specialScaleCase
|
||||
B generalScaleCase
|
||||
|
||||
specialScaleCase: @// scale = 0 or scale = order and order >= 2
|
||||
|
||||
@// bit 1 of order selects which buffer the first stage writes to
TST order, #2 @// Set input args to fft stages
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
@// argTwiddle shares r1 with pDst, so pDst had to be consumed first
MOV argTwiddle,pTwiddle
|
||||
|
||||
@// diff >= 0 takes the scaled stages; diff < 0 (reached with scale = 0)
@// takes the unscaled stages, and FFTEnd then skips its shift loop
CMP diff,#0
|
||||
M_STR diff, diffOnStack
|
||||
BGE scaleEqualsOrder
|
||||
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though the first
|
||||
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
|
||||
|
||||
@// even order starts with a radix-4 first stage, odd order with radix-8
TST order,#0x00000001
|
||||
BLEQ armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
@// NOTE(review): subFFTNum (r6) is compared here without being written
@// in this routine since N was halved; presumably the stage helpers
@// update it -- confirm.
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
unscaledRadix4Loop:
|
||||
@// EQ comes from the CMP subFFTNum,#4 executed just before each entry
BEQ lastStageUnscaledRadix4
|
||||
BL armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B unscaledRadix4Loop
|
||||
|
||||
lastStageUnscaledRadix4:
|
||||
BL armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
scaleEqualsOrder:
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though the first
|
||||
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
|
||||
|
||||
@// same stage selection as above, using the scaled ("Sfs") helpers
TST order,#0x00000001
|
||||
BLEQ armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
scaledRadix4Loop:
|
||||
@// EQ comes from the CMP subFFTNum,#4 executed just before each entry
BEQ lastStageScaledRadix4
|
||||
BL armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B scaledRadix4Loop
|
||||
|
||||
lastStageScaledRadix4:
|
||||
BL armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
generalScaleCase: @// 0 < scale < order and order >= 2
|
||||
@// Determine the correct destination buffer
|
||||
@// diff = order - scale; its parity picks the unscaled stage radix later
SUB diff,order,scale
|
||||
TST diff,#0x01
|
||||
ADDEQ count,scale,diff,LSR #1 @// count = scale + (order - scale)/2
|
||||
MOVNE count,order
|
||||
TST count,#0x01 @// Is count even or odd ?
|
||||
|
||||
MOVNE argDst,pDst @// Set input args to fft stages
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
@// argTwiddle shares r1 with pDst, so pDst had to be consumed first
MOV argTwiddle,pTwiddle
|
||||
|
||||
M_STR diff, diffOnStack
|
||||
|
||||
@// argScale shares r4 with pTwiddle, which was copied to r1 just above
MOV argScale,scale @// Put scale in RN4 so as to save and restore
|
||||
BL armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
|
||||
SUBS argScale,argScale,#1
|
||||
|
||||
@// one scaled radix-2 stage per remaining unit of argScale; the SUBS
@// after the call re-establishes the flags the BGT tests
scaledRadix2Loop:
|
||||
BLGT armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
|
||||
BGT scaledRadix2Loop
|
||||
|
||||
|
||||
@// reload diff = order - scale saved above
M_LDR diff, diffOnStack
|
||||
@//check for even or odd order
|
||||
@// even diff finishes with unscaled radix-4 stages, odd with radix-2
TST diff,#0x00000001
|
||||
BEQ generalUnscaledRadix4Loop
|
||||
B unscaledRadix2Loop
|
||||
|
||||
generalUnscaledRadix4Loop:
|
||||
CMP subFFTNum,#4
|
||||
BEQ generalLastStageUnscaledRadix4
|
||||
BL armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
|
||||
B generalUnscaledRadix4Loop
|
||||
|
||||
generalLastStageUnscaledRadix4:
|
||||
BL armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
|
||||
@// this path goes straight to End, skipping the FFTEnd shift loop
B End
|
||||
|
||||
|
||||
unscaledRadix2Loop:
|
||||
CMP subFFTNum,#2
|
||||
BEQ generalLastStageUnscaledRadix2
|
||||
BL armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
|
||||
B unscaledRadix2Loop
|
||||
|
||||
generalLastStageUnscaledRadix2:
|
||||
BL armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
|
||||
@// this path goes straight to End, skipping the FFTEnd shift loop
B End
|
||||
|
||||
|
||||
FFTEnd: @// Does only the scaling
|
||||
|
||||
@// diff <= 0 means no shift is owed, so the loop is skipped
M_LDR diff, diffOnStack
|
||||
CMP diff,#0
|
||||
BLE End
|
||||
|
||||
RSB diff,diff,#0 @// to use VRSHL for right shift by a variable
|
||||
VDUP dShift,diff
|
||||
|
||||
@// each iteration shifts one D register (two S32 values) in place and
@// advances pSrc; the loop runs while the decremented subFFTSize > 0
scaleFFTData: @// N = subFFTSize ; dataptr = pDst ; scale = diff
|
||||
VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer
|
||||
SUBS subFFTSize,subFFTSize,#1
|
||||
VRSHL dX0,dShift
|
||||
VST1 {dX0},[pSrc]!
|
||||
|
||||
BGT scaleFFTData
|
||||
|
||||
|
||||
End:
|
||||
@// Set return value
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
.end
|
214
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_FC32_Sfs_s.S
Normal file
214
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_FC32_Sfs_s.S
Normal file
@ -0,0 +1,214 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This is a modification of armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.s
|
||||
@// to support float instead of SC32.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute an inverse FFT for a complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define tmpOrder r4
|
||||
#define pTwiddle r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total num of radix stages required to complete the FFT
|
||||
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.F32
|
||||
#define qX0 Q0.F32
|
||||
#define sN S0.S32
|
||||
#define fN S1.F32
|
||||
@// one must be the same as dScale[0]!
|
||||
#define dScale D4.F32
|
||||
#define one S8.F32
|
||||
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
M_ALLOC4 fftSize, 4
|
||||
|
||||
@// Write function header
|
||||
M_START omxSP_FFTInv_CToC_FC32_Sfs,r11,d15
|
||||
|
||||
@ Structure offsets for the FFTSpec
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the size from structure and take log
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
M_STR N, fftSize
|
||||
|
||||
@// Read other structure parameters
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
CLZ order,N @// N = 2^order
|
||||
RSB order,order,#31
|
||||
MOV subFFTSize,#1
|
||||
@//MOV subFFTNum,N
|
||||
|
||||
CMP order,#3
|
||||
BGT orderGreaterthan3 @// order > 3
|
||||
|
||||
CMP order,#1
|
||||
BGE orderGreaterthan0 @// order > 0
|
||||
@// order = 0 (N = 1): the inverse transform of a single complex value
|
||||
@// is that value, and the 1/N scale factor is exactly 1, so the work
|
||||
@// is a plain 8-byte copy from pSrc to pDst.
|
||||
@// Do not go through FFTEnd here: its scaling loop moves a whole Q
|
||||
@// register (two complex floats, 16 bytes) per iteration and would read
|
||||
@// and write 8 bytes past the end of a one-element output buffer.
|
||||
VLD1 dX0,[pSrc]
|
||||
VST1 dX0,[pDst]
|
||||
@// pSrc (r0) does not need to be redirected to pDst: End overwrites r0
|
||||
@// with the return status.
|
||||
B End
|
||||
|
||||
orderGreaterthan0:
|
||||
@// set the buffers appropriately for various orders
|
||||
CMP order,#2
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
@// Pass the first stage destination in RN5
|
||||
MOVEQ pOut,pDst
|
||||
MOV argTwiddle,pTwiddle
|
||||
BGE orderGreaterthan1
|
||||
@// order = 1
|
||||
BLLT armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan1:
|
||||
MOV tmpOrder,order @// tmpOrder = RN 4
|
||||
BL armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
|
||||
CMP tmpOrder,#2
|
||||
BLGT armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace_unsafe
|
||||
BL armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
orderGreaterthan3:
|
||||
|
||||
@// Set input args to fft stages
|
||||
TST order, #2
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
@// Pass the first stage destination in RN5
|
||||
MOVEQ pOut,pDst
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though
|
||||
@// the first BL would corrupt the flags. This is because the end of
|
||||
@// the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
|
||||
@// to EQ
|
||||
|
||||
TST order,#0x00000001
|
||||
BLEQ armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
unscaledRadix4Loop:
|
||||
BEQ lastStageUnscaledRadix4
|
||||
BL armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B unscaledRadix4Loop
|
||||
|
||||
lastStageUnscaledRadix4:
|
||||
BL armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
FFTEnd: @// Does only the scaling
|
||||
@// Final pass of the inverse transform: multiply the output in place
|
||||
@// by 1/N, where N is the FFT size saved on the stack at function entry.
|
||||
|
||||
||||
@// Reload N from its stack slot. N shares r6 with subFFTNum, so the
|
||||
@// register copy is presumably no longer N after the stage calls
|
||||
@// (NOTE(review): confirm against the radix stage routines).
|
||||
M_LDR N, fftSize
|
||||
|
||||
||||
@// Move the integer N into S0 so it can be converted in the VFP unit.
|
||||
VMOV sN,N
|
||||
|
||||
||||
VCVT fN, sN @ fn = fftSize, as a float
|
||||
|
||||
||||
@// "one" is S8, which is the low lane of D4 (dScale), so the quotient
|
||||
@// below lands directly in the scalar operand used by the VMUL.
|
||||
VMOV one, 1.0
|
||||
|
||||
||||
VDIV one, one, fN @ one = dScale[0] = 1 / fftSize
|
||||
|
||||
||||
|
||||
||||
@ Scale data, doing 2 complex values at a time (because N is
|
||||
|
||||
||||
@ always even).
|
||||
|
||||
||||
|
||||
||||
@// count = subFFTSize ; dataptr = pSrc ; scale = dScale[0] = 1/N
|
||||
|
||||
||||
@// Each iteration handles 16 bytes (one Q register = 4 floats). The
|
||||
@// :128 qualifier asserts that the address is 16-byte aligned.
|
||||
scaleFFTData:
|
||||
|
||||
||||
VLD1 {qX0},[pSrc :128] @// pSrc contains pDst pointer
|
||||
|
||||
||||
@// Decrement the remaining count by the two complex values handled per
|
||||
@// pass; the flags set here are the ones tested by the BGT below (the
|
||||
@// NEON multiply and store in between do not change them).
|
||||
SUBS subFFTSize,subFFTSize,#2
|
||||
|
||||
||||
VMUL qX0, qX0, dScale[0]
|
||||
|
||||
||||
@// Store back in place and post-increment pSrc to the next 16 bytes.
|
||||
VST1 {qX0},[pSrc :128]!
|
||||
|
||||
||||
|
||||
||||
BGT scaleFFTData
|
||||
End:
|
||||
@// Set return value
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
|
||||
.end
|
342
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_SC16_Sfs_s.S
Normal file
342
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_SC16_Sfs_s.S
Normal file
@ -0,0 +1,342 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
|
||||
@//
|
||||
@//
|
||||
@// File Name: omxSP_FFTInv_CToC_SC16_Sfs_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 6729
|
||||
@// Last Modified Date: Tue, 17 Jul 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute an inverse FFT for a complex signal
|
||||
@//
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
.extern armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define pTwiddle r4
|
||||
#define tmpOrder r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total num of radix stages required to complete the FFT
|
||||
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
#define round r3
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.S16
|
||||
#define dShift D1.S16
|
||||
#define dX0S32 D0.S32
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
M_ALLOC4 diffOnStack, 4
|
||||
|
||||
@// Write function header
|
||||
M_START omxSP_FFTInv_CToC_SC16_Sfs,r11,d15
|
||||
|
||||
@ Structure offsets for the FFTSpec
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the size from structure and take log
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
CLZ order,N @// N = 2^order
|
||||
RSB order,order,#31
|
||||
MOV subFFTSize,#1
|
||||
@//MOV subFFTNum,N
|
||||
|
||||
ADD scale,scale,order @// FFTInverse has a final scaling factor by N
|
||||
|
||||
CMP order,#3
|
||||
BGT orderGreaterthan3 @// order > 3
|
||||
|
||||
CMP order,#1
|
||||
BGE orderGreaterthan0 @// order > 0
|
||||
M_STR scale, diffOnStack,LT @// order = 0
|
||||
LDRLT x0r,[pSrc]
|
||||
STRLT x0r,[pDst]
|
||||
MOVLT pSrc,pDst
|
||||
BLT FFTEnd
|
||||
|
||||
orderGreaterthan0:
|
||||
@// set the buffers appropriately for various orders
|
||||
CMP order,#2
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle
|
||||
@// Store the scale factor and scale at the end
|
||||
SUB diff,scale,order
|
||||
M_STR diff, diffOnStack
|
||||
BGE orderGreaterthan1
|
||||
BLLT armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// order = 1
|
||||
B FFTEnd
|
||||
|
||||
|
||||
orderGreaterthan1:
|
||||
MOV tmpOrder,order @// tmpOrder = RN 4
|
||||
BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
|
||||
CMP tmpOrder,#2
|
||||
BLGT armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
|
||||
BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
|
||||
|
||||
orderGreaterthan3:
|
||||
@// check scale = 0 or scale = order
|
||||
SUBS diff, scale, order @// scale > order
|
||||
MOVGT scale,order
|
||||
BGE specialScaleCase @// scale = 0 or scale = order
|
||||
CMP scale,#0
|
||||
BEQ specialScaleCase
|
||||
B generalScaleCase
|
||||
|
||||
specialScaleCase: @// scale = 0 or scale = order and order > 3
|
||||
|
||||
TST order, #2 @// Set input args to fft stages
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
CMP diff,#0
|
||||
M_STR diff, diffOnStack
|
||||
BGE scaleEqualsOrder
|
||||
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though the first
|
||||
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
|
||||
|
||||
TST order,#0x00000001
|
||||
BLEQ armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
unscaledRadix4Loop:
|
||||
BEQ lastStageUnscaledRadix4
|
||||
BL armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B unscaledRadix4Loop
|
||||
|
||||
lastStageUnscaledRadix4:
|
||||
BL armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
scaleEqualsOrder:
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though the first
|
||||
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
|
||||
|
||||
TST order,#0x00000001
|
||||
BLEQ armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
scaledRadix4Loop:
|
||||
BEQ lastStageScaledRadix4
|
||||
BL armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B scaledRadix4Loop
|
||||
|
||||
lastStageScaledRadix4:
|
||||
BL armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
|
||||
generalScaleCase: @// 0 < scale < order and order > 3
|
||||
@// Determine the correct destination buffer
|
||||
SUB diff,order,scale
|
||||
TST diff,#0x01
|
||||
ADDEQ count,scale,diff,LSR #1 @// count = scale + (order - scale)/2
|
||||
MOVNE count,order
|
||||
TST count,#0x01 @// Is count even or odd ?
|
||||
|
||||
MOVNE argDst,pDst @// Set input args to fft stages
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
CMP diff,#1
|
||||
M_STR diff, diffOnStack
|
||||
BEQ scaleps @// scaling including a radix2_ps stage
|
||||
|
||||
MOV argScale,scale @// Put scale in RN4 so as to save and restore
|
||||
BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
|
||||
SUBS argScale,argScale,#1
|
||||
|
||||
scaledRadix2Loop:
|
||||
BLGT armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
|
||||
BGT scaledRadix2Loop
|
||||
B outScale
|
||||
|
||||
scaleps:
|
||||
SUB argScale,scale,#1 @// order>3 and diff=1 => scale >= 3
|
||||
BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
|
||||
SUBS argScale,argScale,#1
|
||||
|
||||
scaledRadix2psLoop:
|
||||
BEQ scaledRadix2psStage
|
||||
BLGT armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
|
||||
BGE scaledRadix2psLoop
|
||||
|
||||
scaledRadix2psStage:
|
||||
BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
|
||||
B generalLastStageUnscaledRadix2
|
||||
|
||||
|
||||
outScale:
|
||||
M_LDR diff, diffOnStack
|
||||
@//check for even or odd order
|
||||
TST diff,#0x00000001
|
||||
BEQ generalUnscaledRadix4Loop
|
||||
B unscaledRadix2Loop
|
||||
|
||||
generalUnscaledRadix4Loop:
|
||||
CMP subFFTNum,#4
|
||||
BEQ generalLastStageUnscaledRadix4
|
||||
BL armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
|
||||
B generalUnscaledRadix4Loop
|
||||
|
||||
generalLastStageUnscaledRadix4:
|
||||
BL armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
|
||||
B End
|
||||
|
||||
unscaledRadix2Loop:
|
||||
CMP subFFTNum,#4
|
||||
BEQ generalLastTwoStagesUnscaledRadix2
|
||||
BL armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
|
||||
B unscaledRadix2Loop
|
||||
|
||||
generalLastTwoStagesUnscaledRadix2:
|
||||
BL armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
|
||||
generalLastStageUnscaledRadix2:
|
||||
BL armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
|
||||
B End
|
||||
|
||||
|
||||
FFTEnd: @// Does only the scaling
|
||||
@// Apply whatever right shift is still owed after the FFT stages. The
|
||||
@// amount was saved in diffOnStack by the dispatch code before the
|
||||
@// stages were called.
|
||||
|
||||
||||
M_LDR diff, diffOnStack
|
||||
|
||||
||||
CMP diff,#0
|
||||
|
||||
||||
@// Nothing left to shift (diff <= 0): skip the scaling loop entirely.
|
||||
BLE End
|
||||
|
||||
||||
|
||||
||||
RSB diff,diff,#0 @// to use VRSHL for right shift by a variable
|
||||
|
||||
||||
@// Broadcast the negated shift count to every 16-bit lane of D1; VRSHL
|
||||
@// with a negative per-lane count performs a rounding right shift.
|
||||
VDUP dShift,diff
|
||||
|
||||
||||
|
||||
||||
@// One pass per 32-bit element: load it into lane 0 of D0, shift the
|
||||
@// 16-bit lanes, store it back and advance pSrc. Presumably the 32 bits
|
||||
@// are one complex SC16 value (two 16-bit parts) - TODO confirm against
|
||||
@// the OMX_SC16 definition.
|
||||
scaleFFTData: @// N = subFFTSize ; dataptr = pDst ; scale = diff
|
||||
|
||||
||||
VLD1 {dX0S32[0]},[pSrc] @// pSrc contains pDst pointer
|
||||
|
||||
||||
@// Flags set here drive the BGT at the bottom; the NEON shift and store
|
||||
@// in between leave them untouched.
|
||||
SUBS subFFTSize,subFFTSize,#1
|
||||
|
||||
||||
@// D0 and D1 are both viewed as S16 lanes for the shift.
|
||||
VRSHL dX0,dShift
|
||||
|
||||
||||
@// Write the 32-bit lane back in place and post-increment pSrc.
|
||||
VST1 {dX0S32[0]},[pSrc]!
|
||||
|
||||
||||
|
||||
||||
BGT scaleFFTData
|
||||
|
||||
|
||||
End:
|
||||
@// Set return value
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.END
|
314
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_SC32_Sfs_s.S
Normal file
314
media/openmax_dl/dl/sp/src/omxSP_FFTInv_CToC_SC32_Sfs_s.S
Normal file
@ -0,0 +1,314 @@
|
||||
@//
|
||||
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
@//
|
||||
@// Use of this source code is governed by a BSD-style license
|
||||
@// that can be found in the LICENSE file in the root of the source
|
||||
@// tree. An additional intellectual property rights grant can be found
|
||||
@// in the file PATENTS. All contributing project authors may
|
||||
@// be found in the AUTHORS file in the root of the source tree.
|
||||
@//
|
||||
@// This file was originally licensed as follows. It has been
|
||||
@// relicensed with permission from the copyright holders.
|
||||
@//
|
||||
|
||||
@//
|
||||
@// File Name: omxSP_FFTInv_CToC_SC32_Sfs_s.s
|
||||
@// OpenMAX DL: v1.0.2
|
||||
@// Last Modified Revision: 6675
|
||||
@// Last Modified Date: Fri, 06 Jul 2007
|
||||
@//
|
||||
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
|
||||
@//
|
||||
@//
|
||||
@//
|
||||
@// Description:
|
||||
@// Compute an inverse FFT for a complex signal
|
||||
@//
|
||||
|
||||
|
||||
@// Include standard headers
|
||||
|
||||
#include "dl/api/armCOMM_s.h"
|
||||
#include "dl/api/omxtypes_s.h"
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
|
||||
|
||||
@// Set debugging level
|
||||
@//DEBUG_ON SETL {TRUE}
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
|
||||
|
||||
@// Guarding implementation by the processor name
|
||||
|
||||
@// Import symbols required from other files
|
||||
@// (For example tables)
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
.extern armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
|
||||
|
||||
|
||||
@//Input Registers
|
||||
|
||||
#define pSrc r0
|
||||
#define pDst r1
|
||||
#define pFFTSpec r2
|
||||
#define scale r3
|
||||
|
||||
|
||||
@// Output registers
|
||||
#define result r0
|
||||
|
||||
@//Local Scratch Registers
|
||||
|
||||
#define argTwiddle r1
|
||||
#define argDst r2
|
||||
#define argScale r4
|
||||
#define tmpOrder r4
|
||||
#define pTwiddle r4
|
||||
#define pOut r5
|
||||
#define subFFTSize r7
|
||||
#define subFFTNum r6
|
||||
#define N r6
|
||||
#define order r14
|
||||
#define diff r9
|
||||
@// Total num of radix stages required to complete the FFT
|
||||
#define count r8
|
||||
#define x0r r4
|
||||
#define x0i r5
|
||||
#define diffMinusOne r2
|
||||
#define round r3
|
||||
|
||||
@// Neon registers
|
||||
|
||||
#define dX0 D0.S32
|
||||
#define dShift D1.S32
|
||||
|
||||
|
||||
|
||||
@// Allocate stack memory required by the function
|
||||
M_ALLOC4 diffOnStack, 4
|
||||
|
||||
@// Write function header
|
||||
M_START omxSP_FFTInv_CToC_SC32_Sfs,r11,d15
|
||||
|
||||
@ Structure offsets for the FFTSpec
|
||||
.set ARMsFFTSpec_N, 0
|
||||
.set ARMsFFTSpec_pBitRev, 4
|
||||
.set ARMsFFTSpec_pTwiddle, 8
|
||||
.set ARMsFFTSpec_pBuf, 12
|
||||
|
||||
@// Define stack arguments
|
||||
|
||||
@// Read the size from structure and take log
|
||||
LDR N, [pFFTSpec, #ARMsFFTSpec_N]
|
||||
|
||||
@// Read other structure parameters
|
||||
LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
|
||||
LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
|
||||
|
||||
CLZ order,N @// N = 2^order
|
||||
RSB order,order,#31
|
||||
MOV subFFTSize,#1
|
||||
@//MOV subFFTNum,N
|
||||
|
||||
ADD scale,scale,order @// FFTInverse has a final scaling factor by N
|
||||
|
||||
CMP order,#3
|
||||
BGT orderGreaterthan3 @// order > 3
|
||||
|
||||
CMP order,#1
|
||||
BGE orderGreaterthan0 @// order > 0
|
||||
M_STR scale, diffOnStack,LT @// order = 0
|
||||
VLD1 dX0,[pSrc]
|
||||
VST1 dX0,[pDst]
|
||||
MOV pSrc,pDst
|
||||
BLT FFTEnd
|
||||
|
||||
orderGreaterthan0:
|
||||
@// set the buffers appropriately for various orders
|
||||
CMP order,#2
|
||||
MOVNE argDst,pDst
|
||||
MOVEQ argDst,pOut
|
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
MOV argTwiddle,pTwiddle
|
||||
@// Store the scale factor and scale at the end
|
||||
SUB diff,scale,order
|
||||
M_STR diff, diffOnStack
|
||||
BGE orderGreaterthan1
|
||||
BLLT armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// order = 1
|
||||
B FFTEnd
|
||||
|
||||
orderGreaterthan1:
|
||||
MOV tmpOrder,order @// tmpOrder = RN 4
|
||||
BL armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
|
||||
CMP tmpOrder,#2
|
||||
BLGT armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
|
||||
BL armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
orderGreaterthan3:
|
||||
@// check scale = 0 or scale = order
|
||||
SUBS diff, scale, order @// scale > order
|
||||
MOVGT scale,order
|
||||
BGE specialScaleCase @// scale = 0 or scale = order
|
||||
CMP scale,#0
|
||||
BEQ specialScaleCase
|
||||
B generalScaleCase
|
||||
|
||||
specialScaleCase: @// scale = 0 or scale = order and order > 3
|
||||
@// Only reached from orderGreaterthan3, so order > 3 here. On entry diff
|
||||
@// holds the scale - order value computed there.
|
||||
|
||||
||||
@// Bit 1 of order selects which buffer the first stage writes to.
|
||||
@// NOTE(review): presumably chosen so the last stage lands in pDst -
|
||||
@// confirm against the stage routines.
|
||||
TST order, #2 @// Set input args to fft stages
|
||||
|
||||
||||
@// argDst is r2 (the incoming pFFTSpec register, no longer needed).
|
||||
MOVNE argDst,pDst
|
||||
|
||||
||||
MOVEQ argDst,pOut
|
||||
|
||||
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
|
||||
||||
@// argTwiddle is r1, the same register as pDst, so this move must come
|
||||
@// after every read of pDst above.
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
||||
|
||||
||||
@// diff >= 0 selects the scaled (Sfs) stage chain, diff < 0 the unscaled
|
||||
@// one. The value is also saved for the final scaling at FFTEnd.
|
||||
CMP diff,#0
|
||||
|
||||
||||
@// NOTE(review): the BGE below relies on M_STR leaving the flags from
|
||||
@// the CMP intact - confirm against the macro definition.
|
||||
M_STR diff, diffOnStack
|
||||
|
||||
||||
BGE scaleEqualsOrder
|
||||
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though the first
|
||||
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
|
||||
|
||||
TST order,#0x00000001
|
||||
BLEQ armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
unscaledRadix4Loop:
|
||||
BEQ lastStageUnscaledRadix4
|
||||
BL armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B unscaledRadix4Loop
|
||||
|
||||
lastStageUnscaledRadix4:
|
||||
BL armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
|
||||
scaleEqualsOrder:
|
||||
@//check for even or odd order
|
||||
@// NOTE: The following combination of BL's would work fine even though the first
|
||||
@// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
|
||||
@// armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
|
||||
|
||||
TST order,#0x00000001
|
||||
BLEQ armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
|
||||
BLNE armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
|
||||
|
||||
CMP subFFTNum,#4
|
||||
BLT FFTEnd
|
||||
|
||||
|
||||
scaledRadix4Loop:
|
||||
BEQ lastStageScaledRadix4
|
||||
BL armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
|
||||
CMP subFFTNum,#4
|
||||
B scaledRadix4Loop
|
||||
|
||||
lastStageScaledRadix4:
|
||||
BL armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
|
||||
B FFTEnd
|
||||
|
||||
generalScaleCase: @// 0 < scale < order and order > 3
|
||||
@// Only reached from orderGreaterthan3, so order > 3 here.
|
||||
@// NOTE(review): scale had order added to it near function entry and
|
||||
@// scale >= order is routed to specialScaleCase, so this path appears to
|
||||
@// be reachable only when the caller's scale factor is negative - confirm
|
||||
@// whether it is live for the inverse transform.
|
||||
|
||||
||||
@// Determine the correct destination buffer
|
||||
|
||||
||||
@// diff = number of stages that run unscaled after the scaled ones.
|
||||
SUB diff,order,scale
|
||||
|
||||
||||
TST diff,#0x01
|
||||
|
||||
||||
@// Even diff: the unscaled remainder is done by radix-4 stages, each
|
||||
@// covering two orders. Odd diff: it is done by radix-2 stages, so the
|
||||
@// stage count equals order.
|
||||
ADDEQ count,scale,diff,LSR #1 @// count = scale + (order - scale)/2
|
||||
|
||||
||||
MOVNE count,order
|
||||
|
||||
||||
TST count,#0x01 @// Is count even or odd ?
|
||||
|
||||
||||
|
||||
||||
@// Parity of the stage count selects which buffer the first stage
|
||||
@// writes to. argDst is r2, the incoming pFFTSpec register.
|
||||
MOVNE argDst,pDst @// Set input args to fft stages
|
||||
|
||||
||||
MOVEQ argDst,pOut
|
||||
|
||||
||||
MOVEQ pOut,pDst @// Pass the first stage destination in RN5
|
||||
|
||||
||||
@// argTwiddle is r1, the same register as pDst, so this move must come
|
||||
@// after every read of pDst above.
|
||||
MOV argTwiddle,pTwiddle
|
||||
|
||||
M_STR diff, diffOnStack
|
||||
|
||||
MOV argScale,scale @// Put scale in RN4 so as to save and restore
|
||||
BL armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe @// scaled first stage
|
||||
SUBS argScale,argScale,#1
|
||||
|
||||
scaledRadix2Loop:
|
||||
BLGT armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
|
||||
SUBS argScale,argScale,#1 @// save and restore scale (RN4) in the scaled stages
|
||||
BGT scaledRadix2Loop
|
||||
|
||||
|
||||
M_LDR diff, diffOnStack
|
||||
@//check for even or odd order
|
||||
TST diff,#0x00000001
|
||||
BEQ generalUnscaledRadix4Loop
|
||||
B unscaledRadix2Loop
|
||||
|
||||
generalUnscaledRadix4Loop:
|
||||
CMP subFFTNum,#4
|
||||
BEQ generalLastStageUnscaledRadix4
|
||||
BL armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
|
||||
B generalUnscaledRadix4Loop
|
||||
|
||||
generalLastStageUnscaledRadix4:
|
||||
BL armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
|
||||
B End
|
||||
|
||||
|
||||
unscaledRadix2Loop:
|
||||
CMP subFFTNum,#2
|
||||
BEQ generalLastStageUnscaledRadix2
|
||||
BL armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
|
||||
B unscaledRadix2Loop
|
||||
|
||||
generalLastStageUnscaledRadix2:
|
||||
BL armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
|
||||
B End
|
||||
|
||||
|
||||
FFTEnd: @// Does only the scaling
|
||||
@// Apply whatever right shift is still owed after the FFT stages. The
|
||||
@// amount was saved in diffOnStack by the dispatch code before the
|
||||
@// stages were called.
|
||||
|
||||
||||
M_LDR diff, diffOnStack
|
||||
|
||||
||||
CMP diff,#0
|
||||
|
||||
||||
@// Nothing left to shift (diff <= 0): skip the scaling loop entirely.
|
||||
BLE End
|
||||
|
||||
||||
|
||||
||||
RSB diff,diff,#0 @// to use VRSHL for right shift by a variable
|
||||
|
||||
||||
@// Broadcast the negated shift count to both 32-bit lanes of D1; VRSHL
|
||||
@// with a negative per-lane count performs a rounding right shift.
|
||||
VDUP dShift,diff
|
||||
|
||||
||||
|
||||
||||
@// One pass per 8 bytes: load two 32-bit lanes into D0, shift them,
|
||||
@// store them back and advance pSrc. Presumably the 8 bytes are one
|
||||
@// complex SC32 value - TODO confirm against the OMX_SC32 definition.
|
||||
scaleFFTData: @// N = subFFTSize ; dataptr = pDst ; scale = diff
|
||||
|
||||
||||
VLD1 {dX0},[pSrc] @// pSrc contains pDst pointer
|
||||
|
||||
||||
@// Flags set here drive the BGT at the bottom; the NEON shift and store
|
||||
@// in between leave them untouched.
|
||||
SUBS subFFTSize,subFFTSize,#1
|
||||
|
||||
||||
VRSHL dX0,dShift
|
||||
|
||||
||||
@// Write both lanes back in place and post-increment pSrc by 8 bytes.
|
||||
VST1 {dX0},[pSrc]!
|
||||
|
||||
||||
|
||||
||||
BGT scaleFFTData
|
||||
|
||||
|
||||
End:
|
||||
@// Set return value
|
||||
MOV result, #OMX_Sts_NoErr
|
||||
|
||||
@// Write function tail
|
||||
M_END
|
||||
|
||||
.end
|
@ -98,6 +98,7 @@
|
||||
<li><a href="about:license#jpnic">Japan Network Information Center License</a></li>
|
||||
<li><a href="about:license#jemalloc">jemalloc License</a></li>
|
||||
<li><a href="about:license#jquery">jQuery License</a></li>
|
||||
<li><a href="about:license#khronos">Khronos group License</a></li>
|
||||
<li><a href="about:license#kiss_fft">Kiss FFT License</a></li>
|
||||
<li><a href="about:license#libcubeb">libcubeb License</a></li>
|
||||
<li><a href="about:license#libevent">libevent License</a></li>
|
||||
@ -1950,6 +1951,7 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
<span class="path">dom/plugins/</span>,
|
||||
<span class="path">tools/profiler/sps/</span>,
|
||||
<span class="path">gfx/ots/</span>,
|
||||
<span class="path">media/openmax_dl/</span>,
|
||||
<span class="path">gfx/ycbcr</span> and
|
||||
<span class="path">dom/media/webspeech/recognition/</span>.
|
||||
</p>
|
||||
@ -2882,6 +2884,43 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
<hr>
|
||||
|
||||
<h1><a id="khronos"></a>Khronos group License</h1>
|
||||
|
||||
<p>This license applies to the following files:</p>
|
||||
|
||||
<ul>
|
||||
<li class="path">openmax_dl/dl/api/omxtypes.h</li>
|
||||
<li class="path">openmax_dl/dl/sp/api/omxSP.h</li>
|
||||
</ul>
|
||||
|
||||
<pre>
|
||||
Copyright 2005-2008 The Khronos Group Inc. All Rights Reserved.
|
||||
|
||||
These materials are protected by copyright laws and contain material
|
||||
proprietary to the Khronos Group, Inc. You may use these materials
|
||||
for implementing Khronos specifications, without altering or removing
|
||||
any trademark, copyright or other notice from the specification.
|
||||
|
||||
Khronos Group makes no, and expressly disclaims any, representations
|
||||
or warranties, express or implied, regarding these materials, including,
|
||||
without limitation, any implied warranties of merchantability or fitness
|
||||
for a particular purpose or non-infringement of any intellectual property.
|
||||
Khronos Group makes no, and expressly disclaims any, warranties, express
|
||||
or implied, regarding the correctness, accuracy, completeness, timeliness,
|
||||
and reliability of these materials.
|
||||
|
||||
Under no circumstances will the Khronos Group, or any of its Promoters,
|
||||
Contributors or Members or their respective partners, officers, directors,
|
||||
employees, agents or representatives be liable for any damages, whether
|
||||
direct, indirect, special or consequential damages for lost revenues,
|
||||
lost profits, or otherwise, arising from or in connection with these
|
||||
materials.
|
||||
|
||||
Khronos and OpenMAX are trademarks of the Khronos Group Inc.
|
||||
</pre>
|
||||
|
||||
<hr>
|
||||
|
||||
<h1><a id="kiss_fft"></a>Kiss FFT License</h1>
|
||||
|
||||
<p>This license applies to files in the directory
|
||||
|
Loading…
x
Reference in New Issue
Block a user