Universal Audio Class

Based on my implementation in sceAac https://github.com/hrydgard/ppsspp/pull/5836
I've created a class, AuCtx, included in my SimpleAudioDec.cpp/.h, which aims to provide a standard, easy implementation that supports all codecs in ffmpeg.
Here, I have also completely rewritten the sceMp3 file with this class as an example of how to use it, and it has solved all the mp3 issues I've observed in the current master.
Tests on mp3 audio with different frequencies and channel counts:
Miku custom BGM (48kHz, stereo), Hanayaka Nari Wa ga Ichizoku (32kHz, mono, a little fast but better than before), downstreet panic (44.1kHz, stereo), and learn jp09 (44.1kHz, stereo) all play correctly.
In particular, I am very glad to see that Miku's custom BGMs no longer repeat the first tone and no longer stop within the first second either. :)
I hope we are now entering a new age of quickly supporting new audio formats :P
This commit is contained in:
kaienfr 2014-04-11 22:56:59 +02:00
parent c216b535ee
commit 3a12cf2ad7
4 changed files with 171 additions and 83 deletions

View File

@ -89,3 +89,5 @@ struct AudioChannel
extern AudioChannel chans[PSP_AUDIO_CHANNEL_MAX + 1];
void Register_sceAudio();
u32 sceAudioSetFrequency(u32 freq);

View File

@ -27,22 +27,6 @@
#include "Core/Reporting.h"
#include "Core/HW/SimpleAudioDec.h"
static const int ID3 = 0x49443300;
#ifdef USE_FFMPEG
#ifndef PRId64
#define PRId64 "%llu"
#endif
extern "C" {
#include <libavutil/opt.h>
#include <libavformat/avformat.h>
//#include <libavutil/timestamp.h> // iOS build is not happy with this one.
#include <libswresample/swresample.h>
#include <libavutil/samplefmt.h>
}
#endif
static std::map<u32, AuCtx *> mp3Map;
AuCtx *getMp3Ctx(u32 mp3) {
@ -121,6 +105,7 @@ u32 sceMp3ReserveMp3Handle(u32 mp3Addr) {
DEBUG_LOG(ME, "startPos %x endPos %x mp3buf %08x mp3bufSize %08x PCMbuf %08x PCMbufSize %08x",
Au->startPos, Au->endPos, Au->AuBuf, Au->AuBufSize, Au->PCMBuf, Au->PCMBufSize);
Au->audioType = PSP_CODEC_MP3;
Au->Channels = 2;
Au->SumDecodedSamples = 0;
Au->MaxOutputSample = Au->PCMBufSize / 4;
@ -130,7 +115,7 @@ u32 sceMp3ReserveMp3Handle(u32 mp3Addr) {
Au->readPos = Au->startPos;
// create Au decoder
Au->decoder = new SimpleAudio(PSP_CODEC_MP3);
Au->decoder = new SimpleAudio(Au->audioType);
// close the audio if mp3Addr already exist.
if (mp3Map.find(mp3Addr) != mp3Map.end()) {
@ -155,6 +140,86 @@ int sceMp3TermResource() {
return 0;
}
// Translates the 2-bit channel-mode value taken from an MP3 frame header
// into a channel count.
//   bitval 0, 1, 2 -> 2 channels (stereo / joint stereo / dual channel)
//   bitval 3       -> 1 channel  (mono)
//   anything else  -> -1
int __CalculateMp3Channels(int bitval) {
	switch (bitval) {
	case 0: // Stereo
	case 1: // Joint Stereo
	case 2: // Dual Channel
		return 2;
	case 3: // Mono
		return 1;
	default:
		return -1;
	}
}
// Translates the 2-bit sampling-rate index of an MP3 frame header into a
// sampling rate in Hz.
//   bitval     - sampling-rate index; only 0..3 are valid, and index 3 is
//                reported as -1 by every table below.
//   mp3version - MPEG version field: 3 = MPEG 1, 2 = MPEG 2, 0 = MPEG 2.5.
// Returns the sampling rate in Hz, or -1 when the index or the version is
// not recognised.
int __CalculateMp3SampleRates(int bitval, int mp3version) {
	static const int kMpeg1Rates[4] = { 44100, 48000, 32000, -1 };
	static const int kMpeg2Rates[4] = { 22050, 24000, 16000, -1 };
	static const int kMpeg25Rates[4] = { 11025, 12000, 8000, -1 };

	// The tables only have four entries; reject anything else instead of
	// reading past the end of the array.
	if (bitval < 0 || bitval > 3) {
		return -1;
	}

	switch (mp3version) {
	case 3: // MPEG Version 1
		return kMpeg1Rates[bitval];
	case 2: // MPEG Version 2
		return kMpeg2Rates[bitval];
	case 0: // MPEG Version 2.5
		return kMpeg25Rates[bitval];
	default:
		return -1;
	}
}
// Translates the 4-bit bitrate index of an MP3 frame header into a bitrate
// in kbps.
//   bitval     - bitrate index; only 0..15 are valid, and index 15 is
//                reported as -1 by every table below.
//   mp3version - MPEG version field: 3 = MPEG 1, 2 = MPEG 2, 0 = MPEG 2.5.
//   mp3layer   - layer field: 3 = Layer I, 2 = Layer II, 1 = Layer III.
// Returns the bitrate in kbps (0 for index 0), or -1 when the index, version
// or layer is not recognised.
int __CalculateMp3Bitrates(int bitval, int mp3version, int mp3layer) {
	static const int kV1Layer1[16] = { 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, -1 };
	static const int kV1Layer2[16] = { 0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, -1 };
	static const int kV1Layer3[16] = { 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, -1 };
	static const int kV2Layer1[16] = { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, -1 };
	static const int kV2Layer23[16] = { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, -1 };

	// Every table has exactly sixteen entries; reject any other index instead
	// of reading past the end of the array.
	if (bitval < 0 || bitval > 15) {
		return -1;
	}

	const int *table = nullptr;
	if (mp3version == 3) { // MPEG Version 1
		if (mp3layer == 3) { // Layer I
			table = kV1Layer1;
		} else if (mp3layer == 2) { // Layer II
			table = kV1Layer2;
		} else if (mp3layer == 1) { // Layer III
			table = kV1Layer3;
		}
	} else if (mp3version == 2 || mp3version == 0) { // MPEG Version 2 or 2.5
		if (mp3layer == 3) { // Layer I
			table = kV2Layer1;
		} else if (mp3layer == 1 || mp3layer == 2) { // Layer II or III
			table = kV2Layer23;
		}
	}

	// No table selected means the version/layer combination is unknown.
	return table ? table[bitval] : -1;
}
// Reads the 32-bit word that describes the MP3 stream held in ctx's buffer.
//   ctx   - audio context; AuBuf and startPos are read from it.
//   isID3 - set to true when the buffer starts with an ID3 tag; left
//           untouched otherwise, so the caller must initialise it.
// Returns the byte-swapped word at the start of the buffer, or, when an ID3
// tag is present, the byte-swapped word located right after the tag.
// NOTE(review): the first read uses AuBuf alone while the ID3 reads use
// AuBuf + startPos. The visible caller only invokes this with startPos == 0,
// where both agree - confirm which base is intended before relying on it
// with a non-zero startPos.
int __ParseMp3Header(AuCtx *ctx, bool *isID3) {
	// ID3 tag , can be seen in Hanayaka Nari Wa ga Ichizoku.
	static const int ID3 = 0x49443300;

	const int firstWord = bswap32(Memory::Read_U32(ctx->AuBuf));
	if ((firstWord & 0xFFFFFF00) != ID3) {
		// No tag: the first word is the header itself.
		return firstWord;
	}

	*isID3 = true;
	// The tag size sits 6 bytes in, stored with 7 useful bits per byte:
	// the highest bit of each byte has to be ignored (format: 0x7F7F7F7F).
	const int rawSize = bswap32(Memory::Read_U32(ctx->AuBuf + ctx->startPos + 6));
	const int byte0 = rawSize & 0x7F;
	const int byte1 = (rawSize & 0x7F00) >> 1;
	const int byte2 = (rawSize & 0x7F0000) >> 2;
	const int byte3 = (rawSize & 0x7F000000) >> 3;
	const int tagSize = byte0 | byte1 | byte2 | byte3;

	// Skip the 10-byte tag header plus the tag payload.
	return bswap32(Memory::Read_U32(ctx->AuBuf + ctx->startPos + 10 + tagSize));
}
int sceMp3Init(u32 mp3) {
DEBUG_LOG(ME, "sceMp3Init(%08x)", mp3);
@ -164,12 +229,35 @@ int sceMp3Init(u32 mp3) {
return -1;
}
// TODO
// if startPos == 0, we have to read the header part of mp3 and get some information
// and move startPos to stream data position. Decode from header will not success.
if (ctx->startPos == 0){
// Parse the Mp3 header
bool isID3 = false;
int header = __ParseMp3Header(ctx, &isID3);
int layer = (header >> 17) & 0x3;
ctx->Version = ((header >> 19) & 0x3);
ctx->SamplingRate = __CalculateMp3SampleRates((header >> 10) & 0x3, ctx->Version);
ctx->Channels = __CalculateMp3Channels((header >> 6) & 0x3);
ctx->BitRate = __CalculateMp3Bitrates((header >> 12) & 0xF, ctx->Version, layer);
ctx->freq = ctx->SamplingRate;
INFO_LOG(ME, "sceMp3Init(): channels=%i, samplerate=%iHz, bitrate=%ikbps", ctx->Channels, ctx->SamplingRate, ctx->BitRate);
// for mp3, if required freq is 48000, reset resampling Frequency to 48000 seems get better sound quality (e.g. Miku Custom BGM)
if (ctx->freq == 48000){
ctx->decoder->setResampleFrequency(ctx->freq);
}
// For mp3 file, if ID3 tag is detected, we must move startPos and writePos to 0x400 (stream start position), and reduce the available buffer size by 0x400
// this is very important for ID3 tag mp3, since our universal audio decoder is for decoding stream part only.
if (isID3){
// if get ID3 tage, we will decode from 0x400
ctx->startPos = 0x400;
ctx->writePos = 0x400;
ctx->AuBufAvailable -= 0x400;
}
else{
// if no ID3 tag, we will decode from the begining of the file
ctx->startPos = 0;
ctx->writePos = 0;
}
return 0;
@ -199,7 +287,7 @@ int sceMp3GetMaxOutputSample(u32 mp3)
}
int sceMp3GetSumDecodedSample(u32 mp3) {
ERROR_LOG_REPORT(ME, "UNIMPL sceMp3GetSumDecodedSample(%08X)", mp3);
DEBUG_LOG_REPORT(ME, "sceMp3GetSumDecodedSample(%08X)", mp3);
AuCtx *ctx = getMp3Ctx(mp3);
if (!ctx) {
@ -288,7 +376,6 @@ int sceMp3ReleaseMp3Handle(u32 mp3) {
return -1;
}
delete ctx;
mp3Map.erase(mp3);
@ -311,14 +398,14 @@ u32 sceMp3GetFrameNum(u32 mp3) {
}
// Looks up the audio context for the mp3 handle and reports its MPEG version.
// Returns -1 (converted to u32) when getMp3Ctx() yields no context.
// NOTE(review): this span appears to carry both the pre-change and the
// post-change lines of a diff (two log calls, two return statements). Read
// literally, `return 0;` runs first and the sceAuGetVersion() return below it
// is unreachable - confirm which lines belong to the current file.
u32 sceMp3GetMPEGVersion(u32 mp3) {
ERROR_LOG(ME, "UNIMPL sceMp3GetMPEGVersion(%08x)", mp3);
INFO_LOG(ME, "sceMp3GetMPEGVersion(%08x)", mp3);
AuCtx *ctx = getMp3Ctx(mp3);
if (!ctx) {
// Unknown handle: log it and report failure.
ERROR_LOG(ME, "%s: bad mp3 handle %08x", __FUNCTION__, mp3);
return -1;
}
return 0;
return ctx->sceAuGetVersion();
}
u32 sceMp3ResetPlayPositionByFrame(u32 mp3, int position) {

View File

@ -61,7 +61,7 @@ bool SimpleAudio::GetAudioCodecID(int audioType){
}
SimpleAudio::SimpleAudio(int audioType)
: codec_(0), codecCtx_(0), swrCtx_(0), audioType(audioType), outSamples(0){
: codec_(0), codecCtx_(0), swrCtx_(0), audioType(audioType), outSamples(0), wanted_resample_freq(44100){
#ifdef USE_FFMPEG
avcodec_register_all();
av_register_all();
@ -103,7 +103,7 @@ SimpleAudio::SimpleAudio(int audioType)
SimpleAudio::SimpleAudio(u32 ctxPtr, int audioType)
: codec_(0), codecCtx_(0), swrCtx_(0), ctxPtr(ctxPtr), audioType(audioType), outSamples(0){
: codec_(0), codecCtx_(0), swrCtx_(0), ctxPtr(ctxPtr), audioType(audioType), outSamples(0), wanted_resample_freq(44100){
#ifdef USE_FFMPEG
avcodec_register_all();
av_register_all();
@ -136,6 +136,7 @@ SimpleAudio::SimpleAudio(u32 ctxPtr, int audioType)
AVDictionary *opts = 0;
if (avcodec_open2(codecCtx_, codec_, &opts) < 0) {
ERROR_LOG(ME, "Failed to open codec");
av_dict_free(&opts);
return;
}
@ -143,6 +144,35 @@ SimpleAudio::SimpleAudio(u32 ctxPtr, int audioType)
#endif // USE_FFMPEG
}
// Closes the current codec context and re-opens it with a new channel count
// and sample rate.
//   channels   - 2 selects the stereo channel layout, any other value mono.
//   samplerate - sample rate stored on the codec context.
// Returns true when the codec was re-opened; false when there is no codec
// context, the decoder cannot be found, the codec fails to open, or the
// build has no FFmpeg support.
bool SimpleAudio::ResetCodecCtx(int channels, int samplerate){
#ifdef USE_FFMPEG
	// The fields below are written through codecCtx_, so a missing context
	// must be rejected up front instead of being dereferenced.
	if (!codecCtx_) {
		ERROR_LOG(ME, "ResetCodecCtx: no codec context to reset");
		return false;
	}
	avcodec_close(codecCtx_);

	// Find decoder
	codec_ = avcodec_find_decoder(audioCodecId);
	if (!codec_) {
		// Eh, we shouldn't even have managed to compile. But meh.
		ERROR_LOG(ME, "This version of FFMPEG does not support AV_CODEC_ctx for audio (%s). Update your submodule.", GetCodecName(audioType));
		return false;
	}

	codecCtx_->channels = channels;
	codecCtx_->channel_layout = channels == 2 ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
	codecCtx_->sample_rate = samplerate;

	// Open codec
	AVDictionary *opts = 0;
	if (avcodec_open2(codecCtx_, codec_, &opts) < 0) {
		ERROR_LOG(ME, "Failed to open codec");
		av_dict_free(&opts);
		return false;
	}
	av_dict_free(&opts);
	return true;
#else
	// Built without FFmpeg: there is no codec to reset.
	return false;
#endif
}
SimpleAudio::~SimpleAudio() {
#ifdef USE_FFMPEG
if (frame_)
@ -195,7 +225,7 @@ bool SimpleAudio::Decode(void* inbuf, int inbytes, uint8_t *outbuf, int *outbyte
swrCtx_,
wanted_channel_layout,
AV_SAMPLE_FMT_S16,
44100,
wanted_resample_freq,
dec_channel_layout,
codecCtx_->sample_fmt,
codecCtx_->sample_rate,
@ -242,6 +272,10 @@ int SimpleAudio::getSourcePos(){
return srcPos;
}
void SimpleAudio::setResampleFrequency(int freq){
wanted_resample_freq = freq;
}
void AudioClose(SimpleAudio **ctx) {
#ifdef USE_FFMPEG
delete *ctx;
@ -269,7 +303,6 @@ u32 AuCtx::sceAuDecode(u32 pcmAddr)
auto inbuff = Memory::GetPointer(AuBuf);
auto outbuf = Memory::GetPointer(PCMBuf);
memset(outbuf, 0, PCMBufSize);
u32 outpcmbufsize = 0;
// move inbuff to writePos of buffer
@ -287,7 +320,6 @@ u32 AuCtx::sceAuDecode(u32 pcmAddr)
readPos -= AuBufAvailable;
}
AuBufAvailable = 0;
//break;
}
// count total output pcm size
outpcmbufsize += pcmframesize;
@ -306,60 +338,17 @@ u32 AuCtx::sceAuDecode(u32 pcmAddr)
Memory::Write_U32(PCMBuf, pcmAddr);
return outpcmbufsize;
}
/*
// return output pcm size, <0 error
u32 AuCtx::sceAuDecode(u32 pcmAddr)
{
if (!Memory::IsValidAddress(pcmAddr)){
ERROR_LOG(ME, "%s: output bufferAddress %08x is invalctx", __FUNCTION__, pcmAddr);
return -1;
// if we got zero pcm, and we still haven't reach endPos.
// some game like "Miku" will stop playing if we return 0, but some others will recharge buffer.
// so we did a hack here, clear output buff and just return a nonzero value to continue
if (outpcmbufsize == 0 && readPos < endPos){
// clear output buffer will avoid noise
memset(outbuf, 0, PCMBufSize);
return FF_INPUT_BUFFER_PADDING_SIZE; // return a padding size seems very good and almost unsensible latency.
}
auto inbuff = Memory::GetPointer(AuBuf);
auto outbuf = Memory::GetPointer(PCMBuf);
memset(outbuf, 0, PCMBufSize);
u32 outpcmbufsize = 0;
// move inbuff to writePos of buffer
inbuff += writePos;
// decode frames in AuBuf and output into PCMBuf if it is not exceed
if (AuBufAvailable > 0 && outpcmbufsize < PCMBufSize){
int pcmframesize;
// decode
decoder->Decode(inbuff, AuBufAvailable, outbuf, &pcmframesize);
if (pcmframesize == 0){
// no output pcm, we have either no data or no enough data to decode
// move back audio source readPos to the begin of the last incomplete frame if we not start looping and reset available AuBuf
if (readPos > startPos) { // this means we are not begin to loop yet
readPos -= AuBufAvailable;
}
AuBufAvailable = 0;
//break;
}
// count total output pcm size
outpcmbufsize += pcmframesize;
// count total output samples
SumDecodedSamples += decoder->getOutSamples();
// move inbuff position to next frame
int srcPos = decoder->getSourcePos();
inbuff += srcPos;
// decrease available AuBuf
AuBufAvailable -= srcPos;
// modify the writePos value
writePos += srcPos;
// move outbuff position to the current end of output
outbuf += pcmframesize;
}
Memory::Write_U32(PCMBuf, pcmAddr);
return outpcmbufsize;
}
*/
u32 AuCtx::sceAuGetLoopNum()
{
@ -446,4 +435,8 @@ int AuCtx::sceAuGetSamplingRate(){
// Sets readPos to the given position and always returns 0.
u32 AuCtx::sceAuResetPlayPositionByFrame(int position){
	this->readPos = position;
	return 0;
}
}
int AuCtx::sceAuGetVersion(){
return Version;
}

View File

@ -21,6 +21,7 @@
#include "base/basictypes.h"
#include "Core/HW/MediaEngine.h"
#include "Core/HLE/sceAudio.h"
#ifdef USE_FFMPEG
@ -53,11 +54,14 @@ public:
bool IsOK() const { return codec_ != 0; }
int getOutSamples();
int getSourcePos();
bool ResetCodecCtx(int channels, int samplerate);
void setResampleFrequency(int freq);
u32 ctxPtr;
int audioType;
int outSamples; // output samples per frame
int srcPos; // source position after decode
int wanted_resample_freq; // wanted resampling rate/frequency
private:
#ifdef USE_FFMPEG
@ -108,6 +112,7 @@ public:
int BitRate;
int SamplingRate;
int Channels;
int Version;
// audio settings
u32 SumDecodedSamples;
@ -147,6 +152,7 @@ public:
int sceAuGetBitRate();
int sceAuGetSamplingRate();
u32 sceAuResetPlayPositionByFrame(int position);
int sceAuGetVersion();
void DoState(PointerWrap &p) {
auto s = p.Section("AuContext", 1);