Universal Audio Class

Based on my implementation in sceAac (https://github.com/hrydgard/ppsspp/pull/5836), I've created a class, AuCtx, included in my SimpleAudioDec.cpp/.h, which aims to provide a standard, easy way to support all codecs in ffmpeg. Here, I have also completely rewritten the sceMp3 file with this class as an example of how to use it, and this has solved all the mp3 issues I've observed in the current master. Tests on mp3 audio with different frequencies and channel counts - Miku custom BGM (48kHz, stereo), Hanayaka Nari Wa ga Ichizoku (32kHz, mono; a little fast, but better than before), downstreet panic (44.1kHz, stereo), and learn jp09 (44.1kHz, stereo) - all work fine. In particular, I am very glad to see that Miku's custom BGMs no longer repeat the first tone and no longer stop within the first second. :) I hope that from now on we are entering a new age of quickly supporting new audio formats :P
2025-02-06 21:47:44 +00:00 · 2014-04-11 22:56:59 +02:00 · 2014-04-11 22:56:59 +02:00 · 3a12cf2ad7
commit 3a12cf2ad7
parent c216b535ee
4 changed files with 171 additions and 83 deletions
--- a/Core/HLE/sceAudio.h
+++ b/Core/HLE/sceAudio.h
@ -89,3 +89,5 @@ struct AudioChannel
 extern AudioChannel chans[PSP_AUDIO_CHANNEL_MAX + 1];

 void Register_sceAudio();
+
+u32 sceAudioSetFrequency(u32 freq);
--- a/Core/HLE/sceMp3.cpp
+++ b/Core/HLE/sceMp3.cpp
@ -27,22 +27,6 @@
 #include "Core/Reporting.h"
 #include "Core/HW/SimpleAudioDec.h"

-static const int ID3 = 0x49443300;
-
-#ifdef USE_FFMPEG
-#ifndef PRId64
-#define PRId64  "%llu" 
-#endif
-
-extern "C" {
-#include <libavutil/opt.h>
-#include <libavformat/avformat.h>
-//#include <libavutil/timestamp.h>     // iOS build is not happy with this one.
-#include <libswresample/swresample.h>
-#include <libavutil/samplefmt.h>
-}
-#endif
-
 static std::map<u32, AuCtx *> mp3Map;

 AuCtx *getMp3Ctx(u32 mp3) {
@ -121,6 +105,7 @@ u32 sceMp3ReserveMp3Handle(u32 mp3Addr) {
 	DEBUG_LOG(ME, "startPos %x endPos %x mp3buf %08x mp3bufSize %08x PCMbuf %08x PCMbufSize %08x",
 		Au->startPos, Au->endPos, Au->AuBuf, Au->AuBufSize, Au->PCMBuf, Au->PCMBufSize);

+	Au->audioType = PSP_CODEC_MP3;
 	Au->Channels = 2;
 	Au->SumDecodedSamples = 0;
 	Au->MaxOutputSample = Au->PCMBufSize / 4;
@ -130,7 +115,7 @@ u32 sceMp3ReserveMp3Handle(u32 mp3Addr) {
 	Au->readPos = Au->startPos;

 	// create Au decoder
-	Au->decoder = new SimpleAudio(PSP_CODEC_MP3);
+	Au->decoder = new SimpleAudio(Au->audioType);

 	// close the audio if mp3Addr already exist.
 	if (mp3Map.find(mp3Addr) != mp3Map.end()) {
@ -155,6 +140,86 @@ int sceMp3TermResource() {
 	return 0;
 }

+int __CalculateMp3Channels(int bitval) { // Maps the channel-mode value from the frame header to a channel count; returns -1 for any value outside 0..3.
+	if (bitval == 0 || bitval == 1 || bitval == 2) { // Stereo / Joint Stereo / Dual Channel.
+		return 2;
+	}
+	else if (bitval == 3) { // Mono.
+		return 1;
+	}
+	else {
+		return -1; // Not one of the four mode values.
+	}
+}
+
+int __CalculateMp3SampleRates(int bitval, int mp3version) { // Looks up the sample rate in Hz for a rate index and an MPEG version code; returns -1 for index 3 or an unrecognised version code.
+	if (mp3version == 3) { // MPEG Version 1
+		int valuemapping[] = { 44100, 48000, 32000, -1 };
+		return valuemapping[bitval]; // NOTE(review): bitval is not range-checked here; the visible caller masks it with 0x3.
+	}
+	else if (mp3version == 2) { // MPEG Version 2
+		int valuemapping[] = { 22050, 24000, 16000, -1 };
+		return valuemapping[bitval];
+	}
+	else if (mp3version == 0) { // MPEG Version 2.5
+		int valuemapping[] = { 11025, 12000, 8000, -1 };
+		return valuemapping[bitval];
+	}
+	else {
+		return -1; // Version code 1 (or anything outside 0..3) has no table.
+	}
+}
+
+int __CalculateMp3Bitrates(int bitval, int mp3version, int mp3layer) { // Looks up the bitrate (logged by the caller as kbps) for a bitrate index, MPEG version code and layer code; returns -1 for index 15 or an unrecognised version/layer code.
+	if (mp3version == 3) { // MPEG Version 1
+		if (mp3layer == 3) { // Layer I
+			int valuemapping[] = { 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, -1 };
+			return valuemapping[bitval]; // NOTE(review): bitval is not range-checked here; the visible caller masks it with 0xF.
+		}
+		else if (mp3layer == 2) { // Layer II
+			int valuemapping[] = { 0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, -1 };
+			return valuemapping[bitval];
+		}
+		else if (mp3layer == 1) { // Layer III
+			int valuemapping[] = { 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, -1 };
+			return valuemapping[bitval];
+		}
+		else {
+			return -1; // Layer code 0 (or anything outside 1..3) has no table.
+		}
+	}
+	else if (mp3version == 2 || mp3version == 0) { // MPEG Version 2 or 2.5
+		if (mp3layer == 3) { // Layer I
+			int valuemapping[] = { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, -1 };
+			return valuemapping[bitval];
+		}
+		else if (mp3layer == 1 || mp3layer == 2) { // Layer II or III
+			int valuemapping[] = { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, -1 };
+			return valuemapping[bitval]; // Layers II and III share one table for these versions.
+		}
+		else {
+			return -1; // Layer code 0 (or anything outside 1..3) has no table.
+		}
+	}
+	else {
+		return -1; // Version code 1 (or anything outside 0..3) has no table.
+	}
+}
+
+int __ParseMp3Header(AuCtx *ctx, bool *isID3) { // Returns the byte-swapped 32-bit word at ctx->AuBuf, or, when that word starts with "ID3", the word found just past the tag; *isID3 is set to true only in the ID3 case, so the caller must initialise it.
+	int header = bswap32(Memory::Read_U32(ctx->AuBuf)); // NOTE(review): this read does not add ctx->startPos, unlike the two reads below - confirm this is intended.
+	// ID3 tag , can be seen in Hanayaka Nari Wa ga Ichizoku.
+	static const int ID3 = 0x49443300; // ASCII 'I','D','3' in the top three bytes.
+	if ((header & 0xFFFFFF00) == ID3) {
+		*isID3 = true;
+		int size = bswap32(Memory::Read_U32(ctx->AuBuf + ctx->startPos + 6)); // Four size bytes stored at offset 6 from the tag start.
+		// Highest bit of each byte has to be ignored (format: 0x7F7F7F7F)
+		size = (size & 0x7F) | ((size & 0x7F00) >> 1) | ((size & 0x7F0000) >> 2) | ((size & 0x7F000000) >> 3); // Packs the four 7-bit groups into one 28-bit value.
+		header = bswap32(Memory::Read_U32(ctx->AuBuf + ctx->startPos + 10 + size)); // Skips 10 bytes (presumably the fixed ID3 header - confirm) plus the decoded tag size.
+	}
+	return header;
+}
+
 int sceMp3Init(u32 mp3) {
 	DEBUG_LOG(ME, "sceMp3Init(%08x)", mp3);

@ -164,12 +229,35 @@ int sceMp3Init(u32 mp3) {
 		return -1;
 	}

-	// TODO
-	// if startPos == 0, we have to read the header part of mp3 and get some information
-	// and move startPos to stream data position. Decode from header will not success.
-	if (ctx->startPos == 0){
-		
-		
+	// Parse the Mp3 header
+	bool isID3 = false;
+	int header = __ParseMp3Header(ctx, &isID3);
+	int layer = (header >> 17) & 0x3;
+	ctx->Version = ((header >> 19) & 0x3);
+	ctx->SamplingRate = __CalculateMp3SampleRates((header >> 10) & 0x3, ctx->Version);
+	ctx->Channels = __CalculateMp3Channels((header >> 6) & 0x3);
+	ctx->BitRate = __CalculateMp3Bitrates((header >> 12) & 0xF, ctx->Version, layer);
+	ctx->freq = ctx->SamplingRate;
+
+	INFO_LOG(ME, "sceMp3Init(): channels=%i, samplerate=%iHz, bitrate=%ikbps", ctx->Channels, ctx->SamplingRate, ctx->BitRate);
+
+	// for mp3, if required freq is 48000, reset resampling Frequency to 48000 seems get better sound quality (e.g. Miku Custom BGM)
+	if (ctx->freq == 48000){
+		ctx->decoder->setResampleFrequency(ctx->freq);
+	}
+
+	// For mp3 file, if ID3 tag is detected, we must move startPos and writePos to 0x400 (stream start position), and reduce the available buffer size by 0x400
+	// this is very important for ID3 tag mp3, since our universal audio decoder is for decoding stream part only.
+	if (isID3){
+		// if get ID3 tage, we will decode from 0x400
+		ctx->startPos = 0x400;
+		ctx->writePos = 0x400;
+		ctx->AuBufAvailable -= 0x400;
+	}
+	else{
+		// if no ID3 tag, we will decode from the begining of the file
+		ctx->startPos = 0;
+		ctx->writePos = 0;
 	}

 	return 0;
@ -199,7 +287,7 @@ int sceMp3GetMaxOutputSample(u32 mp3)
 }

 int sceMp3GetSumDecodedSample(u32 mp3) {
-	ERROR_LOG_REPORT(ME, "UNIMPL sceMp3GetSumDecodedSample(%08X)", mp3);
+	DEBUG_LOG_REPORT(ME, "sceMp3GetSumDecodedSample(%08X)", mp3);

 	AuCtx *ctx = getMp3Ctx(mp3);
 	if (!ctx) {
@ -288,7 +376,6 @@ int sceMp3ReleaseMp3Handle(u32 mp3) {
 		return -1;
 	}

-
 	delete ctx;
 	mp3Map.erase(mp3);

@ -311,14 +398,14 @@ u32 sceMp3GetFrameNum(u32 mp3) {
 }

 u32 sceMp3GetMPEGVersion(u32 mp3) {
-	ERROR_LOG(ME, "UNIMPL sceMp3GetMPEGVersion(%08x)", mp3);
+	INFO_LOG(ME, "sceMp3GetMPEGVersion(%08x)", mp3);
 	AuCtx *ctx = getMp3Ctx(mp3);
 	if (!ctx) {
 		ERROR_LOG(ME, "%s: bad mp3 handle %08x", __FUNCTION__, mp3);
 		return -1;
 	}

-	return 0;
+	return ctx->sceAuGetVersion();
 }

 u32 sceMp3ResetPlayPositionByFrame(u32 mp3, int position) {
--- a/Core/HW/SimpleAudioDec.cpp
+++ b/Core/HW/SimpleAudioDec.cpp
@ -61,7 +61,7 @@ bool SimpleAudio::GetAudioCodecID(int audioType){
 }

 SimpleAudio::SimpleAudio(int audioType)
-: codec_(0), codecCtx_(0), swrCtx_(0), audioType(audioType), outSamples(0){
+: codec_(0), codecCtx_(0), swrCtx_(0), audioType(audioType), outSamples(0), wanted_resample_freq(44100){
 #ifdef USE_FFMPEG
 	avcodec_register_all();
 	av_register_all();
@ -103,7 +103,7 @@ SimpleAudio::SimpleAudio(int audioType)


 SimpleAudio::SimpleAudio(u32 ctxPtr, int audioType)
-: codec_(0), codecCtx_(0), swrCtx_(0), ctxPtr(ctxPtr), audioType(audioType), outSamples(0){
+: codec_(0), codecCtx_(0), swrCtx_(0), ctxPtr(ctxPtr), audioType(audioType), outSamples(0), wanted_resample_freq(44100){
 #ifdef USE_FFMPEG
 	avcodec_register_all();
 	av_register_all();
@ -136,6 +136,7 @@ SimpleAudio::SimpleAudio(u32 ctxPtr, int audioType)
 	AVDictionary *opts = 0;
 	if (avcodec_open2(codecCtx_, codec_, &opts) < 0) {
 		ERROR_LOG(ME, "Failed to open codec");
+		av_dict_free(&opts);
 		return;
 	}

@ -143,6 +144,35 @@ SimpleAudio::SimpleAudio(u32 ctxPtr, int audioType)
 #endif  // USE_FFMPEG
 }

+bool SimpleAudio::ResetCodecCtx(int channels, int samplerate){ // Closes the codec context if one exists, then re-opens the decoder with the given channel count and sample rate; returns true only when avcodec_open2 succeeds.
+#ifdef USE_FFMPEG
+	if (codecCtx_)
+		avcodec_close(codecCtx_);
+
+	// Find decoder
+	codec_ = avcodec_find_decoder(audioCodecId);
+	if (!codec_) {
+		// Eh, we shouldn't even have managed to compile. But meh.
+		ERROR_LOG(ME, "This version of FFMPEG does not support AV_CODEC_ctx for audio (%s). Update your submodule.", GetCodecName(audioType));
+		return false;
+	}
+
+	codecCtx_->channels = channels; // NOTE(review): codecCtx_ is dereferenced here without the null check that guards avcodec_close above.
+	codecCtx_->channel_layout = channels==2?AV_CH_LAYOUT_STEREO:AV_CH_LAYOUT_MONO; // Any channel count other than 2 selects the mono layout.
+	codecCtx_->sample_rate = samplerate;
+	// Open codec
+	AVDictionary *opts = 0;
+	if (avcodec_open2(codecCtx_, codec_, &opts) < 0) {
+		ERROR_LOG(ME, "Failed to open codec");
+		av_dict_free(&opts); // Free the options dictionary on the failure path as well.
+		return false;
+	}
+	av_dict_free(&opts);
+	return true;
+#endif
+	return false; // Reached only when USE_FFMPEG is not defined.
+}
+
 SimpleAudio::~SimpleAudio() {
 #ifdef USE_FFMPEG
 	if (frame_)
@ -195,7 +225,7 @@ bool SimpleAudio::Decode(void* inbuf, int inbytes, uint8_t *outbuf, int *outbyte
 			swrCtx_,
 			wanted_channel_layout,
 			AV_SAMPLE_FMT_S16,
-			44100,
+			wanted_resample_freq,
 			dec_channel_layout,
 			codecCtx_->sample_fmt,
 			codecCtx_->sample_rate,
@ -242,6 +272,10 @@ int SimpleAudio::getSourcePos(){
 	return srcPos;
 }

+void SimpleAudio::setResampleFrequency(int freq){ // Stores freq in wanted_resample_freq, replacing the 44100 set by the constructors; the value is not validated here.
+	wanted_resample_freq = freq;
+}
+
 void AudioClose(SimpleAudio **ctx) {
 #ifdef USE_FFMPEG
 	delete *ctx;
@ -269,7 +303,6 @@ u32 AuCtx::sceAuDecode(u32 pcmAddr)

 	auto inbuff = Memory::GetPointer(AuBuf);
 	auto outbuf = Memory::GetPointer(PCMBuf);
-	memset(outbuf, 0, PCMBufSize);
 	u32 outpcmbufsize = 0;

 	// move inbuff to writePos of buffer
@ -287,7 +320,6 @@ u32 AuCtx::sceAuDecode(u32 pcmAddr)
 				readPos -= AuBufAvailable;
 			}
 			AuBufAvailable = 0;
-			//break;
 		}
 		// count total output pcm size 
 		outpcmbufsize += pcmframesize;
@ -306,60 +338,17 @@ u32 AuCtx::sceAuDecode(u32 pcmAddr)

 	Memory::Write_U32(PCMBuf, pcmAddr);

-	return outpcmbufsize;
-}
-
-/*
-// return output pcm size, <0 error
-u32 AuCtx::sceAuDecode(u32 pcmAddr)
-{
-	if (!Memory::IsValidAddress(pcmAddr)){
-		ERROR_LOG(ME, "%s: output bufferAddress %08x is invalctx", __FUNCTION__, pcmAddr);
-		return -1;
+	// if we got zero pcm, and we still haven't reach endPos. 
+	// some game like "Miku" will stop playing if we return 0, but some others will recharge buffer.
+	// so we did a hack here, clear output buff and just return a nonzero value to continue 
+	if (outpcmbufsize == 0 && readPos < endPos){
+		// clear output buffer will avoid noise
+		memset(outbuf, 0, PCMBufSize);
+		return FF_INPUT_BUFFER_PADDING_SIZE; // return a padding size seems very good and almost unsensible latency.
 	}

-	auto inbuff = Memory::GetPointer(AuBuf);
-	auto outbuf = Memory::GetPointer(PCMBuf);
-	memset(outbuf, 0, PCMBufSize);
-	u32 outpcmbufsize = 0;
-
-	// move inbuff to writePos of buffer
-	inbuff += writePos;
-
-	// decode frames in AuBuf and output into PCMBuf if it is not exceed
-	if (AuBufAvailable > 0 && outpcmbufsize < PCMBufSize){
-		int pcmframesize;
-		// decode
-		decoder->Decode(inbuff, AuBufAvailable, outbuf, &pcmframesize);
-		if (pcmframesize == 0){
-			// no output pcm, we have either no data or no enough data to decode
-			// move back audio source readPos to the begin of the last incomplete frame if we not start looping and reset available AuBuf
-			if (readPos > startPos) { // this means we are not begin to loop yet
-				readPos -= AuBufAvailable;
-			}
-			AuBufAvailable = 0;
-			//break;
-		}
-		// count total output pcm size 
-		outpcmbufsize += pcmframesize;
-		// count total output samples
-		SumDecodedSamples += decoder->getOutSamples();
-		// move inbuff position to next frame
-		int srcPos = decoder->getSourcePos();
-		inbuff += srcPos;
-		// decrease available AuBuf
-		AuBufAvailable -= srcPos;
-		// modify the writePos value
-		writePos += srcPos;
-		// move outbuff position to the current end of output 
-		outbuf += pcmframesize;
-	}
-
-	Memory::Write_U32(PCMBuf, pcmAddr);
-
 	return outpcmbufsize;
 }
-*/

 u32 AuCtx::sceAuGetLoopNum()
 {
@ -446,4 +435,8 @@ int AuCtx::sceAuGetSamplingRate(){
 u32 AuCtx::sceAuResetPlayPositionByFrame(int position){
 	readPos = position;
 	return 0;
-}
+}
+
+int AuCtx::sceAuGetVersion(){ // Returns the Version member as stored; sceMp3Init fills it with the 2-bit version code taken from header bits 19-20.
+	return Version;
+}
--- a/Core/HW/SimpleAudioDec.h
+++ b/Core/HW/SimpleAudioDec.h
@ -21,6 +21,7 @@

 #include "base/basictypes.h"
 #include "Core/HW/MediaEngine.h"
+#include "Core/HLE/sceAudio.h"

 #ifdef USE_FFMPEG

@ -53,11 +54,14 @@ public:
 	bool IsOK() const { return codec_ != 0; }
 	int getOutSamples();
 	int getSourcePos();
+	bool ResetCodecCtx(int channels, int samplerate);
+	void setResampleFrequency(int freq);

 	u32 ctxPtr;
 	int audioType;
 	int outSamples; // output samples per frame
 	int srcPos; // source position after decode 
+	int wanted_resample_freq; // wanted resampling rate/frequency

 private:
 #ifdef USE_FFMPEG
@ -108,6 +112,7 @@ public:
 	int BitRate;
 	int SamplingRate;
 	int Channels;
+	int Version;

 	// audio settings
 	u32 SumDecodedSamples;
@ -147,6 +152,7 @@ public:
 	int sceAuGetBitRate();
 	int sceAuGetSamplingRate();
 	u32 sceAuResetPlayPositionByFrame(int position);
+	int sceAuGetVersion();

 	void DoState(PointerWrap &p) {
 		auto s = p.Section("AuContext", 1);