mirror of
https://github.com/RPCS3/soundtouch.git
synced 2024-11-23 19:39:47 +00:00
Merge branch 'bpm-work' into 'master'
BPM algorithm work - improved beat analysis routine and added individual beat detection See merge request soundtouch/soundtouch!1
This commit is contained in:
commit
8f6f91f9b3
13
README.html
13
README.html
@ -13,8 +13,8 @@
|
||||
</head>
|
||||
<body class="normal">
|
||||
<hr>
|
||||
<h1>SoundTouch audio processing library v2.0.1pre</h1>
|
||||
<p class="normal">SoundTouch library Copyright © Olli Parviainen 2001-2017</p>
|
||||
<h1>SoundTouch audio processing library v2.1pre</h1>
|
||||
<p class="normal">SoundTouch library Copyright © Olli Parviainen 2001-2018</p>
|
||||
<hr>
|
||||
<h2>1. Introduction </h2>
|
||||
<p>SoundTouch is an open-source audio processing library that allows
|
||||
@ -573,7 +573,7 @@ this corresponds to lowering the pitch by -0.318 semitones:</p>
|
||||
<hr>
|
||||
<h2>5. Change History</h2>
|
||||
<h3>5.1. SoundTouch library Change History </h3>
|
||||
<p><b>2.0.1pre:</b></p>
|
||||
<p><b>2.1pre:</b></p>
|
||||
<ul>
|
||||
<li>Refactored C# interface example</li>
|
||||
<li>Disable anti-alias filter when switch
|
||||
@ -581,7 +581,10 @@ this corresponds to lowering the pitch by -0.318 semitones:</p>
|
||||
filter cause slight click if the rate change crosses zero during
|
||||
processing</li>
|
||||
<li>Added script for building SoundTouchDll dynamic-link-library for GNU platforms</li>
|
||||
<li>Added BPM functions to SoundTouchDll API</li>
|
||||
<li>Rewrote Beats-per-Minute analysis algorithm for more reliable BPM
|
||||
detection</li>
|
||||
<li>Added BPM functions to SoundTouchDll API</li>
|
||||
<li>Migrated Visual Studio project files to MSVC 201x format</li>
|
||||
</ul>
|
||||
<p><b>2.0:</b></p>
|
||||
<ul>
|
||||
@ -901,6 +904,8 @@ General Public License for more details.</p>
|
||||
<p>You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA</p>
|
||||
<p>---</p>
|
||||
<p>commercial license alternative also available, contact author for details.</p>
|
||||
<hr>
|
||||
<p><i>README.html file updated in May-2018</i></p>
|
||||
</body>
|
||||
|
@ -50,102 +50,232 @@
|
||||
#ifndef _BPMDetect_H_
|
||||
#define _BPMDetect_H_
|
||||
|
||||
#include <vector>
|
||||
#include "STTypes.h"
|
||||
#include "FIFOSampleBuffer.h"
|
||||
|
||||
namespace soundtouch
|
||||
{
|
||||
|
||||
/// Minimum allowed BPM rate. Used to restrict accepted result above a reasonable limit.
|
||||
#define MIN_BPM 29
|
||||
/// Minimum allowed BPM rate. Used to restrict accepted result above a reasonable limit.
|
||||
#define MIN_BPM 45
|
||||
|
||||
/// Maximum allowed BPM rate. Used to restrict accepted result below a reasonable limit.
|
||||
#define MAX_BPM 200
|
||||
/// Maximum allowed BPM rate range. Used for calculating algorithm parameters
|
||||
#define MAX_BPM_RANGE 200
|
||||
|
||||
/// Maximum allowed BPM rate range. Used to restrict accepted result below a reasonable limit.
|
||||
#define MAX_BPM_VALID 190
|
||||
|
||||
|
||||
/// Class for calculating BPM rate for audio data.
|
||||
class BPMDetect
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
class BeatCollection
|
||||
{
|
||||
protected:
|
||||
/// Auto-correlation accumulator bins.
|
||||
float *xcorr;
|
||||
|
||||
/// Sample average counter.
|
||||
int decimateCount;
|
||||
private:
|
||||
|
||||
/// Sample average accumulator for FIFO-like decimation.
|
||||
soundtouch::LONG_SAMPLETYPE decimateSum;
|
||||
int size;
|
||||
|
||||
/// Decimate sound by this coefficient to reach approx. 500 Hz.
|
||||
int decimateBy;
|
||||
|
||||
/// Auto-correlation window length
|
||||
int windowLen;
|
||||
|
||||
/// Number of channels (1 = mono, 2 = stereo)
|
||||
int channels;
|
||||
|
||||
/// sample rate
|
||||
int sampleRate;
|
||||
|
||||
/// Beginning of auto-correlation window: Autocorrelation isn't being updated for
|
||||
/// the first these many correlation bins.
|
||||
int windowStart;
|
||||
|
||||
/// FIFO-buffer for decimated processing samples.
|
||||
soundtouch::FIFOSampleBuffer *buffer;
|
||||
|
||||
/// Updates auto-correlation function for given number of decimated samples that
|
||||
/// are read from the internal 'buffer' pipe (samples aren't removed from the pipe
|
||||
/// though).
|
||||
void updateXCorr(int process_samples /// How many samples are processed.
|
||||
);
|
||||
|
||||
/// Decimates samples to approx. 500 Hz.
|
||||
///
|
||||
/// \return Number of output samples.
|
||||
int decimate(soundtouch::SAMPLETYPE *dest, ///< Destination buffer
|
||||
const soundtouch::SAMPLETYPE *src, ///< Source sample buffer
|
||||
int numsamples ///< Number of source samples.
|
||||
);
|
||||
|
||||
/// Calculates amplitude envelope for the buffer of samples.
|
||||
/// Result is output to 'samples'.
|
||||
void calcEnvelope(soundtouch::SAMPLETYPE *samples, ///< Pointer to input/output data buffer
|
||||
int numsamples ///< Number of samples in buffer
|
||||
);
|
||||
|
||||
/// remove constant bias from xcorr data
|
||||
void removeBias();
|
||||
// Ensure there's enough capacity in arrays
|
||||
void EnsureCapacity(int newCapacity)
|
||||
{
|
||||
if (newCapacity > size)
|
||||
{
|
||||
// enlarge arrays
|
||||
int oldSize = size;
|
||||
float *beatPosOld = beatPos;
|
||||
float *beatValuesOld = beatValues;
|
||||
while (size < newCapacity) size *= 2;
|
||||
printf("Alloc more %d\n", size);
|
||||
beatPos = new float[size];
|
||||
beatValues = new float[size];
|
||||
if ((beatPos == NULL) || (beatValues == NULL))
|
||||
{
|
||||
ST_THROW_RT_ERROR("can't allocate memory");
|
||||
}
|
||||
// copy old arrays to new arrays
|
||||
memcpy(beatPos, beatPosOld, sizeof(float)*oldSize);
|
||||
memcpy(beatValues, beatValuesOld, sizeof(float)*oldSize);
|
||||
// free old arrays
|
||||
delete[] beatPosOld;
|
||||
delete[] beatValuesOld;
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
/// Constructor.
|
||||
BPMDetect(int numChannels, ///< Number of channels in sample data.
|
||||
int sampleRate ///< Sample rate in Hz.
|
||||
);
|
||||
// beat position array
|
||||
float *beatPos;
|
||||
|
||||
/// Destructor.
|
||||
virtual ~BPMDetect();
|
||||
// beat values array
|
||||
float *beatValues;
|
||||
|
||||
/// Inputs a block of samples for analyzing: Envelopes the samples and then
|
||||
/// updates the autocorrelation estimation. When whole song data has been input
|
||||
/// in smaller blocks using this function, read the resulting bpm with 'getBpm'
|
||||
/// function.
|
||||
///
|
||||
/// Notice that data in 'samples' array can be disrupted in processing.
|
||||
void inputSamples(const soundtouch::SAMPLETYPE *samples, ///< Pointer to input/working data buffer
|
||||
int numSamples ///< Number of samples in buffer
|
||||
);
|
||||
// number of beats in arrays
|
||||
int numBeats;
|
||||
|
||||
// constructor
|
||||
BeatCollection()
|
||||
{
|
||||
numBeats = 0;
|
||||
size = 1024;
|
||||
beatPos = new float[size];
|
||||
beatValues = new float[size];
|
||||
}
|
||||
|
||||
|
||||
/// Analyzes the results and returns the BPM rate. Use this function to read result
|
||||
/// after whole song data has been input to the class by consecutive calls of
|
||||
/// 'inputSamples' function.
|
||||
///
|
||||
/// \return Beats-per-minute rate, or zero if detection failed.
|
||||
float getBpm();
|
||||
// destructor
|
||||
~BeatCollection()
|
||||
{
|
||||
delete[] beatPos;
|
||||
delete[] beatValues;
|
||||
}
|
||||
|
||||
|
||||
// add new beat position into array
|
||||
void Add(float pos, float value)
|
||||
{
|
||||
EnsureCapacity(numBeats + 1);
|
||||
beatPos[numBeats] = pos;
|
||||
beatValues[numBeats] = value;
|
||||
numBeats++;
|
||||
}
|
||||
|
||||
};
|
||||
*/
|
||||
|
||||
/// A single detected beat.
struct BEAT
{
    float pos;        ///< Beat position (BPMDetect stores the sample position scaled by decimateBy / sampleRate)
    float strength;   ///< Beat detection strength; higher means a clearer beat
};
|
||||
|
||||
|
||||
/// 2nd order IIR filter with five coefficients and five history values.
class IIR2_filter
{
private:
    double coeffs[5];   ///< Filter coefficients, copied from the array given to the constructor
    double prev[5];     ///< Filter history carried over between update() calls

public:
    /// Constructor.
    ///
    /// \param lpf_coeffs Pointer to an array of five filter coefficients.
    IIR2_filter(const double *lpf_coeffs);

    /// Feeds one input sample through the filter.
    ///
    /// \return Filter output for the given input sample.
    float update(float x);
};
|
||||
|
||||
|
||||
/// Class for calculating BPM rate for audio data.
|
||||
class BPMDetect
|
||||
{
|
||||
protected:
|
||||
/// Auto-correlation accumulator bins.
|
||||
float *xcorr;
|
||||
|
||||
/// Sample average counter.
|
||||
int decimateCount;
|
||||
|
||||
/// Sample average accumulator for FIFO-like decimation.
|
||||
soundtouch::LONG_SAMPLETYPE decimateSum;
|
||||
|
||||
/// Decimate sound by this coefficient to reach approx. 500 Hz.
|
||||
int decimateBy;
|
||||
|
||||
/// Auto-correlation window length
|
||||
int windowLen;
|
||||
|
||||
/// Number of channels (1 = mono, 2 = stereo)
|
||||
int channels;
|
||||
|
||||
/// sample rate
|
||||
int sampleRate;
|
||||
|
||||
/// Beginning of auto-correlation window: Autocorrelation isn't being updated for
|
||||
/// the first these many correlation bins.
|
||||
int windowStart;
|
||||
|
||||
/// window functions for data preconditioning
|
||||
float *hamw;
|
||||
float *hamw2;
|
||||
|
||||
// beat detection variables
|
||||
int pos;
|
||||
int peakPos;
|
||||
int beatcorr_ringbuffpos;
|
||||
int init_scaler;
|
||||
float peakVal;
|
||||
float *beatcorr_ringbuff;
|
||||
|
||||
/// FIFO-buffer for decimated processing samples.
|
||||
soundtouch::FIFOSampleBuffer *buffer;
|
||||
|
||||
/// Collection of detected beat positions
|
||||
//BeatCollection beats;
|
||||
std::vector<BEAT> beats;
|
||||
|
||||
// 2nd order low-pass-filter
|
||||
IIR2_filter beat_lpf;
|
||||
|
||||
/// Updates auto-correlation function for given number of decimated samples that
|
||||
/// are read from the internal 'buffer' pipe (samples aren't removed from the pipe
|
||||
/// though).
|
||||
void updateXCorr(int process_samples /// How many samples are processed.
|
||||
);
|
||||
|
||||
/// Decimates samples to approx. 500 Hz.
|
||||
///
|
||||
/// \return Number of output samples.
|
||||
int decimate(soundtouch::SAMPLETYPE *dest, ///< Destination buffer
|
||||
const soundtouch::SAMPLETYPE *src, ///< Source sample buffer
|
||||
int numsamples ///< Number of source samples.
|
||||
);
|
||||
|
||||
/// Calculates amplitude envelope for the buffer of samples.
|
||||
/// Result is output to 'samples'.
|
||||
void calcEnvelope(soundtouch::SAMPLETYPE *samples, ///< Pointer to input/output data buffer
|
||||
int numsamples ///< Number of samples in buffer
|
||||
);
|
||||
|
||||
/// remove constant bias from xcorr data
|
||||
void removeBias();
|
||||
|
||||
// Detect individual beat positions
|
||||
void updateBeatPos(int process_samples);
|
||||
|
||||
|
||||
public:
|
||||
/// Constructor.
|
||||
BPMDetect(int numChannels, ///< Number of channels in sample data.
|
||||
int sampleRate ///< Sample rate in Hz.
|
||||
);
|
||||
|
||||
/// Destructor.
|
||||
virtual ~BPMDetect();
|
||||
|
||||
/// Inputs a block of samples for analyzing: Envelopes the samples and then
|
||||
/// updates the autocorrelation estimation. When whole song data has been input
|
||||
/// in smaller blocks using this function, read the resulting bpm with 'getBpm'
|
||||
/// function.
|
||||
///
|
||||
/// Notice that data in 'samples' array can be disrupted in processing.
|
||||
void inputSamples(const soundtouch::SAMPLETYPE *samples, ///< Pointer to input/working data buffer
|
||||
int numSamples ///< Number of samples in buffer
|
||||
);
|
||||
|
||||
|
||||
/// Analyzes the results and returns the BPM rate. Use this function to read result
|
||||
/// after whole song data has been input to the class by consecutive calls of
|
||||
/// 'inputSamples' function.
|
||||
///
|
||||
/// \return Beats-per-minute rate, or zero if detection failed.
|
||||
float getBpm();
|
||||
|
||||
|
||||
/// Get beat position arrays. Note: The array includes also really low beat detection values
|
||||
/// in absence of clear strong beats. Consumer may wish to filter low values away.
|
||||
/// - "pos" receive array of beat positions
|
||||
/// - "values" receive array of beat detection strengths
|
||||
/// - max_num indicates max.size of "pos" and "values" array.
|
||||
///
|
||||
/// You can query a suitable array sized by calling this with NULL in "pos" & "values".
|
||||
///
|
||||
/// \return number of beats in the arrays.
|
||||
int getBeats(float *pos, float *strength, int max_num);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // _BPMDetect_H_
|
||||
|
@ -72,10 +72,10 @@ namespace soundtouch
|
||||
{
|
||||
|
||||
/// Soundtouch library version string
|
||||
#define SOUNDTOUCH_VERSION "2.0.1pre"
|
||||
#define SOUNDTOUCH_VERSION "2.1pre"
|
||||
|
||||
/// SoundTouch library version id
|
||||
#define SOUNDTOUCH_VERSION_ID (20001)
|
||||
#define SOUNDTOUCH_VERSION_ID (20009)
|
||||
|
||||
//
|
||||
// Available setting IDs for the 'setSetting' & 'get_setting' functions:
|
||||
|
@ -58,8 +58,8 @@ using namespace std;
|
||||
|
||||
static const char _helloText[] =
|
||||
"\n"
|
||||
" SoundStretch v%s - Copyright (c) Olli Parviainen 2001 - 2017\n"
|
||||
"==================================================================\n"
|
||||
" SoundStretch v%s - Copyright (c) Olli Parviainen\n"
|
||||
"=========================================================\n"
|
||||
"author e-mail: <oparviai"
|
||||
"@"
|
||||
"iki.fi> - WWW: http://www.surina.net/soundtouch\n"
|
||||
|
@ -47,6 +47,8 @@
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define _USE_MATH_DEFINES
|
||||
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
@ -57,40 +59,49 @@
|
||||
|
||||
using namespace soundtouch;
|
||||
|
||||
#define INPUT_BLOCK_SAMPLES 2048
|
||||
#define DECIMATED_BLOCK_SAMPLES 256
|
||||
// algorithm input sample block size
|
||||
static const int INPUT_BLOCK_SIZE = 2048;
|
||||
|
||||
// decimated sample block size
|
||||
static const int DECIMATED_BLOCK_SIZE = 256;
|
||||
|
||||
/// Target sample rate after decimation
|
||||
const int target_srate = 1000;
|
||||
static const int TARGET_SRATE = 1000;
|
||||
|
||||
/// XCorr update sequence size, update in about 200msec chunks
|
||||
const int xcorr_update_sequence = 200;
|
||||
static const int XCORR_UPDATE_SEQUENCE = (int)(TARGET_SRATE / 5);
|
||||
|
||||
/// Moving average N size
|
||||
static const int MOVING_AVERAGE_N = 15;
|
||||
|
||||
/// XCorr decay time constant, decay to half in 30 seconds
|
||||
/// If it's desired to have the system adapt quicker to beat rate
|
||||
/// changes within a continuing music stream, then the
|
||||
/// 'xcorr_decay_time_constant' value can be reduced, yet that
|
||||
/// can increase possibility of glitches in bpm detection.
|
||||
const double xcorr_decay_time_constant = 30.0;
|
||||
static const double XCORR_DECAY_TIME_CONSTANT = 30.0;
|
||||
|
||||
/// Data overlap factor for beat detection algorithm
|
||||
static const int OVERLAP_FACTOR = 4;
|
||||
|
||||
static const double TWOPI = (2 * M_PI);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Enable following define to create bpm analysis file:
|
||||
|
||||
// #define _CREATE_BPM_DEBUG_FILE
|
||||
//#define _CREATE_BPM_DEBUG_FILE
|
||||
|
||||
#ifdef _CREATE_BPM_DEBUG_FILE
|
||||
|
||||
#define DEBUGFILE_NAME "c:\\temp\\soundtouch-bpm-debug.txt"
|
||||
|
||||
static void _SaveDebugData(const float *data, int minpos, int maxpos, double coeff)
|
||||
static void _SaveDebugData(const char *name, const float *data, int minpos, int maxpos, double coeff)
|
||||
{
|
||||
FILE *fptr = fopen(DEBUGFILE_NAME, "wt");
|
||||
FILE *fptr = fopen(name, "wt");
|
||||
int i;
|
||||
|
||||
if (fptr)
|
||||
{
|
||||
printf("\n\nWriting BPM debug data into file " DEBUGFILE_NAME "\n\n");
|
||||
printf("\nWriting BPM debug data into file %s\n", name);
|
||||
for (i = minpos; i < maxpos; i ++)
|
||||
{
|
||||
fprintf(fptr, "%d\t%.1lf\t%f\n", i, coeff / (double)i, data[i]);
|
||||
@ -98,15 +109,74 @@ const double xcorr_decay_time_constant = 30.0;
|
||||
fclose(fptr);
|
||||
}
|
||||
}
|
||||
|
||||
void _SaveDebugBeatPos(const char *name, const std::vector<BEAT> &beats)
|
||||
{
|
||||
printf("\nWriting beat detections data into file %s\n", name);
|
||||
|
||||
FILE *fptr = fopen(name, "wt");
|
||||
if (fptr)
|
||||
{
|
||||
for (uint i = 0; i < beats.size(); i++)
|
||||
{
|
||||
BEAT b = beats[i];
|
||||
fprintf(fptr, "%lf\t%lf\n", b.pos, b.strength);
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
}
|
||||
#else
|
||||
#define _SaveDebugData(a,b,c,d)
|
||||
#define _SaveDebugData(name, a,b,c,d)
|
||||
#define _SaveDebugBeatPos(name, b)
|
||||
#endif
|
||||
|
||||
// Hamming window
|
||||
void hamming(float *w, int N)
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
w[i] = (float)(0.54 - 0.46 * cos(TWOPI * i / (N - 1)));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IIR2_filter - 2nd order IIR filter
|
||||
|
||||
/// Constructor. Copies the five filter coefficients from 'lpf_coeffs' and
/// clears the filter history.
IIR2_filter::IIR2_filter(const double *lpf_coeffs)
{
    // sizeof(coeffs) keeps the copy size tied to the member array declaration
    memcpy(coeffs, lpf_coeffs, sizeof(coeffs));
    memset(prev, 0, sizeof(prev));
}
|
||||
|
||||
|
||||
float IIR2_filter::update(float x)
|
||||
{
|
||||
prev[0] = x;
|
||||
double y = x * coeffs[0];
|
||||
|
||||
for (int i = 4; i >= 1; i--)
|
||||
{
|
||||
y += coeffs[i] * prev[i];
|
||||
prev[i] = prev[i - 1];
|
||||
}
|
||||
|
||||
prev[3] = y;
|
||||
return (float)y;
|
||||
}
|
||||
|
||||
|
||||
// IIR low-pass filter coefficients, calculated with matlab/octave cheby2(2,40,0.05)
|
||||
const double _LPF_coeffs[5] = { 0.00996655391939, -0.01944529148401, 0.00996655391939, 1.96867605796247, -0.96916387431724 };
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
BPMDetect::BPMDetect(int numChannels, int aSampleRate)
|
||||
BPMDetect::BPMDetect(int numChannels, int aSampleRate) :
|
||||
beat_lpf(_LPF_coeffs)
|
||||
{
|
||||
beats.reserve(250); // initial reservation to prevent frequent reallocation
|
||||
|
||||
this->sampleRate = aSampleRate;
|
||||
this->channels = numChannels;
|
||||
|
||||
@ -114,13 +184,13 @@ BPMDetect::BPMDetect(int numChannels, int aSampleRate)
|
||||
decimateCount = 0;
|
||||
|
||||
// choose decimation factor so that result is approx. 1000 Hz
|
||||
decimateBy = sampleRate / target_srate;
|
||||
decimateBy = sampleRate / TARGET_SRATE;
|
||||
assert(decimateBy > 0);
|
||||
assert(INPUT_BLOCK_SAMPLES < decimateBy * DECIMATED_BLOCK_SAMPLES);
|
||||
assert(INPUT_BLOCK_SIZE < decimateBy * DECIMATED_BLOCK_SIZE);
|
||||
|
||||
// Calculate window length & starting item according to desired min & max bpms
|
||||
windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM);
|
||||
windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM);
|
||||
windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM_RANGE);
|
||||
|
||||
assert(windowLen > windowStart);
|
||||
|
||||
@ -128,23 +198,38 @@ BPMDetect::BPMDetect(int numChannels, int aSampleRate)
|
||||
xcorr = new float[windowLen];
|
||||
memset(xcorr, 0, windowLen * sizeof(float));
|
||||
|
||||
pos = 0;
|
||||
peakPos = 0;
|
||||
peakVal = 0;
|
||||
init_scaler = 1;
|
||||
beatcorr_ringbuffpos = 0;
|
||||
beatcorr_ringbuff = new float[windowLen];
|
||||
memset(beatcorr_ringbuff, 0, windowLen * sizeof(float));
|
||||
|
||||
// allocate processing buffer
|
||||
buffer = new FIFOSampleBuffer();
|
||||
// we do processing in mono mode
|
||||
buffer->setChannels(1);
|
||||
buffer->clear();
|
||||
}
|
||||
|
||||
// calculate hamming windows
|
||||
hamw = new float[XCORR_UPDATE_SEQUENCE];
|
||||
hamming(hamw, XCORR_UPDATE_SEQUENCE);
|
||||
hamw2 = new float[XCORR_UPDATE_SEQUENCE / 2];
|
||||
hamming(hamw2, XCORR_UPDATE_SEQUENCE / 2);
|
||||
}
|
||||
|
||||
|
||||
/// Destructor. Releases the buffers allocated in the constructor.
BPMDetect::~BPMDetect()
{
    delete[] xcorr;
    delete[] beatcorr_ringbuff;
    delete[] hamw;
    delete[] hamw2;
    delete buffer;
}
|
||||
|
||||
|
||||
|
||||
/// convert to mono, low-pass filter & decimate to about 1000 Hz.
|
||||
/// return number of outputted samples.
|
||||
///
|
||||
@ -201,7 +286,6 @@ int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples)
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Calculates autocorrelation function of the sample history buffer
|
||||
void BPMDetect::updateXCorr(int process_samples)
|
||||
{
|
||||
@ -209,22 +293,30 @@ void BPMDetect::updateXCorr(int process_samples)
|
||||
SAMPLETYPE *pBuffer;
|
||||
|
||||
assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
|
||||
assert(process_samples == XCORR_UPDATE_SEQUENCE);
|
||||
|
||||
pBuffer = buffer->ptrBegin();
|
||||
|
||||
// calculate decay factor for xcorr filtering
|
||||
float xcorr_decay = (float)pow(0.5, 1.0 / (xcorr_decay_time_constant * target_srate / process_samples));
|
||||
float xcorr_decay = (float)pow(0.5, 1.0 / (XCORR_DECAY_TIME_CONSTANT * TARGET_SRATE / process_samples));
|
||||
|
||||
// prescale pbuffer
|
||||
float tmp[XCORR_UPDATE_SEQUENCE];
|
||||
for (int i = 0; i < process_samples; i++)
|
||||
{
|
||||
tmp[i] = hamw[i] * hamw[i] * pBuffer[i];
|
||||
}
|
||||
|
||||
#pragma omp parallel for
|
||||
for (offs = windowStart; offs < windowLen; offs ++)
|
||||
{
|
||||
LONG_SAMPLETYPE sum;
|
||||
double sum;
|
||||
int i;
|
||||
|
||||
sum = 0;
|
||||
for (i = 0; i < process_samples; i ++)
|
||||
{
|
||||
sum += pBuffer[i] * pBuffer[i + offs]; // scaling the sub-result shouldn't be necessary
|
||||
sum += tmp[i] * pBuffer[i + offs]; // scaling the sub-result shouldn't be necessary
|
||||
}
|
||||
xcorr[offs] *= xcorr_decay; // decay 'xcorr' here with suitable time constant.
|
||||
|
||||
@ -233,10 +325,92 @@ void BPMDetect::updateXCorr(int process_samples)
|
||||
}
|
||||
|
||||
|
||||
// Detect individual beat positions
|
||||
void BPMDetect::updateBeatPos(int process_samples)
|
||||
{
|
||||
SAMPLETYPE *pBuffer;
|
||||
|
||||
assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
|
||||
|
||||
pBuffer = buffer->ptrBegin();
|
||||
assert(process_samples == XCORR_UPDATE_SEQUENCE / 2);
|
||||
|
||||
// static double thr = 0.0003;
|
||||
double posScale = (double)this->decimateBy / (double)this->sampleRate;
|
||||
int resetDur = (int)(0.12 / posScale + 0.5);
|
||||
double corrScale = 1.0 / (double)(windowLen - windowStart);
|
||||
|
||||
// prescale pbuffer
|
||||
float tmp[XCORR_UPDATE_SEQUENCE / 2];
|
||||
for (int i = 0; i < process_samples; i++)
|
||||
{
|
||||
tmp[i] = hamw2[i] * hamw2[i] * pBuffer[i];
|
||||
}
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int offs = windowStart; offs < windowLen; offs++)
|
||||
{
|
||||
double sum = 0;
|
||||
for (int i = 0; i < process_samples; i++)
|
||||
{
|
||||
sum += tmp[i] * pBuffer[offs + i];
|
||||
}
|
||||
beatcorr_ringbuff[(beatcorr_ringbuffpos + offs) % windowLen] += (float)((sum > 0) ? sum : 0); // accumulate only positive correlations
|
||||
}
|
||||
|
||||
int skipstep = XCORR_UPDATE_SEQUENCE / OVERLAP_FACTOR;
|
||||
|
||||
// compensate empty buffer at beginning by scaling coefficient
|
||||
float scale = (float)windowLen / (float)(skipstep * init_scaler);
|
||||
if (scale > 1.0f)
|
||||
{
|
||||
init_scaler++;
|
||||
}
|
||||
else
|
||||
{
|
||||
scale = 1.0f;
|
||||
}
|
||||
|
||||
// detect beats
|
||||
for (int i = 0; i < skipstep; i++)
|
||||
{
|
||||
LONG_SAMPLETYPE max = 0;
|
||||
|
||||
float sum = beatcorr_ringbuff[beatcorr_ringbuffpos];
|
||||
sum -= beat_lpf.update(sum);
|
||||
|
||||
if (sum > peakVal)
|
||||
{
|
||||
// found new local largest value
|
||||
peakVal = sum;
|
||||
peakPos = pos;
|
||||
}
|
||||
if (pos > peakPos + resetDur)
|
||||
{
|
||||
// largest value not updated for 200msec => accept as beat
|
||||
peakPos += skipstep;
|
||||
if (peakVal > 0)
|
||||
{
|
||||
// add detected beat to end of "beats" vector
|
||||
beats.push_back({ (float)(peakPos * posScale), (float)(peakVal * scale) });
|
||||
}
|
||||
|
||||
peakVal = 0;
|
||||
peakPos = pos;
|
||||
}
|
||||
|
||||
beatcorr_ringbuff[beatcorr_ringbuffpos] = 0;
|
||||
pos++;
|
||||
beatcorr_ringbuffpos = (beatcorr_ringbuffpos + 1) % windowLen;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define max(x,y) ((x) > (y) ? (x) : (y))
|
||||
|
||||
void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
|
||||
{
|
||||
SAMPLETYPE decimated[DECIMATED_BLOCK_SAMPLES];
|
||||
SAMPLETYPE decimated[DECIMATED_BLOCK_SIZE];
|
||||
|
||||
// iterate so that max INPUT_BLOCK_SAMPLES processed per iteration
|
||||
while (numSamples > 0)
|
||||
@ -244,7 +418,7 @@ void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
|
||||
int block;
|
||||
int decSamples;
|
||||
|
||||
block = (numSamples > INPUT_BLOCK_SAMPLES) ? INPUT_BLOCK_SAMPLES : numSamples;
|
||||
block = (numSamples > INPUT_BLOCK_SIZE) ? INPUT_BLOCK_SIZE : numSamples;
|
||||
|
||||
// decimate. note that converts to mono at the same time
|
||||
decSamples = decimate(decimated, samples, block);
|
||||
@ -254,31 +428,60 @@ void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
|
||||
buffer->putSamples(decimated, decSamples);
|
||||
}
|
||||
|
||||
// when the buffer has enough samples for processing...
|
||||
while ((int)buffer->numSamples() >= windowLen + xcorr_update_sequence)
|
||||
// when the buffer has enough samples for processing...
|
||||
int req = max(windowLen + XCORR_UPDATE_SEQUENCE, 2 * XCORR_UPDATE_SEQUENCE);
|
||||
while ((int)buffer->numSamples() >= req)
|
||||
{
|
||||
// ... calculate autocorrelations for oldest samples...
|
||||
updateXCorr(xcorr_update_sequence);
|
||||
// ... and remove these from the buffer
|
||||
buffer->receiveSamples(xcorr_update_sequence);
|
||||
// ... update autocorrelations...
|
||||
updateXCorr(XCORR_UPDATE_SEQUENCE);
|
||||
// ...update beat position calculation...
|
||||
updateBeatPos(XCORR_UPDATE_SEQUENCE / 2);
|
||||
// ... and remove processed samples from the buffer
|
||||
int n = XCORR_UPDATE_SEQUENCE / OVERLAP_FACTOR;
|
||||
buffer->receiveSamples(n);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void BPMDetect::removeBias()
|
||||
{
|
||||
int i;
|
||||
float minval = 1e12f; // arbitrary large number
|
||||
|
||||
// Remove linear bias: calculate linear regression coefficient
|
||||
// 1. calc mean of 'xcorr' and 'i'
|
||||
double mean_i = 0;
|
||||
double mean_x = 0;
|
||||
for (i = windowStart; i < windowLen; i++)
|
||||
{
|
||||
mean_x += xcorr[i];
|
||||
}
|
||||
mean_x /= (windowLen - windowStart);
|
||||
mean_i = 0.5 * (windowLen - 1 + windowStart);
|
||||
|
||||
// 2. calculate linear regression coefficient
|
||||
double b = 0;
|
||||
double div = 0;
|
||||
for (i = windowStart; i < windowLen; i++)
|
||||
{
|
||||
double xt = xcorr[i] - mean_x;
|
||||
double xi = i - mean_i;
|
||||
b += xt * xi;
|
||||
div += xi * xi;
|
||||
}
|
||||
b /= div;
|
||||
|
||||
// subtract linear regression and resolve min. value bias
|
||||
float minval = FLT_MAX; // arbitrary large number
|
||||
for (i = windowStart; i < windowLen; i ++)
|
||||
{
|
||||
xcorr[i] -= (float)(b * i);
|
||||
if (xcorr[i] < minval)
|
||||
{
|
||||
minval = xcorr[i];
|
||||
}
|
||||
}
|
||||
|
||||
// subtract min.value
|
||||
for (i = windowStart; i < windowLen; i ++)
|
||||
{
|
||||
xcorr[i] -= minval;
|
||||
@ -286,26 +489,82 @@ void BPMDetect::removeBias()
|
||||
}
|
||||
|
||||
|
||||
// Calculate N-point moving average for "source" values.
//
// For each index i in [start, end) writes into dest[i] the mean of the source
// values within N/2 items on both sides of i. The averaging window is truncated
// at the [start, end) range edges, and the sum is divided by the number of items
// actually included. Items of 'dest' outside [start, end) are left untouched.
void MAFilter(float *dest, const float *source, int start, int end, int N)
{
    // A non-positive N would make the window bounds cross over (empty window,
    // division by zero or by a negative count), so treat it as a 1-point window.
    const int half = (N > 0) ? (N / 2) : 0;

    for (int i = start; i < end; i++)
    {
        int first = i - half;
        int last = i + half + 1;    // one past the last averaged item
        if (first < start) first = start;
        if (last > end) last = end;

        double sum = 0;
        for (int j = first; j < last; j++)
        {
            sum += source[j];
        }
        dest[i] = (float)(sum / (last - first));
    }
}
|
||||
|
||||
|
||||
float BPMDetect::getBpm()
|
||||
{
|
||||
double peakPos;
|
||||
double coeff;
|
||||
PeakFinder peakFinder;
|
||||
|
||||
coeff = 60.0 * ((double)sampleRate / (double)decimateBy);
|
||||
|
||||
// save bpm debug analysis data if debug data enabled
|
||||
_SaveDebugData(xcorr, windowStart, windowLen, coeff);
|
||||
|
||||
// remove bias from xcorr data
|
||||
removeBias();
|
||||
|
||||
coeff = 60.0 * ((double)sampleRate / (double)decimateBy);
|
||||
|
||||
// save bpm debug data if debug data writing enabled
|
||||
_SaveDebugData("soundtouch-bpm-xcorr.txt", xcorr, windowStart, windowLen, coeff);
|
||||
|
||||
// Smoothen by N-point moving-average
|
||||
float *data = new float[windowLen];
|
||||
memset(data, 0, sizeof(float) * windowLen);
|
||||
MAFilter(data, xcorr, windowStart, windowLen, MOVING_AVERAGE_N);
|
||||
|
||||
// find peak position
|
||||
peakPos = peakFinder.detectPeak(xcorr, windowStart, windowLen);
|
||||
peakPos = peakFinder.detectPeak(data, windowStart, windowLen);
|
||||
|
||||
// save bpm debug data if debug data writing enabled
|
||||
_SaveDebugData("soundtouch-bpm-smoothed.txt", data, windowStart, windowLen, coeff);
|
||||
|
||||
delete[] data;
|
||||
|
||||
assert(decimateBy != 0);
|
||||
if (peakPos < 1e-9) return 0.0; // detection failed.
|
||||
|
||||
_SaveDebugBeatPos("soundtouch-detected-beats.txt", beats);
|
||||
|
||||
// calculate BPM
|
||||
return (float) (coeff / peakPos);
|
||||
float bpm = (float)(coeff / peakPos);
|
||||
return (bpm >= MIN_BPM && bpm <= MAX_BPM_VALID) ? bpm : 0;
|
||||
}
|
||||
|
||||
|
||||
/// Get beat position arrays. Note: The array includes also really low beat detection values
|
||||
/// in absence of clear strong beats. Consumer may wish to filter low values away.
|
||||
/// - "pos" receive array of beat positions
|
||||
/// - "values" receive array of beat detection strengths
|
||||
/// - max_num indicates max.size of "pos" and "values" array.
|
||||
///
|
||||
/// You can query a suitable array sized by calling this with NULL in "pos" & "values".
|
||||
///
|
||||
/// \return number of beats in the arrays.
|
||||
int BPMDetect::getBeats(float *pos, float *values, int max_num)
|
||||
{
|
||||
int num = beats.size();
|
||||
if ((!pos) || (!values)) return num; // pos or values NULL, return just size
|
||||
|
||||
for (int i = 0; (i < num) && (i < max_num); i++)
|
||||
{
|
||||
pos[i] = beats[i].pos;
|
||||
values[i] = beats[i].strength;
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
@ -242,12 +242,12 @@ double PeakFinder::detectPeak(const float *data, int aminPos, int amaxPos)
|
||||
// - sometimes the highest peak can be Nth harmonic of the true base peak yet
|
||||
// just a slightly higher than the true base
|
||||
|
||||
for (i = 3; i < 10; i ++)
|
||||
for (i = 1; i < 3; i ++)
|
||||
{
|
||||
double peaktmp, harmonic;
|
||||
int i1,i2;
|
||||
|
||||
harmonic = (double)i * 0.5;
|
||||
harmonic = (double)pow(2.0, i);
|
||||
peakpos = (int)(highPeak / harmonic + 0.5f);
|
||||
if (peakpos < minPos) break;
|
||||
peakpos = findTop(data, peakpos); // seek true local maximum index
|
||||
|
@ -252,7 +252,12 @@ copy $(OutDir)$(TargetName)$(TargetExt) ..\..\lib</Command>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="BPMDetect.cpp" />
|
||||
<ClCompile Include="BPMDetect.cpp">
|
||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4996</DisableSpecificWarnings>
|
||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">4996</DisableSpecificWarnings>
|
||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">4996</DisableSpecificWarnings>
|
||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Release|x64'">4996</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<ClCompile Include="cpu_detect_x86.cpp" />
|
||||
<ClCompile Include="FIFOSampleBuffer.cpp">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
|
@ -390,4 +390,9 @@ uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numS
|
||||
return (numSamples & 0xfffffffe) - length;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// workaround to not complain about empty module
|
||||
bool _dontcomplain_mmx_empty;
|
||||
|
||||
#endif // SOUNDTOUCH_ALLOW_MMX
|
||||
|
Loading…
Reference in New Issue
Block a user