Merge branch 'bpm-work' into 'master'

BPM algorithm work - improved beat analysis routine and added individual beat detection See merge request soundtouch/soundtouch!1
2024-11-23 19:39:47 +00:00 · 2018-05-16 16:04:33 +00:00 · 2018-05-16 16:04:33 +00:00 · 8f6f91f9b3
commit 8f6f91f9b3
parent 47f74e83ef 007481d711
8 changed files with 532 additions and 128 deletions
--- a/README.html
+++ b/README.html
@ -13,8 +13,8 @@
 </head>
 <body class="normal">
 <hr>
-<h1>SoundTouch audio processing library v2.0.1pre</h1>
-<p class="normal">SoundTouch library Copyright &copy Olli Parviainen 2001-2017</p>
+<h1>SoundTouch audio processing library v2.1pre</h1>
+<p class="normal">SoundTouch library Copyright © Olli Parviainen 2001-2018</p>
 <hr>
 <h2>1. Introduction </h2>
 <p>SoundTouch is an open-source audio processing library that allows
@ -573,7 +573,7 @@ this corresponds to lowering the pitch by -0.318 semitones:</p>
 <hr>
 <h2>5. Change History</h2>
 <h3>5.1. SoundTouch library Change History </h3>
-    <p><b>2.0.1pre:</b></p>
+    <p><b>2.1pre:</b></p>
    <ul>
        <li>Refactored C# interface example</li>
        <li>Disable anti-alias filter when switch 
@ -581,7 +581,10 @@ this corresponds to lowering the pitch by -0.318 semitones:</p>
        filter cause slight click if the rate change crosses zero during 
        processing</li>
        <li>Added script for building SoundTouchDll dynamic-link-library for GNU platforms</li>
-        <li>Added BPM functions to SoundTouchDll API</li>
+        <li>Rewrote Beats-per-Minute analysis algorithm for more reliable BPM 
+		detection</li>
+		<li>Added BPM functions to SoundTouchDll API</li>
+		<li>Migrated Visual Studio project files to MSVC 201x format</li>
    </ul>
    <p><b>2.0:</b></p>
    <ul>
@ -901,6 +904,8 @@ General Public License for more details.</p>
 <p>You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA</p>
+<p>---</p>
+<p>A commercial license alternative is also available; contact the author for details.</p>
 <hr>
 <p><i>README.html file updated in May-2018</i></p>
 </body>
--- a/include/BPMDetect.h
+++ b/include/BPMDetect.h
@ -50,102 +50,232 @@
 #ifndef _BPMDetect_H_
 #define _BPMDetect_H_

+#include <vector>
 #include "STTypes.h"
 #include "FIFOSampleBuffer.h"

 namespace soundtouch
 {

-/// Minimum allowed BPM rate. Used to restrict accepted result above a reasonable limit.
-#define MIN_BPM 29
+    /// Minimum allowed BPM rate. Used to restrict accepted result above a reasonable limit.
+    #define MIN_BPM 45

-/// Maximum allowed BPM rate. Used to restrict accepted result below a reasonable limit.
-#define MAX_BPM 200
+    /// Maximum allowed BPM rate range. Used for calculating algorithm parameters.
+    #define MAX_BPM_RANGE 200
+
+    /// Maximum valid BPM rate. Used to restrict accepted result below a reasonable limit.
+    #define MAX_BPM_VALID 190


-/// Class for calculating BPM rate for audio data.
-class BPMDetect
+////////////////////////////////////////////////////////////////////////////////
+
+/*
+class BeatCollection
 {
-protected:
-    /// Auto-correlation accumulator bins.
-    float *xcorr;
-    
-    /// Sample average counter.
-    int decimateCount;
+private:

-    /// Sample average accumulator for FIFO-like decimation.
-    soundtouch::LONG_SAMPLETYPE decimateSum;
+    int size;

-    /// Decimate sound by this coefficient to reach approx. 500 Hz.
-    int decimateBy;
-
-    /// Auto-correlation window length
-    int windowLen;
-
-    /// Number of channels (1 = mono, 2 = stereo)
-    int channels;
-
-    /// sample rate
-    int sampleRate;
-
-    /// Beginning of auto-correlation window: Autocorrelation isn't being updated for
-    /// the first these many correlation bins.
-    int windowStart;
- 
-    /// FIFO-buffer for decimated processing samples.
-    soundtouch::FIFOSampleBuffer *buffer;
-
-    /// Updates auto-correlation function for given number of decimated samples that 
-    /// are read from the internal 'buffer' pipe (samples aren't removed from the pipe 
-    /// though).
-    void updateXCorr(int process_samples      /// How many samples are processed.
-                     );
-
-    /// Decimates samples to approx. 500 Hz.
-    ///
-    /// \return Number of output samples.
-    int decimate(soundtouch::SAMPLETYPE *dest,      ///< Destination buffer
-                 const soundtouch::SAMPLETYPE *src, ///< Source sample buffer
-                 int numsamples                     ///< Number of source samples.
-                 );
-
-    /// Calculates amplitude envelope for the buffer of samples.
-    /// Result is output to 'samples'.
-    void calcEnvelope(soundtouch::SAMPLETYPE *samples,  ///< Pointer to input/output data buffer
-                      int numsamples                    ///< Number of samples in buffer
-                      );
-
-    /// remove constant bias from xcorr data
-    void removeBias();
+    // Ensure there's enough capacity in arrays
+    void EnsureCapacity(int newCapacity)
+    {
+        if (newCapacity > size)
+        {
+            // enlarge arrays
+            int oldSize = size;
+            float *beatPosOld = beatPos;
+            float *beatValuesOld = beatValues;
+            while (size < newCapacity) size *= 2;
+            printf("Alloc more %d\n", size);
+            beatPos = new float[size];
+            beatValues = new float[size];
+            if ((beatPos == NULL) || (beatValues == NULL))
+            {
+                ST_THROW_RT_ERROR("can't allocate memory");
+            }
+            // copy old arrays to new arrays
+            memcpy(beatPos, beatPosOld, sizeof(float)*oldSize);
+            memcpy(beatValues, beatValuesOld, sizeof(float)*oldSize);
+            // free old arrays
+            delete[] beatPosOld;
+            delete[] beatValuesOld;
+        }
+    }

 public:
-    /// Constructor.
-    BPMDetect(int numChannels,  ///< Number of channels in sample data.
-              int sampleRate    ///< Sample rate in Hz.
-              );
+    // beat position array
+    float *beatPos;

-    /// Destructor.
-    virtual ~BPMDetect();
+    // beat values array
+    float *beatValues;

-    /// Inputs a block of samples for analyzing: Envelopes the samples and then
-    /// updates the autocorrelation estimation. When whole song data has been input
-    /// in smaller blocks using this function, read the resulting bpm with 'getBpm' 
-    /// function. 
-    /// 
-    /// Notice that data in 'samples' array can be disrupted in processing.
-    void inputSamples(const soundtouch::SAMPLETYPE *samples,    ///< Pointer to input/working data buffer
-                      int numSamples                            ///< Number of samples in buffer
-                      );
+    // number of beats in arrays
+    int numBeats;
+
+    // constructor
+    BeatCollection()
+    {
+        numBeats = 0;
+        size = 1024;
+        beatPos = new float[size];
+        beatValues = new float[size];
+    }


-    /// Analyzes the results and returns the BPM rate. Use this function to read result
-    /// after whole song data has been input to the class by consecutive calls of
-    /// 'inputSamples' function.
-    ///
-    /// \return Beats-per-minute rate, or zero if detection failed.
-    float getBpm();
+    // destructor
+    ~BeatCollection()
+    {
+        delete[] beatPos;
+        delete[] beatValues;
+    }
+
+
+    // add new beat position into array
+    void Add(float pos, float value)
+    {
+        EnsureCapacity(numBeats + 1);
+        beatPos[numBeats] = pos;
+        beatValues[numBeats] = value;
+        numBeats++;
+    }
+
 };
+*/

+    typedef struct
+    {
+        float pos;
+        float strength;
+    } BEAT;
+
+
+    class IIR2_filter
+    {
+        double coeffs[5];
+        double prev[5];
+
+    public:
+        IIR2_filter(const double *lpf_coeffs);
+        float update(float x);
+    };
+
+
+    /// Class for calculating BPM rate for audio data.
+    class BPMDetect
+    {
+    protected:
+        /// Auto-correlation accumulator bins.
+        float *xcorr;
+
+        /// Sample average counter.
+        int decimateCount;
+
+        /// Sample average accumulator for FIFO-like decimation.
+        soundtouch::LONG_SAMPLETYPE decimateSum;
+
+        /// Decimate sound by this coefficient to reach approx. 1000 Hz.
+        int decimateBy;
+
+        /// Auto-correlation window length
+        int windowLen;
+
+        /// Number of channels (1 = mono, 2 = stereo)
+        int channels;
+
+        /// sample rate
+        int sampleRate;
+
+        /// Beginning of auto-correlation window: Autocorrelation isn't being updated for
+        /// the first these many correlation bins.
+        int windowStart;
+
+        /// window functions for data preconditioning
+        float *hamw;
+        float *hamw2;
+
+        // beat detection variables
+        int pos;
+        int peakPos;
+        int beatcorr_ringbuffpos;
+        int init_scaler;
+        float peakVal;
+        float *beatcorr_ringbuff;
+
+        /// FIFO-buffer for decimated processing samples.
+        soundtouch::FIFOSampleBuffer *buffer;
+
+        /// Collection of detected beat positions
+        //BeatCollection beats;
+        std::vector<BEAT> beats;
+
+        // 2nd order low-pass-filter
+        IIR2_filter beat_lpf;
+
+        /// Updates auto-correlation function for given number of decimated samples that 
+        /// are read from the internal 'buffer' pipe (samples aren't removed from the pipe 
+        /// though).
+        void updateXCorr(int process_samples      /// How many samples are processed.
+        );
+
+        /// Decimates samples to approx. 1000 Hz.
+        ///
+        /// \return Number of output samples.
+        int decimate(soundtouch::SAMPLETYPE *dest,      ///< Destination buffer
+            const soundtouch::SAMPLETYPE *src, ///< Source sample buffer
+            int numsamples                     ///< Number of source samples.
+        );
+
+        /// Calculates amplitude envelope for the buffer of samples.
+        /// Result is output to 'samples'.
+        void calcEnvelope(soundtouch::SAMPLETYPE *samples,  ///< Pointer to input/output data buffer
+            int numsamples                    ///< Number of samples in buffer
+        );
+
+        /// remove constant bias from xcorr data
+        void removeBias();
+
+        // Detect individual beat positions
+        void updateBeatPos(int process_samples);
+
+
+    public:
+        /// Constructor.
+        BPMDetect(int numChannels,  ///< Number of channels in sample data.
+            int sampleRate    ///< Sample rate in Hz.
+        );
+
+        /// Destructor.
+        virtual ~BPMDetect();
+
+        /// Inputs a block of samples for analyzing: Envelopes the samples and then
+        /// updates the autocorrelation estimation. When whole song data has been input
+        /// in smaller blocks using this function, read the resulting bpm with 'getBpm' 
+        /// function. 
+        /// 
+        /// Notice that data in 'samples' array can be disrupted in processing.
+        void inputSamples(const soundtouch::SAMPLETYPE *samples,    ///< Pointer to input/working data buffer
+            int numSamples                            ///< Number of samples in buffer
+        );
+
+
+        /// Analyzes the results and returns the BPM rate. Use this function to read result
+        /// after whole song data has been input to the class by consecutive calls of
+        /// 'inputSamples' function.
+        ///
+        /// \return Beats-per-minute rate, or zero if detection failed.
+        float getBpm();
+
+
+        /// Get beat position arrays. Note: The arrays also include very low beat detection values
+        /// in the absence of clear strong beats. The consumer may wish to filter low values away.
+        /// - "pos" receives the array of beat positions
+        /// - "strength" receives the array of beat detection strengths
+        /// - "max_num" indicates the max. size of the "pos" and "strength" arrays.
+        ///
+        /// You can query a suitable array size by calling this with NULL in "pos" or "strength".
+        ///
+        /// \return total number of detected beats (can be larger than max_num).
+        int getBeats(float *pos, float *strength, int max_num);
+    };
 }
-
 #endif // _BPMDetect_H_
--- a/include/SoundTouch.h
+++ b/include/SoundTouch.h
@ -72,10 +72,10 @@ namespace soundtouch
 {

 /// Soundtouch library version string
-#define SOUNDTOUCH_VERSION          "2.0.1pre"
+#define SOUNDTOUCH_VERSION          "2.1pre"

 /// SoundTouch library version id
-#define SOUNDTOUCH_VERSION_ID       (20001)
+#define SOUNDTOUCH_VERSION_ID       (20009)

 //
 // Available setting IDs for the 'setSetting' & 'get_setting' functions:
--- a/source/SoundStretch/main.cpp
+++ b/source/SoundStretch/main.cpp
@ -58,8 +58,8 @@ using namespace std;

 static const char _helloText[] = 
    "\n"
-    "   SoundStretch v%s -  Copyright (c) Olli Parviainen 2001 - 2017\n"
-    "==================================================================\n"
+    "   SoundStretch v%s -  Copyright (c) Olli Parviainen\n"
+    "=========================================================\n"
    "author e-mail: <oparviai"
    "@"
    "iki.fi> - WWW: http://www.surina.net/soundtouch\n"
--- a/source/SoundTouch/BPMDetect.cpp
+++ b/source/SoundTouch/BPMDetect.cpp
@ -47,6 +47,8 @@
 //
 ////////////////////////////////////////////////////////////////////////////////

+#define _USE_MATH_DEFINES
+
 #include <math.h>
 #include <assert.h>
 #include <string.h>
@ -57,40 +59,49 @@

 using namespace soundtouch;

-#define INPUT_BLOCK_SAMPLES       2048
-#define DECIMATED_BLOCK_SAMPLES   256
+// algorithm input sample block size
+static const int INPUT_BLOCK_SIZE = 2048;
+
+// decimated sample block size
+static const int DECIMATED_BLOCK_SIZE = 256;

 /// Target sample rate after decimation
-const int target_srate = 1000;
+static const int TARGET_SRATE = 1000;

 /// XCorr update sequence size, update in about 200msec chunks
-const int xcorr_update_sequence = 200;
+static const int XCORR_UPDATE_SEQUENCE = (int)(TARGET_SRATE / 5);
+
+/// Moving average N size
+static const int MOVING_AVERAGE_N = 15;

 /// XCorr decay time constant, decay to half in 30 seconds
 /// If it's desired to have the system adapt quicker to beat rate 
 /// changes within a continuing music stream, then the 
 /// 'xcorr_decay_time_constant' value can be reduced, yet that
 /// can increase possibility of glitches in bpm detection.
-const double xcorr_decay_time_constant = 30.0;
+static const double XCORR_DECAY_TIME_CONSTANT = 30.0;
+
+/// Data overlap factor for beat detection algorithm
+static const int OVERLAP_FACTOR = 4;
+
+static const double TWOPI = (2 * M_PI);

 ////////////////////////////////////////////////////////////////////////////////

 // Enable following define to create bpm analysis file:

-// #define _CREATE_BPM_DEBUG_FILE
+//#define _CREATE_BPM_DEBUG_FILE

 #ifdef _CREATE_BPM_DEBUG_FILE

-    #define DEBUGFILE_NAME  "c:\\temp\\soundtouch-bpm-debug.txt"
-
-    static void _SaveDebugData(const float *data, int minpos, int maxpos, double coeff)
+    static void _SaveDebugData(const char *name, const float *data, int minpos, int maxpos, double coeff)
    {
-        FILE *fptr = fopen(DEBUGFILE_NAME, "wt");
+        FILE *fptr = fopen(name, "wt");
        int i;

        if (fptr)
        {
-            printf("\n\nWriting BPM debug data into file " DEBUGFILE_NAME "\n\n");
+            printf("\nWriting BPM debug data into file %s\n", name);
            for (i = minpos; i < maxpos; i ++)
            {
                fprintf(fptr, "%d\t%.1lf\t%f\n", i, coeff / (double)i, data[i]);
@ -98,15 +109,74 @@ const double xcorr_decay_time_constant = 30.0;
            fclose(fptr);
        }
    }
+
+    void _SaveDebugBeatPos(const char *name, const std::vector<BEAT> &beats)
+    {
+        printf("\nWriting beat detections data into file %s\n", name);
+
+        FILE *fptr = fopen(name, "wt");
+        if (fptr)
+        {
+            for (uint i = 0; i < beats.size(); i++)
+            {
+                BEAT b = beats[i];
+                fprintf(fptr, "%lf\t%lf\n", b.pos, b.strength);
+            }
+            fclose(fptr);
+        }
+    }
 #else
-    #define _SaveDebugData(a,b,c,d)
+    #define _SaveDebugData(name, a,b,c,d)
+    #define _SaveDebugBeatPos(name, b)
 #endif

+// Hamming window
+void hamming(float *w, int N)
+{
+    for (int i = 0; i < N; i++)
+    {
+        w[i] = (float)(0.54 - 0.46 * cos(TWOPI * i / (N - 1)));
+    }
+
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// IIR2_filter - 2nd order IIR filter
+
+IIR2_filter::IIR2_filter(const double *lpf_coeffs)
+{
+    memcpy(coeffs, lpf_coeffs, 5 * sizeof(double));
+    memset(prev, 0, sizeof(prev));
+}
+
+
+float IIR2_filter::update(float x)
+{
+    prev[0] = x;
+    double y = x * coeffs[0];
+
+    for (int i = 4; i >= 1; i--)
+    {
+        y += coeffs[i] * prev[i];
+        prev[i] = prev[i - 1];
+    }
+
+    prev[3] = y;
+    return (float)y;
+}
+
+
+// IIR low-pass filter coefficients, calculated with matlab/octave cheby2(2,40,0.05)
+const double _LPF_coeffs[5] = { 0.00996655391939, -0.01944529148401, 0.00996655391939, 1.96867605796247, -0.96916387431724 };
+
 ////////////////////////////////////////////////////////////////////////////////

-
-BPMDetect::BPMDetect(int numChannels, int aSampleRate)
+BPMDetect::BPMDetect(int numChannels, int aSampleRate) :
+    beat_lpf(_LPF_coeffs)
 {
+    beats.reserve(250); // initial reservation to prevent frequent reallocation
+
    this->sampleRate = aSampleRate;
    this->channels = numChannels;

@ -114,13 +184,13 @@ BPMDetect::BPMDetect(int numChannels, int aSampleRate)
    decimateCount = 0;

    // choose decimation factor so that result is approx. 1000 Hz
-    decimateBy = sampleRate / target_srate;
+    decimateBy = sampleRate / TARGET_SRATE;
    assert(decimateBy > 0);
-    assert(INPUT_BLOCK_SAMPLES < decimateBy * DECIMATED_BLOCK_SAMPLES);
+    assert(INPUT_BLOCK_SIZE < decimateBy * DECIMATED_BLOCK_SIZE);

    // Calculate window length & starting item according to desired min & max bpms
    windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM);
-    windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM);
+    windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM_RANGE);

    assert(windowLen > windowStart);

@ -128,23 +198,38 @@ BPMDetect::BPMDetect(int numChannels, int aSampleRate)
    xcorr = new float[windowLen];
    memset(xcorr, 0, windowLen * sizeof(float));

+    pos = 0;
+    peakPos = 0;
+    peakVal = 0;
+    init_scaler = 1;
+    beatcorr_ringbuffpos = 0;
+    beatcorr_ringbuff = new float[windowLen];
+    memset(beatcorr_ringbuff, 0, windowLen * sizeof(float));
+
    // allocate processing buffer
    buffer = new FIFOSampleBuffer();
    // we do processing in mono mode
    buffer->setChannels(1);
    buffer->clear();
-}

+    // calculate hamming windows
+    hamw = new float[XCORR_UPDATE_SEQUENCE];
+    hamming(hamw, XCORR_UPDATE_SEQUENCE);
+    hamw2 = new float[XCORR_UPDATE_SEQUENCE / 2];
+    hamming(hamw2, XCORR_UPDATE_SEQUENCE / 2);
+}


 BPMDetect::~BPMDetect()
 {
    delete[] xcorr;
+    delete[] beatcorr_ringbuff;
+    delete[] hamw;
+    delete[] hamw2;
    delete buffer;
 }


-
 /// convert to mono, low-pass filter & decimate to about 500 Hz. 
 /// return number of outputted samples.
 ///
@ -201,7 +286,6 @@ int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples)
 }


-
 // Calculates autocorrelation function of the sample history buffer
 void BPMDetect::updateXCorr(int process_samples)
 {
@ -209,22 +293,30 @@ void BPMDetect::updateXCorr(int process_samples)
    SAMPLETYPE *pBuffer;
    
    assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
+    assert(process_samples == XCORR_UPDATE_SEQUENCE);

    pBuffer = buffer->ptrBegin();

    // calculate decay factor for xcorr filtering
-    float xcorr_decay = (float)pow(0.5, 1.0 / (xcorr_decay_time_constant * target_srate / process_samples));
+    float xcorr_decay = (float)pow(0.5, 1.0 / (XCORR_DECAY_TIME_CONSTANT * TARGET_SRATE / process_samples));
+
+    // prescale pbuffer
+    float tmp[XCORR_UPDATE_SEQUENCE];
+    for (int i = 0; i < process_samples; i++)
+    {
+        tmp[i] = hamw[i] * hamw[i] * pBuffer[i];
+    }

    #pragma omp parallel for
    for (offs = windowStart; offs < windowLen; offs ++) 
    {
-        LONG_SAMPLETYPE sum;
+        double sum;
        int i;

        sum = 0;
        for (i = 0; i < process_samples; i ++) 
        {
-            sum += pBuffer[i] * pBuffer[i + offs];    // scaling the sub-result shouldn't be necessary
+            sum += tmp[i] * pBuffer[i + offs];  // scaling the sub-result shouldn't be necessary
        }
        xcorr[offs] *= xcorr_decay;   // decay 'xcorr' here with suitable time constant.

@ -233,10 +325,92 @@ void BPMDetect::updateXCorr(int process_samples)
 }


+// Detect individual beat positions
+void BPMDetect::updateBeatPos(int process_samples)
+{
+    SAMPLETYPE *pBuffer;
+
+    assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
+
+    pBuffer = buffer->ptrBegin();
+    assert(process_samples == XCORR_UPDATE_SEQUENCE / 2);
+
+    //    static double thr = 0.0003;
+    double posScale = (double)this->decimateBy / (double)this->sampleRate;
+    int resetDur = (int)(0.12 / posScale + 0.5);
+    double corrScale = 1.0 / (double)(windowLen - windowStart);
+
+    // prescale pbuffer
+    float tmp[XCORR_UPDATE_SEQUENCE / 2];
+    for (int i = 0; i < process_samples; i++)
+    {
+        tmp[i] = hamw2[i] * hamw2[i] * pBuffer[i];
+    }
+
+    #pragma omp parallel for
+    for (int offs = windowStart; offs < windowLen; offs++)
+    {
+        double sum = 0;
+        for (int i = 0; i < process_samples; i++)
+        {
+            sum += tmp[i] * pBuffer[offs + i];
+        }
+        beatcorr_ringbuff[(beatcorr_ringbuffpos + offs) % windowLen] += (float)((sum > 0) ? sum : 0); // accumulate only positive correlations
+    }
+
+    int skipstep = XCORR_UPDATE_SEQUENCE / OVERLAP_FACTOR;
+
+    // compensate empty buffer at beginning by scaling coefficient
+    float scale = (float)windowLen / (float)(skipstep * init_scaler);
+    if (scale > 1.0f)
+    {
+        init_scaler++;
+    }
+    else
+    {
+        scale = 1.0f;
+    }
+
+    // detect beats
+    for (int i = 0; i < skipstep; i++)
+    {
+        LONG_SAMPLETYPE max = 0;
+
+        float sum = beatcorr_ringbuff[beatcorr_ringbuffpos];
+        sum -= beat_lpf.update(sum);
+
+        if (sum > peakVal)
+        {
+            // found new local largest value
+            peakVal = sum;
+            peakPos = pos;
+        }
+        if (pos > peakPos + resetDur)
+        {
+            // largest value not updated for about 120 msec (resetDur) => accept as beat
+            peakPos += skipstep;
+            if (peakVal > 0)
+            {
+                // add detected beat to end of "beats" vector
+                beats.push_back({ (float)(peakPos * posScale), (float)(peakVal * scale) });
+            }
+
+            peakVal = 0;
+            peakPos = pos;
+        }
+
+        beatcorr_ringbuff[beatcorr_ringbuffpos] = 0;
+        pos++;
+        beatcorr_ringbuffpos = (beatcorr_ringbuffpos + 1) % windowLen;
+    }
+}
+
+
+#define max(x,y) ((x) > (y) ? (x) : (y))

 void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
 {
-    SAMPLETYPE decimated[DECIMATED_BLOCK_SAMPLES];
+    SAMPLETYPE decimated[DECIMATED_BLOCK_SIZE];

    // iterate so that max INPUT_BLOCK_SAMPLES processed per iteration
    while (numSamples > 0)
@ -244,7 +418,7 @@ void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
        int block;
        int decSamples;

-        block = (numSamples > INPUT_BLOCK_SAMPLES) ? INPUT_BLOCK_SAMPLES : numSamples;
+        block = (numSamples > INPUT_BLOCK_SIZE) ? INPUT_BLOCK_SIZE : numSamples;

        // decimate. note that converts to mono at the same time
        decSamples = decimate(decimated, samples, block);
@ -254,31 +428,60 @@ void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
        buffer->putSamples(decimated, decSamples);
    }

-    // when the buffer has enough samples for processing...
-    while ((int)buffer->numSamples() >= windowLen + xcorr_update_sequence) 
+    // when the buffer has enough samples for processing...
+    int req = max(windowLen + XCORR_UPDATE_SEQUENCE, 2 * XCORR_UPDATE_SEQUENCE);
+    while ((int)buffer->numSamples() >= req) 
    {
-        // ... calculate autocorrelations for oldest samples...
-        updateXCorr(xcorr_update_sequence);
-        // ... and remove these from the buffer
-        buffer->receiveSamples(xcorr_update_sequence);
+        // ... update autocorrelations...
+        updateXCorr(XCORR_UPDATE_SEQUENCE);
+        // ...update beat position calculation...
+        updateBeatPos(XCORR_UPDATE_SEQUENCE / 2);
+        // ... and remove processed samples from the buffer
+        int n = XCORR_UPDATE_SEQUENCE / OVERLAP_FACTOR;
+        buffer->receiveSamples(n);
    }
 }


-
 void BPMDetect::removeBias()
 {
    int i;
-    float minval = 1e12f;   // arbitrary large number

+    // Remove linear bias: calculate linear regression coefficient
+    // 1. calc mean of 'xcorr' and 'i'
+    double mean_i = 0;
+    double mean_x = 0;
+    for (i = windowStart; i < windowLen; i++)
+    {
+        mean_x += xcorr[i];
+    }
+    mean_x /= (windowLen - windowStart);
+    mean_i = 0.5 * (windowLen - 1 + windowStart);
+
+    // 2. calculate linear regression coefficient
+    double b = 0;
+    double div = 0;
+    for (i = windowStart; i < windowLen; i++)
+    {
+        double xt = xcorr[i] - mean_x;
+        double xi = i - mean_i;
+        b += xt * xi;
+        div += xi * xi;
+    }
+    b /= div;
+
+    // subtract linear regression and resolve min. value bias
+    float minval = FLT_MAX;   // arbitrary large number
    for (i = windowStart; i < windowLen; i ++)
    {
+        xcorr[i] -= (float)(b * i);
        if (xcorr[i] < minval)
        {
            minval = xcorr[i];
        }
    }

+    // subtract min.value
    for (i = windowStart; i < windowLen; i ++)
    {
        xcorr[i] -= minval;
@ -286,26 +489,82 @@ void BPMDetect::removeBias()
 }


+// Calculate N-point moving average for "source" values
+void MAFilter(float *dest, const float *source, int start, int end, int N)
+{
+    for (int i = start; i < end; i++)
+    {
+        int i1 = i - N / 2;
+        int i2 = i + N / 2 + 1;
+        if (i1 < start) i1 = start;
+        if (i2 > end)   i2 = end;
+
+        double sum = 0;
+        for (int j = i1; j < i2; j ++)
+        { 
+            sum += source[j];
+        }
+        dest[i] = (float)(sum / (i2 - i1));
+    }
+}
+
+
 float BPMDetect::getBpm()
 {
    double peakPos;
    double coeff;
    PeakFinder peakFinder;

-    coeff = 60.0 * ((double)sampleRate / (double)decimateBy);
-
-    // save bpm debug analysis data if debug data enabled
-    _SaveDebugData(xcorr, windowStart, windowLen, coeff);
-
    // remove bias from xcorr data
    removeBias();

+    coeff = 60.0 * ((double)sampleRate / (double)decimateBy);
+
+    // save bpm debug data if debug data writing enabled
+    _SaveDebugData("soundtouch-bpm-xcorr.txt", xcorr, windowStart, windowLen, coeff);
+
+    // Smoothen by N-point moving-average
+    float *data = new float[windowLen];
+    memset(data, 0, sizeof(float) * windowLen);
+    MAFilter(data, xcorr, windowStart, windowLen, MOVING_AVERAGE_N);
+
    // find peak position
-    peakPos = peakFinder.detectPeak(xcorr, windowStart, windowLen);
+    peakPos = peakFinder.detectPeak(data, windowStart, windowLen);
+
+    // save bpm debug data if debug data writing enabled
+    _SaveDebugData("soundtouch-bpm-smoothed.txt", data, windowStart, windowLen, coeff);
+
+    delete[] data;

    assert(decimateBy != 0);
    if (peakPos < 1e-9) return 0.0; // detection failed.

+    _SaveDebugBeatPos("soundtouch-detected-beats.txt", beats);
+
    // calculate BPM
-    return (float) (coeff / peakPos);
+    float bpm = (float)(coeff / peakPos);
+    return (bpm >= MIN_BPM && bpm <= MAX_BPM_VALID) ? bpm : 0;
+}
+
+
+/// Get beat position arrays. Note: The arrays also include very low beat detection values
+/// in the absence of clear strong beats. The consumer may wish to filter low values away.
+/// - "pos" receives the array of beat positions
+/// - "values" receives the array of beat detection strengths
+/// - "max_num" indicates the max. size of the "pos" and "values" arrays.
+///
+/// You can query a suitable array size by calling this with NULL in "pos" or "values".
+///
+/// \return total number of detected beats (can be larger than max_num).
+int BPMDetect::getBeats(float *pos, float *values, int max_num)
+{
+    int num = beats.size();
+    if ((!pos) || (!values)) return num;    // pos or values NULL, return just size
+
+    for (int i = 0; (i < num) && (i < max_num); i++)
+    {
+        pos[i] = beats[i].pos;
+        values[i] = beats[i].strength;
+    }
+    return num;
 }
--- a/source/SoundTouch/PeakFinder.cpp
+++ b/source/SoundTouch/PeakFinder.cpp
@ -242,12 +242,12 @@ double PeakFinder::detectPeak(const float *data, int aminPos, int amaxPos)
    // - sometimes the highest peak can be Nth harmonic of the true base peak yet 
    // just a slightly higher than the true base

-    for (i = 3; i < 10; i ++)
+    for (i = 1; i < 3; i ++)
    {
        double peaktmp, harmonic;
        int i1,i2;

-        harmonic = (double)i * 0.5;
+        harmonic = (double)pow(2.0, i);
        peakpos = (int)(highPeak / harmonic + 0.5f);
        if (peakpos < minPos) break;
        peakpos = findTop(data, peakpos);   // seek true local maximum index
--- a/source/SoundTouch/SoundTouch.vcxproj
+++ b/source/SoundTouch/SoundTouch.vcxproj
@ -252,7 +252,12 @@ copy $(OutDir)$(TargetName)$(TargetExt) ..\..\lib</Command>
      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
      <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
    </ClCompile>
-    <ClCompile Include="BPMDetect.cpp" />
+    <ClCompile Include="BPMDetect.cpp">
+      <DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4996</DisableSpecificWarnings>
+      <DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">4996</DisableSpecificWarnings>
+      <DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">4996</DisableSpecificWarnings>
+      <DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Release|x64'">4996</DisableSpecificWarnings>
+    </ClCompile>
    <ClCompile Include="cpu_detect_x86.cpp" />
    <ClCompile Include="FIFOSampleBuffer.cpp">
      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
--- a/source/SoundTouch/mmx_optimized.cpp
+++ b/source/SoundTouch/mmx_optimized.cpp
@ -390,4 +390,9 @@ uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numS
    return (numSamples & 0xfffffffe) - length;
 }

+#else
+
+// workaround to not complain about empty module
+bool _dontcomplain_mmx_empty;
+
 #endif  // SOUNDTOUCH_ALLOW_MMX