Skip to content

Commit

Permalink
Merge pull request #1413 from xaviliz/add-new-algo-audio2pitch
Browse files Browse the repository at this point in the history
Add new algo audio2pitch
  • Loading branch information
dbogdanov committed Jul 11, 2024
2 parents 9f32d3c + e84bbcf commit 52d8a35
Show file tree
Hide file tree
Showing 8 changed files with 501 additions and 86 deletions.
113 changes: 113 additions & 0 deletions src/algorithms/tonal/audio2pitch.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#include "audio2pitch.h"
#include "essentiamath.h"

using namespace essentia;
using namespace standard;

// Static metadata strings for the Audio2Pitch algorithm: its name, the
// category it is listed under, and a human-readable description.
// NOTE(review): presumably consumed by the algorithm registry / generated
// documentation — confirm against the registration code.
const char* Audio2Pitch::name = "Audio2Pitch";
const char* Audio2Pitch::category = "Pitch";
// The description names the two selectable back-ends by their 'pitchAlgorithm'
// parameter values (pitchyin, pitchyinfft).
const char* Audio2Pitch::description = DOC("This algorithm computes pitch with various pitch algorithms, specifically targeted for real-time pitch detection on audio signals. The algorithm internally uses pitch estimation with PitchYin (pitchyin) and PitchYinFFT (pitchyinfft).");

// Decides whether a frame qualifies as voiced.
//
// pitchConfidence: confidence reported by the pitch estimator.
// loudness:        frame loudness on the same (linear gain) scale as
//                  _loudnessThresholdGain, which configure() derives from the
//                  decibel threshold via db2amp().
//
// Returns true only when both values reach their respective thresholds
// (comparisons are inclusive).
bool Audio2Pitch::isAboveThresholds(Real pitchConfidence, Real loudness) {
  const bool confidentEnough = pitchConfidence >= _pitchConfidenceThreshold;
  const bool loudEnough = loudness >= _loudnessThresholdGain;
  return confidentEnough && loudEnough;
}

void Audio2Pitch::configure() {

_sampleRate = parameter("sampleRate").toReal();
_frameSize = parameter("frameSize").toInt();
_minFrequency = parameter("minFrequency").toReal();
_maxFrequency = parameter("maxFrequency").toReal();
_pitchAlgorithmName = parameter("pitchAlgorithm").toString();
_tolerance = parameter("tolerance").toReal();
_pitchConfidenceThreshold = parameter("pitchConfidenceThreshold").toReal();
_loudnessThreshold = parameter("loudnessThreshold").toReal();
_loudnessThresholdGain = db2amp(_loudnessThreshold);

if (_maxFrequency > _sampleRate * 0.5) {
throw EssentiaException("Audio2Pitch: Max frequency cannot be higher than Nyquist frequency");
}
if (_maxFrequency <= _minFrequency) {
throw EssentiaException("Audio2Pitch: Max frequency cannot be lower or equal than the minimum frequency");
}

if (_pitchAlgorithmName != "pitchyinfft" && _pitchAlgorithmName != "pitchyin") {
throw EssentiaException("Audio2Pitch: Bad 'pitchAlgorithm' =", _pitchAlgorithmName);
}

if (_pitchAlgorithmName == "pitchyinfft") {
_windowing = AlgorithmFactory::create("Windowing");
_spectrum = AlgorithmFactory::create("Spectrum");
_pitchAlgorithm = AlgorithmFactory::create("PitchYinFFT");

_windowing->configure("type", "hann",
"size", _frameSize);
_spectrum->configure("size", _frameSize);
}
else {
_pitchAlgorithm = AlgorithmFactory::create("PitchYin");
}

_loudnessAlgorithm = AlgorithmFactory::create("RMS");

// switch between pyin and pyin_fft to propagate the weighting parameter
if (_pitchAlgorithmName == "pitchyin") {
_pitchAlgorithm->configure(INHERIT("frameSize"),
INHERIT("maxFrequency"),
INHERIT("minFrequency"),
INHERIT("sampleRate"),
INHERIT("tolerance"));
}
else {
_pitchAlgorithm->configure(INHERIT("frameSize"),
INHERIT("maxFrequency"),
INHERIT("minFrequency"),
INHERIT("sampleRate"),
INHERIT("weighting"),
INHERIT("tolerance"));
}
}

void Audio2Pitch::compute() {
const std::vector<Real>& frame = _frame.get();
Real& pitch = _pitch.get();
Real& pitchConfidence = _pitchConfidence.get();
Real& loudness = _loudness.get();
int& voiced = _voiced.get();

if (frame.empty()) {
throw EssentiaException("Audio2Pitch: cannot compute the pitch of an empty frame");
}

if (frame.size() == 1) {
throw EssentiaException("Audio2Pitch: cannot compute the pitch of a frame of size 1");
}

_loudnessAlgorithm->input("array").set(frame);
_loudnessAlgorithm->output("rms").set(loudness);
_loudnessAlgorithm->compute();

std::vector<Real> windowedFrame, spectrum;
if (_pitchAlgorithmName == "pitchyinfft") {
_windowing->input("frame").set(frame);
_windowing->output("frame").set(windowedFrame);
_windowing->compute();
_spectrum->input("frame").set(windowedFrame);
_spectrum->output("spectrum").set(spectrum);
_spectrum->compute();
_pitchAlgorithm->input("spectrum").set(spectrum);
}
else if (_pitchAlgorithmName == "pitchyin") {
_pitchAlgorithm->input("signal").set(frame);
}

_pitchAlgorithm->output("pitch").set(pitch);
_pitchAlgorithm->output("pitchConfidence").set(pitchConfidence);
_pitchAlgorithm->compute();

// define voiced by thresholding
voiced = 0; // initially assumes an unvoiced frame
if (isAboveThresholds(pitchConfidence, loudness)) {
voiced = 1;
}
}
75 changes: 75 additions & 0 deletions src/algorithms/tonal/audio2pitch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#ifndef ESSENTIA_AUDIO2PITCH_H
#define ESSENTIA_AUDIO2PITCH_H

#include "algorithmfactory.h"

namespace essentia {
namespace standard {

class Audio2Pitch : public Algorithm {

protected:
Input<std::vector<Real>> _frame;
Output<Real> _pitch;
Output<Real> _pitchConfidence;
Output<Real> _loudness;
Output<int> _voiced;

Algorithm* _pitchAlgorithm;
Algorithm* _loudnessAlgorithm;
// auxiliary algorithms for FFT-based pitch
Algorithm* _windowing;
Algorithm* _spectrum;

Real _sampleRate;
int _frameSize;
Real _minFrequency;
Real _maxFrequency;
std::string _pitchAlgorithmName;
Real _tolerance;
Real _pitchConfidenceThreshold;
Real _loudnessThreshold;
Real _loudnessThresholdGain;

bool isAboveThresholds(Real pitchConfidence, Real loudness);

public:
Audio2Pitch() {
declareInput(_frame, "frame", "the input frame to analyse");
declareOutput(_pitch, "pitch", "detected pitch in Hz");
declareOutput(_pitchConfidence, "pitchConfidence", "confidence of detected pitch (from 0.0 to 1.0)");
declareOutput(_loudness, "loudness", "detected loudness in decibels");
declareOutput(_voiced, "voiced", "voiced frame categorization, 1 for voiced and 0 for unvoiced frame");
}

~Audio2Pitch() {
if (_pitchAlgorithm) delete _pitchAlgorithm;
if (_loudnessAlgorithm) delete _loudnessAlgorithm;
if (_windowing) delete _windowing;
if (_spectrum) delete _spectrum;
}

void declareParameters() {
declareParameter("sampleRate", "sample rate of incoming audio frames", "[8000,inf)", 44100);
declareParameter("frameSize", "size of input frame in samples", "[1,inf)", 1024);
declareParameter("minFrequency", "minimum frequency to detect in Hz", "[10,20000]", 60.0);
declareParameter("maxFrequency", "maximum frequency to detect in Hz", "[10,20000]", 2300.0);
declareParameter("pitchAlgorithm", "pitch algorithm to use", "{pitchyin,pitchyinfft}", "pitchyinfft");
declareParameter("weighting", "string to assign a weighting function", "{custom,A,B,C,D,Z}", "custom");
declareParameter("tolerance", "sets tolerance for peak detection on pitch algorithm", "[0,1]", 1.0);
declareParameter("pitchConfidenceThreshold", "level of pitch confidence above/below which note ON/OFF start to be considered", "[0,1]", 0.25);
declareParameter("loudnessThreshold", "loudness level above/below which note ON/OFF start to be considered, in decibels", "[-inf,0]", -51.0);
}

void configure();
void compute();

static const char* name;
static const char* category;
static const char* description;
};

} // namespace standard
} // namespace essentia

#endif
6 changes: 3 additions & 3 deletions src/algorithms/tonal/pitchyinfft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ static Real _weightMask[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
static const Real _weights[] = {-75.8, -70.1, -60.8, -52.1, -44.2, -37.5,
-31.3, -25.6, -20.9, -16.5, -12.6, -9.6, -7.0, -4.7, -3.0, -1.8, -0.8,
-0.2, -0.0, 0.5, 1.6, 3.2, 5.4, 7.8, 8.1, 5.3, -2.4, -11.1, -12.8,
-12.2, -7.4, -17.8, -17.8, -17.8}; // by default the original one is selected
-12.2, -7.4, -17.8, -17.8, -17.8}; // by default use custom weights designed specifically for the PitchYinFFT algorithm

static const Real _aWeighting[] = {-148.6, -50.4, -44.8, -39.5, -34.5, -30.3,
-26.2, -22.4, -19.1, -16.2, -13.2, -10.8, -8.7, -6.6, -4.8, -3.2, -1.9,
Expand Down Expand Up @@ -91,7 +91,7 @@ void PitchYinFFT::configure() {
// configure algorithms
_fft->configure("size", _frameSize);

if (_weighting != "default" && _weighting != "A" && _weighting != "B" && _weighting != "C" && _weighting != "D" && _weighting != "Z") {
if (_weighting != "custom" && _weighting != "A" && _weighting != "B" && _weighting != "C" && _weighting != "D" && _weighting != "Z") {
E_INFO("PitchYinFFT: 'weighting' = "<<_weighting<<"\n");
throw EssentiaException("PitchYinFFT: Bad 'weighting' parameter");
}
Expand All @@ -118,7 +118,7 @@ void PitchYinFFT::spectralWeights(std::string weighting) {
int i = 0, j = 1;
Real freq = 0, a0 = 0, a1 = 0, f0 = 0, f1 = 0;
int _maskSize = 34;
if (weighting == "default") {
if (weighting == "custom") {
for (int n=0; n<_maskSize; n++)
_weightMask[n] = _weights[n];
}
Expand Down
2 changes: 1 addition & 1 deletion src/algorithms/tonal/pitchyinfft.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class PitchYinFFT : public Algorithm {
declareParameter("maxFrequency", "the maximum allowed frequency [Hz]", "(0,inf)", 22050.0);
declareParameter("interpolate", "boolean flag to enable interpolation", "{true,false}", true);
declareParameter("tolerance", "tolerance for peak detection", "[0,1]", 1.0);
declareParameter("weighting", "string to assign a weighting function", "{default,A,B,C,D,Z}", "default");
declareParameter("weighting", "string to assign a weighting function", "{custom,A,B,C,D,Z}", "custom");
}

void configure();
Expand Down
Loading

0 comments on commit 52d8a35

Please sign in to comment.