Skip to content

Commit

Permalink
Add audio2pitch and pitch2midi algorithms.
Browse files Browse the repository at this point in the history
  • Loading branch information
xaviliz committed Oct 20, 2023
1 parent efc65d8 commit 48231af
Show file tree
Hide file tree
Showing 4 changed files with 328 additions and 0 deletions.
105 changes: 105 additions & 0 deletions src/algorithms/tonal/audio2pitch.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#include "audio2pitch.h"

using namespace essentia;
using namespace standard;

// Definitions of the static name, category and description strings declared
// in the Audio2Pitch class.
const char* Audio2Pitch::name = "Audio2Pitch";
const char* Audio2Pitch::category = "Pitch";
const char* Audio2Pitch::description = DOC("Extractor algorithm to compute pitch with several possible pitch algorithms, specifically targeted for real-time pitch detection on saxophone signals.");

// Reads the algorithm parameters, validates them and builds the inner
// algorithms used by compute().
//
// Throws EssentiaException when:
//   - maxFrequency is above half the sample rate,
//   - maxFrequency is not strictly greater than minFrequency,
//   - pitchAlgorithm is neither "pyin_fft" nor "pyin",
//   - loudnessAlgorithm is neither "loudness" nor "rms".
//
// NOTE(review): every call creates new inner algorithm instances; instances
// created by a previous configure() call are not released here.
void Audio2Pitch::configure() {
  // Cache the parameter values.
  _sampleRate = parameter("sampleRate").toReal();
  _frameSize = parameter("frameSize").toInt();
  _minFrequency = parameter("minFrequency").toReal();
  _maxFrequency = parameter("maxFrequency").toReal();
  _pitchAlgorithmName = parameter("pitchAlgorithm").toString();
  _loudnessAlgorithmName = parameter("loudnessAlgorithm").toString();

  // Frequency range sanity checks.
  const bool aboveNyquist = _maxFrequency > _sampleRate*0.5;
  if (aboveNyquist) {
    throw EssentiaException("Audio2Pitch: Max frequency cannot be higher than Nyquist frequency");
  }
  const bool emptyRange = _maxFrequency <= _minFrequency;
  if (emptyRange) {
    throw EssentiaException("Audio2Pitch: Max frequency cannot be lower than min frequency");
  }

  // Pitch algorithm selection: "pyin_fft" works on a spectrum, "pyin" on the raw frame.
  const bool fftBased = (_pitchAlgorithmName == "pyin_fft");
  const bool timeDomain = (_pitchAlgorithmName == "pyin");
  if (!fftBased && !timeDomain) {
    E_INFO("Audio2Pitch: 'pitchAlgorithm' = "<<_pitchAlgorithmName<<"\n");
    throw EssentiaException("Audio2Pitch: Bad 'pitchAlgorithm' parameter");
  }
  _isSpectral = fftBased;

  if (!_isSpectral) {
    _pitchAlgorithm = AlgorithmFactory::create("PitchYin");
  }
  else {
    // The FFT-based path additionally needs Windowing -> Spectrum in front of the pitch algorithm.
    _windowing = AlgorithmFactory::create("Windowing");
    _spectrum = AlgorithmFactory::create("Spectrum");
    _pitchAlgorithm = AlgorithmFactory::create("PitchYinFFT");

    _windowing->configure("type", "hann",
                          "size", _frameSize);
    _spectrum->configure("size", _frameSize);
  }

  // Loudness algorithm selection.
  const bool useLoudness = (_loudnessAlgorithmName == "loudness");
  const bool useRms = (_loudnessAlgorithmName == "rms");
  if (!useLoudness && !useRms) {
    E_INFO("Audio2Pitch: 'loudnessAlgorithm' = "<<_loudnessAlgorithmName<<"\n");
    throw EssentiaException("Audio2Pitch: Bad 'loudnessAlgorithm' parameter");
  }
  _loudnessAlgorithm = AlgorithmFactory::create(useLoudness ? "Loudness" : "RMS");

  // Forward the shared parameters to whichever pitch algorithm was created.
  _pitchAlgorithm->configure(INHERIT("frameSize"),
                             INHERIT("maxFrequency"),
                             INHERIT("minFrequency"),
                             INHERIT("sampleRate"));
}

void Audio2Pitch::compute() {
const std::vector<Real>& frame = _frame.get();
Real& pitch = _pitch.get();
Real& pitchConfidence = _pitchConfidence.get();
Real& loudness = _loudness.get();

if (frame.empty()) {
throw EssentiaException("Audio2Pitch: cannot compute the pitch of an empty frame");
}

if (frame.size() == 1) {
throw EssentiaException("Audio2Pitch: cannot compute the pitch of a frame of size 1");
}

if (_loudnessAlgorithmName == "loudness") {
_loudnessAlgorithm->input("signal").set(frame);
_loudnessAlgorithm->output("loudness").set(loudness);
}
else {
_loudnessAlgorithm->input("array").set(frame);
_loudnessAlgorithm->output("rms").set(loudness);
}
_loudnessAlgorithm->compute();

std::vector<Real> windowedFrame, spectrum;
if (_isSpectral) {
_windowing->input("frame").set(frame);
_windowing->output("frame").set(windowedFrame);
_windowing->compute();
_spectrum->input("frame").set(windowedFrame);
_spectrum->output("spectrum").set(spectrum);
_spectrum->compute();
_pitchAlgorithm->input("spectrum").set(spectrum);
}
else {
_pitchAlgorithm->input("signal").set(frame);
}

_pitchAlgorithm->output("pitch").set(pitch);
_pitchAlgorithm->output("pitchConfidence").set(pitchConfidence);
_pitchAlgorithm->compute();

}
69 changes: 69 additions & 0 deletions src/algorithms/tonal/audio2pitch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#ifndef ESSENTIA_AUDIO2PITCH_H
#define ESSENTIA_AUDIO2PITCH_H

#include "algorithmfactory.h"

namespace essentia {
namespace standard {

class Audio2Pitch : public Algorithm {

protected:
Input<std::vector<Real>> _frame;
Output<Real> _pitch;
Output<Real> _pitchConfidence;
Output<Real> _loudness;

Algorithm* _pitchAlgorithm;
Algorithm* _loudnessAlgorithm;
// auxiliary algorithms for FFT-based pitch
Algorithm* _windowing;
Algorithm* _spectrum;

Real _sampleRate;
int _frameSize;
Real _minFrequency;
Real _maxFrequency;
std::string _pitchAlgorithmName;
std::string _loudnessAlgorithmName;

bool _isSpectral;

public:
Audio2Pitch() {
declareInput(_frame, "frame", "the input frame to analyse");
declareOutput(_pitch, "pitch", "detected pitch in Hz");
declareOutput(_pitchConfidence, "pitchConfidence", "confidence of detected pitch from 0.0 - 1.0");
declareOutput(_loudness, "loudness", "detected loudness in dBFS");

_isSpectral = true;
}

~Audio2Pitch() {
if (_pitchAlgorithm) delete _pitchAlgorithm;
if (_loudnessAlgorithm) delete _loudnessAlgorithm;
if (_windowing) delete _windowing;
if (_spectrum) delete _spectrum;
}

void declareParameters() {
declareParameter("sampleRate", "sample rate of incoming audio frames", "[8000,inf)", 44100);
declareParameter("frameSize", "size of input frame in samples", "[1,inf)", 1024);
declareParameter("minFrequency", "minimum frequency to detect in Hz", "(20,20000)", 60.f);
declareParameter("maxFrequency", "maximum frequency to detect in Hz", "(20,20000)", 2300.f);
declareParameter("pitchAlgorithm", "pitch algorithm to use", "{pyin,pyin_fft}", "pyin_fft");
declareParameter("loudnessAlgorithm", "loudness algorithm to use", "{loudness,rms}", "rms");
}

void configure();
void compute();

static const char* name;
static const char* category;
static const char* description;
};

} // namespace standard
} // namespace essentia

#endif
46 changes: 46 additions & 0 deletions src/algorithms/tonal/pitch2midi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include "pitch2midi.h"

using namespace essentia;
using namespace standard;

// Definitions of the static name, category and description strings declared
// in the Pitch2Midi class. The description summarises the inputs and outputs
// declared by the class so that the algorithm is not left undocumented.
const char* Pitch2Midi::name = "Pitch2Midi";
const char* Pitch2Midi::category = "Input/Output";
const char* Pitch2Midi::description = DOC("This algorithm converts a pitch value in Hz and a loudness value into MIDI note information: the closest MIDI note number and note name (both plain and with the configured transposition applied), the closest equal-tempered pitch, the deviation of the input pitch from it in Hz and in cents, and a MIDI velocity derived from the loudness.");

// Caches the "transposition" (semitones) and "tuningFreq" (Hz) parameters in
// the corresponding members for use by compute().
void Pitch2Midi::configure() {
  _transposition = parameter("transposition").toInt();
  _tuningFreq = parameter("tuningFreq").toReal();
}

void Pitch2Midi::compute() {
// get ref to input
const Real& pitch = _pitch.get();
const Real& loudness = _loudness.get();
// get refs to outputs
int& midiNoteNumber = _midiNoteNumber.get();
int& midiNoteNumberTransposed = _midiNoteNumberTransposed.get();
std::string& closestNoteName = _closestNoteName.get();
std::string& closestNoteNameTransposed = _closestNoteNameTransposed.get();
Real& closestPitch = _closestPitch.get();
Real& diff = _diff.get();
Real& cents = _cents.get();
int& velocity = _velocity.get();

Real _detectedPitch = pitch;
Real _detectedLoudness = loudness;

if (pitch <= 0) { _detectedPitch = 1e-05; }

int idx = getMIDINoteIndex(_detectedPitch);
int transposed_idx = idx + _transposition;
midiNoteNumber = getMidiNoteNumberFromNoteIndex(idx);
midiNoteNumberTransposed = getMidiNoteNumberFromNoteIndex(transposed_idx);
closestNoteName = getClosestNoteName(idx);
closestNoteNameTransposed = getClosestNoteName(transposed_idx);
closestPitch = getClosestPitch(idx);
diff = getDiff(closestPitch, _detectedPitch);
cents = getCents(closestPitch, _detectedPitch);

velocity = decibelsToVelocity(gainToDecibels(_detectedLoudness));
}

108 changes: 108 additions & 0 deletions src/algorithms/tonal/pitch2midi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#ifndef ESSENTIA_PITCH2MIDI_H
#define ESSENTIA_PITCH2MIDI_H

#include "algorithmfactory.h"

namespace essentia {
namespace standard {

class Pitch2Midi : public Algorithm {

protected:
Input<Real> _pitch;
Input<Real> _loudness;
Output<int> _midiNoteNumber;
Output<int> _midiNoteNumberTransposed;
Output<std::string> _closestNoteName;
Output<std::string> _closestNoteNameTransposed;
Output<Real> _closestPitch;
Output<Real> _diff;
Output<Real> _cents;
Output<int> _velocity;

Real _tuningFreq;
int _transposition;

const std::vector<std::string> ALL_NOTES { "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" };
Real hearing_threshold {-96.0f}; // we consider the 16-bits dynamic range - 96dB(SPL)

int inline getMIDINoteIndex(Real& pitch) {
return (int) round(log2(pitch / _tuningFreq) * 12); // it should be added +69 to get midiNote
}

// convert pitch in MIDI note
int inline getMidiNoteNumberFromNoteIndex(int idx) {
return 69 + idx;
}

std::string inline getClosestNoteName(int i) {
int idx = abs(i) % 12;
int octave = 4 + floor((i + 9) / 12.f);
if (i < 0)
idx = abs(idx - 12) % 12; // keep the index in music notes array when i is negative
std::string closest_note = ALL_NOTES[idx] + std::to_string(octave);

// TODO: for the line above check this https://forum.juce.com/t/efficiency-of-string-concatenation-vs-getting-a-substring/18296
return closest_note;
}

Real inline getClosestPitch(int i) {
return _tuningFreq * powf(2, i / 12.f);
}

Real inline getDiff(Real& closest, Real& detected) {
return round(detected - closest);
}

// convert pitch in cents
Real inline getCents(Real& frequency_a, Real& frequency_b) {
return 1200 * log2(frequency_b / frequency_a);
}

// convert loudness [dB] in to velocity
int inline decibelsToVelocity (Real decibels) {
int velocity = 0;
if (decibels > hearing_threshold)
velocity = (int)((hearing_threshold - decibels) * 127 / hearing_threshold); // decibels should be negative
return velocity;
}

// convert gain to decibels
Real inline gainToDecibels(Real& gain){
return 20 * log10(gain);;
}

public:
Pitch2Midi() {
declareInput(_pitch, "pitch", "pitch given in Hz for conversion");
declareInput(_loudness, "loudness", "loudness given in dB for velocity conversion");
declareOutput(_midiNoteNumber, "midiNoteNumber", "midi note number, as integer, in range [0,127]");
declareOutput(_midiNoteNumberTransposed, "midiNoteNumberTransposed", "midi note number with applied transposition, as integer, in range [0,127]");
declareOutput(_closestNoteName, "closestNoteName", "pitch class and octave number to detected pitch, as string (e.g. A4)");
declareOutput(_closestNoteNameTransposed, "closestNoteNameTransposed", "pitch class and octave number to detected pitch, with applied transposition, as string (e.g. A4)");
declareOutput(_closestPitch, "closestPitch", "equal-tempered pitch closest to detected pitch, in Hz");
declareOutput(_diff, "diff", "difference between pitch and closestPitch, in Hz");
declareOutput(_cents, "cents", "difference between pitch and closestPitch, in cents (1/100 of a semitone)");
declareOutput(_velocity, "velocity", "control message over the feel and volume of MIDI notes, as integer, in range [0,127])");

}
~Pitch2Midi() {
}

void declareParameters() {
declareParameter("tuningFreq", "reference tuning frequency in Hz", "{432,440}", 440);
declareParameter("transposition", "amount of semitones to apply for transposed instruments", "(-69,50)", 0);
}

void configure();
void compute();

static const char* name;
static const char* category;
static const char* description;
};

} // namespace standard
} // namespace essentia

#endif

0 comments on commit 48231af

Please sign in to comment.