-
Notifications
You must be signed in to change notification settings - Fork 525
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add audio2pitch and pitch2midi algorithms.
- Loading branch information
Showing
4 changed files
with
328 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
#include "audio2pitch.h" | ||
|
||
using namespace essentia; | ||
using namespace standard; | ||
|
||
const char* Audio2Pitch::name = "Audio2Pitch"; | ||
const char* Audio2Pitch::category = "Pitch"; | ||
const char* Audio2Pitch::description = DOC("Extractor algorithm to compute pitch with several possible pitch algorithms, specifically targeted for real-time pitch detection on saxophone signals."); | ||
|
||
void Audio2Pitch::configure() { | ||
|
||
_sampleRate = parameter("sampleRate").toReal(); | ||
_frameSize = parameter("frameSize").toInt(); | ||
_minFrequency = parameter("minFrequency").toReal(); | ||
_maxFrequency = parameter("maxFrequency").toReal(); | ||
_pitchAlgorithmName = parameter("pitchAlgorithm").toString(); | ||
_loudnessAlgorithmName = parameter("loudnessAlgorithm").toString(); | ||
|
||
if (_maxFrequency > _sampleRate*0.5 ) { | ||
throw EssentiaException("Audio2Pitch: Max frequency cannot be higher than Nyquist frequency"); | ||
} | ||
if (_maxFrequency <= _minFrequency) { | ||
throw EssentiaException("Audio2Pitch: Max frequency cannot be lower than min frequency"); | ||
} | ||
|
||
if (_pitchAlgorithmName != "pyin_fft" && _pitchAlgorithmName != "pyin") { | ||
E_INFO("Audio2Pitch: 'pitchAlgorithm' = "<<_pitchAlgorithmName<<"\n"); | ||
throw EssentiaException("Audio2Pitch: Bad 'pitchAlgorithm' parameter"); | ||
} | ||
if (_pitchAlgorithmName == "pyin_fft") _isSpectral = true; | ||
if (_pitchAlgorithmName == "pyin") _isSpectral = false; | ||
|
||
if (_isSpectral) { | ||
_windowing = AlgorithmFactory::create("Windowing"); | ||
_spectrum = AlgorithmFactory::create("Spectrum"); | ||
_pitchAlgorithm = AlgorithmFactory::create("PitchYinFFT"); | ||
|
||
_windowing->configure("type", "hann", | ||
"size", _frameSize); | ||
_spectrum->configure("size", _frameSize); | ||
} | ||
else { | ||
_pitchAlgorithm = AlgorithmFactory::create("PitchYin"); | ||
} | ||
|
||
if (_loudnessAlgorithmName == "loudness") { | ||
_loudnessAlgorithm = AlgorithmFactory::create("Loudness"); | ||
} | ||
else if (_loudnessAlgorithmName == "rms") { | ||
_loudnessAlgorithm = AlgorithmFactory::create("RMS"); | ||
} | ||
else { | ||
E_INFO("Audio2Pitch: 'loudnessAlgorithm' = "<<_loudnessAlgorithmName<<"\n"); | ||
throw EssentiaException("Audio2Pitch: Bad 'loudnessAlgorithm' parameter"); | ||
} | ||
|
||
_pitchAlgorithm->configure(INHERIT("frameSize"), | ||
INHERIT("maxFrequency"), | ||
INHERIT("minFrequency"), | ||
INHERIT("sampleRate")); | ||
} | ||
|
||
void Audio2Pitch::compute() { | ||
const std::vector<Real>& frame = _frame.get(); | ||
Real& pitch = _pitch.get(); | ||
Real& pitchConfidence = _pitchConfidence.get(); | ||
Real& loudness = _loudness.get(); | ||
|
||
if (frame.empty()) { | ||
throw EssentiaException("Audio2Pitch: cannot compute the pitch of an empty frame"); | ||
} | ||
|
||
if (frame.size() == 1) { | ||
throw EssentiaException("Audio2Pitch: cannot compute the pitch of a frame of size 1"); | ||
} | ||
|
||
if (_loudnessAlgorithmName == "loudness") { | ||
_loudnessAlgorithm->input("signal").set(frame); | ||
_loudnessAlgorithm->output("loudness").set(loudness); | ||
} | ||
else { | ||
_loudnessAlgorithm->input("array").set(frame); | ||
_loudnessAlgorithm->output("rms").set(loudness); | ||
} | ||
_loudnessAlgorithm->compute(); | ||
|
||
std::vector<Real> windowedFrame, spectrum; | ||
if (_isSpectral) { | ||
_windowing->input("frame").set(frame); | ||
_windowing->output("frame").set(windowedFrame); | ||
_windowing->compute(); | ||
_spectrum->input("frame").set(windowedFrame); | ||
_spectrum->output("spectrum").set(spectrum); | ||
_spectrum->compute(); | ||
_pitchAlgorithm->input("spectrum").set(spectrum); | ||
} | ||
else { | ||
_pitchAlgorithm->input("signal").set(frame); | ||
} | ||
|
||
_pitchAlgorithm->output("pitch").set(pitch); | ||
_pitchAlgorithm->output("pitchConfidence").set(pitchConfidence); | ||
_pitchAlgorithm->compute(); | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#ifndef ESSENTIA_AUDIO2PITCH_H | ||
#define ESSENTIA_AUDIO2PITCH_H | ||
|
||
#include "algorithmfactory.h" | ||
|
||
namespace essentia { | ||
namespace standard { | ||
|
||
class Audio2Pitch : public Algorithm { | ||
|
||
protected: | ||
Input<std::vector<Real>> _frame; | ||
Output<Real> _pitch; | ||
Output<Real> _pitchConfidence; | ||
Output<Real> _loudness; | ||
|
||
Algorithm* _pitchAlgorithm; | ||
Algorithm* _loudnessAlgorithm; | ||
// auxiliary algorithms for FFT-based pitch | ||
Algorithm* _windowing; | ||
Algorithm* _spectrum; | ||
|
||
Real _sampleRate; | ||
int _frameSize; | ||
Real _minFrequency; | ||
Real _maxFrequency; | ||
std::string _pitchAlgorithmName; | ||
std::string _loudnessAlgorithmName; | ||
|
||
bool _isSpectral; | ||
|
||
public: | ||
Audio2Pitch() { | ||
declareInput(_frame, "frame", "the input frame to analyse"); | ||
declareOutput(_pitch, "pitch", "detected pitch in Hz"); | ||
declareOutput(_pitchConfidence, "pitchConfidence", "confidence of detected pitch from 0.0 - 1.0"); | ||
declareOutput(_loudness, "loudness", "detected loudness in dBFS"); | ||
|
||
_isSpectral = true; | ||
} | ||
|
||
~Audio2Pitch() { | ||
if (_pitchAlgorithm) delete _pitchAlgorithm; | ||
if (_loudnessAlgorithm) delete _loudnessAlgorithm; | ||
if (_windowing) delete _windowing; | ||
if (_spectrum) delete _spectrum; | ||
} | ||
|
||
void declareParameters() { | ||
declareParameter("sampleRate", "sample rate of incoming audio frames", "[8000,inf)", 44100); | ||
declareParameter("frameSize", "size of input frame in samples", "[1,inf)", 1024); | ||
declareParameter("minFrequency", "minimum frequency to detect in Hz", "(20,20000)", 60.f); | ||
declareParameter("maxFrequency", "maximum frequency to detect in Hz", "(20,20000)", 2300.f); | ||
declareParameter("pitchAlgorithm", "pitch algorithm to use", "{pyin,pyin_fft}", "pyin_fft"); | ||
declareParameter("loudnessAlgorithm", "loudness algorithm to use", "{loudness,rms}", "rms"); | ||
} | ||
|
||
void configure(); | ||
void compute(); | ||
|
||
static const char* name; | ||
static const char* category; | ||
static const char* description; | ||
}; | ||
|
||
} // namespace standard | ||
} // namespace essentia | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#include "pitch2midi.h" | ||
|
||
using namespace essentia; | ||
using namespace standard; | ||
|
||
const char* Pitch2Midi::name = "Pitch2Midi"; | ||
const char* Pitch2Midi::category = "Input/Output"; | ||
const char* Pitch2Midi::description = DOC(""); | ||
|
||
/**
 * Caches the 'tuningFreq' and 'transposition' parameters in members so that
 * compute() and the conversion helpers can use them directly.
 */
void Pitch2Midi::configure() {
  const auto& tuningParam = parameter("tuningFreq");
  _tuningFreq = tuningParam.toReal();

  const auto& transpositionParam = parameter("transposition");
  _transposition = transpositionParam.toInt();
}
|
||
void Pitch2Midi::compute() { | ||
// get ref to input | ||
const Real& pitch = _pitch.get(); | ||
const Real& loudness = _loudness.get(); | ||
// get refs to outputs | ||
int& midiNoteNumber = _midiNoteNumber.get(); | ||
int& midiNoteNumberTransposed = _midiNoteNumberTransposed.get(); | ||
std::string& closestNoteName = _closestNoteName.get(); | ||
std::string& closestNoteNameTransposed = _closestNoteNameTransposed.get(); | ||
Real& closestPitch = _closestPitch.get(); | ||
Real& diff = _diff.get(); | ||
Real& cents = _cents.get(); | ||
int& velocity = _velocity.get(); | ||
|
||
Real _detectedPitch = pitch; | ||
Real _detectedLoudness = loudness; | ||
|
||
if (pitch <= 0) { _detectedPitch = 1e-05; } | ||
|
||
int idx = getMIDINoteIndex(_detectedPitch); | ||
int transposed_idx = idx + _transposition; | ||
midiNoteNumber = getMidiNoteNumberFromNoteIndex(idx); | ||
midiNoteNumberTransposed = getMidiNoteNumberFromNoteIndex(transposed_idx); | ||
closestNoteName = getClosestNoteName(idx); | ||
closestNoteNameTransposed = getClosestNoteName(transposed_idx); | ||
closestPitch = getClosestPitch(idx); | ||
diff = getDiff(closestPitch, _detectedPitch); | ||
cents = getCents(closestPitch, _detectedPitch); | ||
|
||
velocity = decibelsToVelocity(gainToDecibels(_detectedLoudness)); | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
#ifndef ESSENTIA_PITCH2MIDI_H | ||
#define ESSENTIA_PITCH2MIDI_H | ||
|
||
#include "algorithmfactory.h" | ||
|
||
namespace essentia { | ||
namespace standard { | ||
|
||
class Pitch2Midi : public Algorithm { | ||
|
||
protected: | ||
Input<Real> _pitch; | ||
Input<Real> _loudness; | ||
Output<int> _midiNoteNumber; | ||
Output<int> _midiNoteNumberTransposed; | ||
Output<std::string> _closestNoteName; | ||
Output<std::string> _closestNoteNameTransposed; | ||
Output<Real> _closestPitch; | ||
Output<Real> _diff; | ||
Output<Real> _cents; | ||
Output<int> _velocity; | ||
|
||
Real _tuningFreq; | ||
int _transposition; | ||
|
||
const std::vector<std::string> ALL_NOTES { "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" }; | ||
Real hearing_threshold {-96.0f}; // we consider the 16-bits dynamic range - 96dB(SPL) | ||
|
||
int inline getMIDINoteIndex(Real& pitch) { | ||
return (int) round(log2(pitch / _tuningFreq) * 12); // it should be added +69 to get midiNote | ||
} | ||
|
||
// convert pitch in MIDI note | ||
int inline getMidiNoteNumberFromNoteIndex(int idx) { | ||
return 69 + idx; | ||
} | ||
|
||
std::string inline getClosestNoteName(int i) { | ||
int idx = abs(i) % 12; | ||
int octave = 4 + floor((i + 9) / 12.f); | ||
if (i < 0) | ||
idx = abs(idx - 12) % 12; // keep the index in music notes array when i is negative | ||
std::string closest_note = ALL_NOTES[idx] + std::to_string(octave); | ||
|
||
// TODO: for the line above check this https://forum.juce.com/t/efficiency-of-string-concatenation-vs-getting-a-substring/18296 | ||
return closest_note; | ||
} | ||
|
||
Real inline getClosestPitch(int i) { | ||
return _tuningFreq * powf(2, i / 12.f); | ||
} | ||
|
||
Real inline getDiff(Real& closest, Real& detected) { | ||
return round(detected - closest); | ||
} | ||
|
||
// convert pitch in cents | ||
Real inline getCents(Real& frequency_a, Real& frequency_b) { | ||
return 1200 * log2(frequency_b / frequency_a); | ||
} | ||
|
||
// convert loudness [dB] in to velocity | ||
int inline decibelsToVelocity (Real decibels) { | ||
int velocity = 0; | ||
if (decibels > hearing_threshold) | ||
velocity = (int)((hearing_threshold - decibels) * 127 / hearing_threshold); // decibels should be negative | ||
return velocity; | ||
} | ||
|
||
// convert gain to decibels | ||
Real inline gainToDecibels(Real& gain){ | ||
return 20 * log10(gain);; | ||
} | ||
|
||
public: | ||
Pitch2Midi() { | ||
declareInput(_pitch, "pitch", "pitch given in Hz for conversion"); | ||
declareInput(_loudness, "loudness", "loudness given in dB for velocity conversion"); | ||
declareOutput(_midiNoteNumber, "midiNoteNumber", "midi note number, as integer, in range [0,127]"); | ||
declareOutput(_midiNoteNumberTransposed, "midiNoteNumberTransposed", "midi note number with applied transposition, as integer, in range [0,127]"); | ||
declareOutput(_closestNoteName, "closestNoteName", "pitch class and octave number to detected pitch, as string (e.g. A4)"); | ||
declareOutput(_closestNoteNameTransposed, "closestNoteNameTransposed", "pitch class and octave number to detected pitch, with applied transposition, as string (e.g. A4)"); | ||
declareOutput(_closestPitch, "closestPitch", "equal-tempered pitch closest to detected pitch, in Hz"); | ||
declareOutput(_diff, "diff", "difference between pitch and closestPitch, in Hz"); | ||
declareOutput(_cents, "cents", "difference between pitch and closestPitch, in cents (1/100 of a semitone)"); | ||
declareOutput(_velocity, "velocity", "control message over the feel and volume of MIDI notes, as integer, in range [0,127])"); | ||
|
||
} | ||
~Pitch2Midi() { | ||
} | ||
|
||
void declareParameters() { | ||
declareParameter("tuningFreq", "reference tuning frequency in Hz", "{432,440}", 440); | ||
declareParameter("transposition", "amount of semitones to apply for transposed instruments", "(-69,50)", 0); | ||
} | ||
|
||
void configure(); | ||
void compute(); | ||
|
||
static const char* name; | ||
static const char* category; | ||
static const char* description; | ||
}; | ||
|
||
} // namespace standard | ||
} // namespace essentia | ||
|
||
#endif |