Add audio2pitch and pitch2midi algorithms.

MTG · Oct 20, 2023 · 48231af · 48231af
1 parent efc65d8
commit 48231af
Show file tree

Hide file tree

Showing 4 changed files with 328 additions and 0 deletions.
diff --git a/src/algorithms/tonal/audio2pitch.cpp b/src/algorithms/tonal/audio2pitch.cpp
@@ -0,0 +1,105 @@
+#include "audio2pitch.h"
+
+using namespace essentia;
+using namespace standard;
+
+const char* Audio2Pitch::name = "Audio2Pitch";  // definitions of the static metadata members declared in audio2pitch.h
+const char* Audio2Pitch::category = "Pitch";
+const char* Audio2Pitch::description = DOC("Extractor algorithm to compute pitch with several possible pitch algorithms, specifically targeted for real-time pitch detection on saxophone signals.");
+
+// Reads the configuration parameters, validates them and creates the inner
+// algorithms that compute() drives.
+//
+// Throws EssentiaException when:
+//   - maxFrequency is above half the sample rate,
+//   - maxFrequency is not strictly greater than minFrequency,
+//   - 'pitchAlgorithm' is neither "pyin_fft" nor "pyin",
+//   - 'loudnessAlgorithm' is neither "loudness" nor "rms".
+//
+// NOTE(review): each call allocates fresh inner algorithms and does not release
+// the ones created by an earlier call — confirm whether reconfiguring an
+// existing instance is expected and, if so, release them first.
+void Audio2Pitch::configure() {
+  // cache the parameter values
+  _sampleRate = parameter("sampleRate").toReal();
+  _frameSize = parameter("frameSize").toInt();
+  _minFrequency = parameter("minFrequency").toReal();
+  _maxFrequency = parameter("maxFrequency").toReal();
+  _pitchAlgorithmName = parameter("pitchAlgorithm").toString();
+  _loudnessAlgorithmName = parameter("loudnessAlgorithm").toString();
+
+  // frequency range sanity checks
+  if (_maxFrequency > _sampleRate*0.5 ) {
+    throw EssentiaException("Audio2Pitch: Max frequency cannot be higher than Nyquist frequency");
+  }
+  if (_maxFrequency <= _minFrequency) {
+    throw EssentiaException("Audio2Pitch: Max frequency cannot be lower than min frequency");
+  }
+
+  // pitch estimator selection
+  const bool fftBased = (_pitchAlgorithmName == "pyin_fft");
+  const bool timeBased = (_pitchAlgorithmName == "pyin");
+  if (!fftBased && !timeBased) {
+    E_INFO("Audio2Pitch: 'pitchAlgorithm' = "<<_pitchAlgorithmName<<"\n");
+    throw EssentiaException("Audio2Pitch: Bad 'pitchAlgorithm' parameter");
+  }
+  _isSpectral = fftBased;
+
+  if (!_isSpectral) {
+    _pitchAlgorithm = AlgorithmFactory::create("PitchYin");
+  }
+  else {
+    // the FFT-based estimator consumes a spectrum, so a windowing + spectrum
+    // front end is needed as well
+    _windowing = AlgorithmFactory::create("Windowing");
+    _spectrum = AlgorithmFactory::create("Spectrum");
+    _pitchAlgorithm = AlgorithmFactory::create("PitchYinFFT");
+
+    _windowing->configure("type", "hann",
+                          "size", _frameSize);
+    _spectrum->configure("size", _frameSize);
+  }
+
+  // loudness estimator selection
+  const bool wantsLoudness = (_loudnessAlgorithmName == "loudness");
+  const bool wantsRms = (_loudnessAlgorithmName == "rms");
+  if (!wantsLoudness && !wantsRms) {
+    E_INFO("Audio2Pitch: 'loudnessAlgorithm' = "<<_loudnessAlgorithmName<<"\n");
+    throw EssentiaException("Audio2Pitch: Bad 'loudnessAlgorithm' parameter");
+  }
+  _loudnessAlgorithm = AlgorithmFactory::create(wantsLoudness ? "Loudness" : "RMS");
+
+  // forward the shared parameters to whichever pitch estimator was created
+  _pitchAlgorithm->configure(INHERIT("frameSize"),
+                             INHERIT("maxFrequency"),
+                             INHERIT("minFrequency"),
+                             INHERIT("sampleRate"));
+}
+
+// Processes one input frame: runs the configured loudness estimator and the
+// configured pitch estimator on it and writes the "pitch", "pitchConfidence"
+// and "loudness" outputs.
+//
+// Throws EssentiaException when the frame holds fewer than two samples.
+void Audio2Pitch::compute() {
+  const std::vector<Real>& inputFrame = _frame.get();
+  Real& pitchOut = _pitch.get();
+  Real& confidenceOut = _pitchConfidence.get();
+  Real& loudnessOut = _loudness.get();
+
+  switch (inputFrame.size()) {
+    case 0:
+      throw EssentiaException("Audio2Pitch: cannot compute the pitch of an empty frame");
+    case 1:
+      throw EssentiaException("Audio2Pitch: cannot compute the pitch of a frame of size 1");
+    default:
+      break;
+  }
+
+  // the two loudness estimators use different port names
+  const bool useRms = !(_loudnessAlgorithmName == "loudness");
+  _loudnessAlgorithm->input(useRms ? "array" : "signal").set(inputFrame);
+  _loudnessAlgorithm->output(useRms ? "rms" : "loudness").set(loudnessOut);
+  _loudnessAlgorithm->compute();
+
+  // scratch buffers for the spectral path; declared at function scope because
+  // the pitch estimator reads 'magnitudes' when it is computed further below
+  std::vector<Real> windowed, magnitudes;
+  if (!_isSpectral) {
+    _pitchAlgorithm->input("signal").set(inputFrame);
+  }
+  else {
+    _windowing->input("frame").set(inputFrame);
+    _windowing->output("frame").set(windowed);
+    _windowing->compute();
+
+    _spectrum->input("frame").set(windowed);
+    _spectrum->output("spectrum").set(magnitudes);
+    _spectrum->compute();
+
+    _pitchAlgorithm->input("spectrum").set(magnitudes);
+  }
+
+  _pitchAlgorithm->output("pitch").set(pitchOut);
+  _pitchAlgorithm->output("pitchConfidence").set(confidenceOut);
+  _pitchAlgorithm->compute();
+}
diff --git a/src/algorithms/tonal/audio2pitch.h b/src/algorithms/tonal/audio2pitch.h
@@ -0,0 +1,69 @@
+#ifndef ESSENTIA_AUDIO2PITCH_H
+#define ESSENTIA_AUDIO2PITCH_H
+
+#include "algorithmfactory.h"
+
+namespace essentia {
+namespace standard {
+
+// Extractor that takes an audio frame and outputs a pitch estimate, its
+// confidence and a loudness value. The actual work is delegated to inner
+// algorithms that are created in configure() and owned by this object.
+//
+// NOTE(review): the class owns raw pointers and defines a destructor but no
+// copy operations — confirm that the Algorithm base class forbids copying.
+class Audio2Pitch : public Algorithm {
+
+  protected: 
+    Input<std::vector<Real>> _frame;
+    Output<Real> _pitch;
+    Output<Real> _pitchConfidence;
+    Output<Real> _loudness;
+
+    // Inner algorithms. They start out null so that the destructor is
+    // well-defined even when one of them was never created: the FFT helpers
+    // are only allocated for 'pyin_fft', and configure() may throw before any
+    // allocation happens.
+    Algorithm* _pitchAlgorithm = nullptr;
+    Algorithm* _loudnessAlgorithm = nullptr;
+    // auxiliary algorithms for FFT-based pitch
+    Algorithm* _windowing = nullptr;
+    Algorithm* _spectrum = nullptr;
+
+    // cached parameter values, filled in by configure()
+    Real _sampleRate;
+    int _frameSize;
+    Real _minFrequency;
+    Real _maxFrequency;
+    std::string _pitchAlgorithmName;
+    std::string _loudnessAlgorithmName;
+
+    // true when the selected pitch algorithm works on a spectrum
+    bool _isSpectral;
+
+  public:
+    Audio2Pitch() {
+      declareInput(_frame, "frame", "the input frame to analyse");
+      declareOutput(_pitch, "pitch", "detected pitch in Hz");
+      declareOutput(_pitchConfidence, "pitchConfidence", "confidence of detected pitch from 0.0 - 1.0");
+      // NOTE(review): the value written here comes straight from the selected
+      // loudness algorithm — confirm that "dBFS" matches what it returns.
+      declareOutput(_loudness, "loudness", "detected loudness in dBFS");
+
+      _isSpectral = true;
+    }
+
+    ~Audio2Pitch() {
+      // deleting a null pointer is a no-op, so no guards are required
+      delete _pitchAlgorithm;
+      delete _loudnessAlgorithm;
+      delete _windowing;
+      delete _spectrum;
+    }
+
+    void declareParameters() {
+      declareParameter("sampleRate", "sample rate of incoming audio frames", "[8000,inf)", 44100);
+      declareParameter("frameSize", "size of input frame in samples", "[1,inf)", 1024);
+      declareParameter("minFrequency", "minimum frequency to detect in Hz", "(20,20000)", 60.f);
+      declareParameter("maxFrequency", "maximum frequency to detect in Hz", "(20,20000)", 2300.f);
+      declareParameter("pitchAlgorithm", "pitch algorithm to use", "{pyin,pyin_fft}", "pyin_fft");
+      declareParameter("loudnessAlgorithm", "loudness algorithm to use", "{loudness,rms}", "rms");
+    }
+
+    void configure();
+    void compute();
+
+    static const char* name;
+    static const char* category;
+    static const char* description;
+};
+
+} // namespace standard
+} // namespace essentia
+
+#endif
diff --git a/src/algorithms/tonal/pitch2midi.cpp b/src/algorithms/tonal/pitch2midi.cpp
@@ -0,0 +1,46 @@
+#include "pitch2midi.h"
+
+using namespace essentia;
+using namespace standard;
+
+// Static metadata members declared in pitch2midi.h. The description used to
+// be empty; it now summarises the inputs and outputs declared by the class.
+const char* Pitch2Midi::name = "Pitch2Midi";
+const char* Pitch2Midi::category = "Input/Output";
+const char* Pitch2Midi::description = DOC("This algorithm converts a pitch value in Hz, together with a loudness value, into MIDI-related descriptors: the closest MIDI note number and note name (both also given with an optional transposition applied), the closest equal-tempered pitch, the deviation from that pitch in Hz and in cents, and a MIDI velocity derived from the loudness.");
+
+// Caches the two configuration parameters used by compute(): the semitone
+// transposition and the reference tuning frequency.
+void Pitch2Midi::configure() {
+  _transposition = parameter("transposition").toInt();
+  _tuningFreq = parameter("tuningFreq").toReal();
+}
+
+// Maps the incoming pitch and loudness to the MIDI descriptors exposed by the
+// outputs: note numbers and names (plain and transposed), the closest
+// equal-tempered pitch, the deviation from it in Hz and cents, and a velocity.
+void Pitch2Midi::compute() {
+  // inputs
+  const Real& pitchIn = _pitch.get();
+  const Real& loudnessIn = _loudness.get();
+
+  // outputs
+  int& noteOut = _midiNoteNumber.get();
+  int& noteTransposedOut = _midiNoteNumberTransposed.get();
+  std::string& nameOut = _closestNoteName.get();
+  std::string& nameTransposedOut = _closestNoteNameTransposed.get();
+  Real& closestPitchOut = _closestPitch.get();
+  Real& diffOut = _diff.get();
+  Real& centsOut = _cents.get();
+  int& velocityOut = _velocity.get();
+
+  // non-positive pitches are replaced by a tiny positive value so that the
+  // logarithm taken by the helpers stays defined
+  Real workingPitch = (pitchIn <= 0) ? Real(1e-05) : pitchIn;
+  Real workingLoudness = loudnessIn;
+
+  // semitone offsets relative to the tuning reference
+  const int noteIndex = getMIDINoteIndex(workingPitch);
+  const int noteIndexTransposed = noteIndex + _transposition;
+
+  // untransposed descriptors
+  noteOut = getMidiNoteNumberFromNoteIndex(noteIndex);
+  nameOut = getClosestNoteName(noteIndex);
+
+  // transposed descriptors
+  noteTransposedOut = getMidiNoteNumberFromNoteIndex(noteIndexTransposed);
+  nameTransposedOut = getClosestNoteName(noteIndexTransposed);
+
+  // deviation from the closest equal-tempered pitch
+  closestPitchOut = getClosestPitch(noteIndex);
+  diffOut = getDiff(closestPitchOut, workingPitch);
+  centsOut = getCents(closestPitchOut, workingPitch);
+
+  // loudness -> dB -> velocity
+  velocityOut = decibelsToVelocity(gainToDecibels(workingLoudness));
+}
+
+
diff --git a/src/algorithms/tonal/pitch2midi.h b/src/algorithms/tonal/pitch2midi.h
@@ -0,0 +1,108 @@
+#ifndef ESSENTIA_PITCH2MIDI_H
+#define ESSENTIA_PITCH2MIDI_H
+
+#include "algorithmfactory.h"
+
+namespace essentia {
+namespace standard {
+
+// Converts a pitch in Hz and a loudness given as linear gain into MIDI
+// descriptors: note number and note name (plain and transposed), closest
+// equal-tempered pitch, deviation from it in Hz and cents, and a velocity.
+//
+// NOTE(review): the note-number outputs are documented as [0,127] but nothing
+// in this class clamps them — confirm whether callers rely on that range.
+class Pitch2Midi : public Algorithm {
+
+  protected:
+    Input<Real> _pitch;
+    Input<Real> _loudness;
+    Output<int> _midiNoteNumber;
+    Output<int> _midiNoteNumberTransposed;
+    Output<std::string> _closestNoteName;
+    Output<std::string> _closestNoteNameTransposed;
+    Output<Real> _closestPitch;
+    Output<Real> _diff;
+    Output<Real> _cents;
+    Output<int> _velocity;
+
+    // cached parameter values, filled in by configure()
+    Real _tuningFreq;
+    int _transposition;
+
+    // pitch classes ordered by semitone distance above A
+    const std::vector<std::string> ALL_NOTES { "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#" };
+    Real hearing_threshold {-96.0f};  // we consider the 16-bits dynamic range - 96dB(SPL)
+
+    // Signed number of semitones between `pitch` and the tuning frequency,
+    // rounded to the nearest integer. Adding 69 gives the MIDI note number.
+    inline int getMIDINoteIndex(Real pitch) const {
+      return static_cast<int>(round(log2(pitch / _tuningFreq) * 12));
+    }
+
+    // Converts a semitone index (relative to the tuning reference) into a
+    // MIDI note number; index 0 corresponds to note 69.
+    inline int getMidiNoteNumberFromNoteIndex(int idx) const {
+      return 69 + idx;
+    }
+
+    // Builds the note name (pitch class + octave number, e.g. "A4") for a
+    // semitone index relative to the tuning reference.
+    inline std::string getClosestNoteName(int i) const {
+      // wrap into [0,11]; the extra "+ 12" keeps negative indices inside the table
+      const int idx = ((i % 12) + 12) % 12;
+      // octave numbers change at C, which sits 9 semitones below the reference A
+      const int octave = 4 + floor((i + 9) / 12.f);
+      return ALL_NOTES[idx] + std::to_string(octave);
+    }
+
+    // Frequency in Hz of the equal-tempered note `i` semitones away from the
+    // tuning frequency.
+    inline Real getClosestPitch(int i) const {
+      return _tuningFreq * powf(2, i / 12.f);
+    }
+
+    // Difference in Hz between the detected and the closest pitch, rounded to
+    // the nearest integer.
+    inline Real getDiff(Real closest, Real detected) const {
+      return round(detected - closest);
+    }
+
+    // Interval from frequency_a to frequency_b expressed in cents.
+    inline Real getCents(Real frequency_a, Real frequency_b) const {
+      return 1200 * log2(frequency_b / frequency_a);
+    }
+
+    // Converts a level in dB into a MIDI velocity in [0,127].
+    // Levels at or below the hearing threshold (and NaN) give 0; the scale is
+    // linear up to 0 dB, and anything louder is clamped to 127 so the result
+    // never leaves the valid velocity range.
+    inline int decibelsToVelocity(Real decibels) const {
+      if (!(decibels > hearing_threshold))
+        return 0;
+      const Real scaled = (hearing_threshold - decibels) * 127 / hearing_threshold;
+      // clamp before converting: positive dB values (gain above 1.0) would
+      // otherwise exceed 127, and an infinite value must not reach the cast
+      if (scaled >= 127)
+        return 127;
+      return static_cast<int>(scaled);
+    }
+
+    // Converts a linear gain into decibels.
+    inline Real gainToDecibels(Real gain) const {
+      return 20 * log10(gain);
+    }
+
+  public:
+    Pitch2Midi() {
+      declareInput(_pitch, "pitch", "pitch given in Hz for conversion");
+      // the value is converted with gainToDecibels() before use, so it is a linear gain
+      declareInput(_loudness, "loudness", "loudness given as linear gain, converted internally to dB for velocity conversion");
+      declareOutput(_midiNoteNumber, "midiNoteNumber", "midi note number, as integer, in range [0,127]");
+      declareOutput(_midiNoteNumberTransposed, "midiNoteNumberTransposed", "midi note number with applied transposition, as integer, in range [0,127]");
+      declareOutput(_closestNoteName, "closestNoteName", "pitch class and octave number to detected pitch, as string (e.g. A4)");
+      declareOutput(_closestNoteNameTransposed, "closestNoteNameTransposed", "pitch class and octave number to detected pitch, with applied transposition, as string (e.g. A4)");
+      declareOutput(_closestPitch, "closestPitch", "equal-tempered pitch closest to detected pitch, in Hz");
+      declareOutput(_diff, "diff", "difference between pitch and closestPitch, in Hz");
+      declareOutput(_cents, "cents", "difference between pitch and closestPitch, in cents (1/100 of a semitone)");
+      declareOutput(_velocity, "velocity", "control message over the feel and volume of MIDI notes, as integer, in range [0,127]");
+
+    }
+    ~Pitch2Midi() {
+    }
+
+    void declareParameters() {
+      declareParameter("tuningFreq", "reference tuning frequency in Hz", "{432,440}", 440);
+      declareParameter("transposition", "amount of semitones to apply for transposed instruments", "(-69,50)", 0);
+    }
+
+    void configure();
+    void compute();
+
+    static const char* name;
+    static const char* category;
+    static const char* description;
+};
+
+} // namespace standard
+} // namespace essentia
+
+#endif