Add SpeakerDiarizationConfig, deprecate enable_speaker_diarization and diarization_speaker_count (via synth). (#8795)
googleapis · Jul 26, 2019 · da53518 · da53518
1 parent a46d4f0
commit da53518
Show file tree

Hide file tree

Showing 3 changed files with 227 additions and 64 deletions.
diff --git a/packages/google-cloud-speech/google/cloud/speech_v1p1beta1/proto/cloud_speech.proto b/packages/google-cloud-speech/google/cloud/speech_v1p1beta1/proto/cloud_speech.proto
@@ -306,19 +306,24 @@ message RecognitionConfig {
   // *Optional* If 'true', enables speaker detection for each recognized word in
   // the top alternative of the recognition result using a speaker_tag provided
   // in the WordInfo.
-  // Note: When this is true, we send all the words from the beginning of the
+  // Note: Use diarization_config instead.
+  bool enable_speaker_diarization = 16 [deprecated = true];
+
+  // *Optional*
+  // If set, specifies the estimated number of speakers in the conversation.
+  // Defaults to '2'. Ignored unless enable_speaker_diarization is set to true.
+  // Note: Use diarization_config instead.
+  int32 diarization_speaker_count = 17 [deprecated = true];
+
+  // *Optional* Config to enable speaker diarization and set additional
+  // parameters to make diarization better suited for your application.
+  // Note: When this is enabled, we send all the words from the beginning of the
   // audio for the top alternative in every consecutive STREAMING responses.
   // This is done in order to improve our speaker tags as our models learn to
   // identify the speakers in the conversation over time.
   // For non-streaming requests, the diarization results will be provided only
   // in the top alternative of the FINAL SpeechRecognitionResult.
-  bool enable_speaker_diarization = 16;
-
-  // *Optional*
-  // If set, specifies the estimated number of speakers in the conversation.
-  // If not set, defaults to '2'.
-  // Ignored unless enable_speaker_diarization is set to true."
-  int32 diarization_speaker_count = 17;
+  SpeakerDiarizationConfig diarization_config = 19;
 
   // *Optional* Metadata regarding this request.
   RecognitionMetadata metadata = 9;
@@ -368,6 +373,29 @@ message RecognitionConfig {
   bool use_enhanced = 14;
 }
 
+// Config to enable speaker diarization and bound the expected speaker count.
+message SpeakerDiarizationConfig {
+  // *Optional* If 'true', enables speaker detection for each recognized word in
+  // the top alternative of the recognition result using a speaker_tag provided
+  // in the WordInfo.
+  bool enable_speaker_diarization = 1;
+
+  // *Optional*
+  // Minimum number of speakers in the conversation. This range gives you more
+  // flexibility by allowing the system to automatically determine the correct
+  // number of speakers. If not set, the default value is 2.
+  // Note: Set min_speaker_count = max_speaker_count to fix the number of
+  // speakers to be detected in the audio.
+  int32 min_speaker_count = 2;
+
+  // *Optional*
+  // Maximum number of speakers in the conversation. This range gives you more
+  // flexibility by allowing the system to automatically determine the correct
+  // number of speakers. If not set, the default value is 6.
+  // Note: Set max_speaker_count = min_speaker_count to fix the number of
+  // speakers to be detected in the audio.
+  int32 max_speaker_count = 3;
+}
+
 // Description of audio data to be recognized.
 message RecognitionMetadata {
   // Use case categories that the audio recognition request can be described