Skip to content

Commit

Permalink
Add SpeakerDiarizationConfig, deprecate enable_speaker_diarization an…
Browse files Browse the repository at this point in the history
…d diarization_speaker_count (via synth). (#8795)
  • Loading branch information
yoshi-automation authored and busunkim96 committed Jul 26, 2019
1 parent a46d4f0 commit da53518
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 64 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -306,19 +306,24 @@ message RecognitionConfig {
// *Optional* If 'true', enables speaker detection for each recognized word in
// the top alternative of the recognition result using a speaker_tag provided
// in the WordInfo.
// Note: When this is true, we send all the words from the beginning of the
// Note: Use diarization_config instead.
bool enable_speaker_diarization = 16 [deprecated = true];

// *Optional*
// If set, specifies the estimated number of speakers in the conversation.
// Defaults to '2'. Ignored unless enable_speaker_diarization is set to true.
// Note: Use diarization_config instead.
int32 diarization_speaker_count = 17 [deprecated = true];

// *Optional* Config to enable speaker diarization and set additional
// parameters to make diarization better suited for your application.
// Note: When this is enabled, we send all the words from the beginning of the
// audio for the top alternative in every consecutive STREAMING response.
// This is done in order to improve our speaker tags as our models learn to
// identify the speakers in the conversation over time.
// For non-streaming requests, the diarization results will be provided only
// in the top alternative of the FINAL SpeechRecognitionResult.
bool enable_speaker_diarization = 16;

// *Optional*
// If set, specifies the estimated number of speakers in the conversation.
// If not set, defaults to '2'.
// Ignored unless enable_speaker_diarization is set to true.
int32 diarization_speaker_count = 17;
SpeakerDiarizationConfig diarization_config = 19;

// *Optional* Metadata regarding this request.
RecognitionMetadata metadata = 9;
Expand Down Expand Up @@ -368,6 +373,29 @@ message RecognitionConfig {
bool use_enhanced = 14;
}

// *Optional* Config to enable speaker diarization and to give the range
// (minimum and maximum) for the number of speakers in the conversation.
message SpeakerDiarizationConfig {
// *Optional* If 'true', enables speaker detection for each recognized word in
// the top alternative of the recognition result using a speaker_tag provided
// in the WordInfo.
bool enable_speaker_diarization = 1;

// *Optional*
// Minimum number of speakers in the conversation. Together with
// max_speaker_count, this range gives you more flexibility by allowing the
// system to automatically determine the correct number of speakers.
// If not set, the default value is 2.
// Note: Set min_speaker_count = max_speaker_count to fix the number of
// speakers to be detected in the audio.
int32 min_speaker_count = 2;

// *Optional*
// Maximum number of speakers in the conversation. Together with
// min_speaker_count, this range gives you more flexibility by allowing the
// system to automatically determine the correct number of speakers.
// If not set, the default value is 6.
// Note: Set min_speaker_count = max_speaker_count to fix the number of
// speakers to be detected in the audio.
int32 max_speaker_count = 3;
}

// Description of audio data to be recognized.
message RecognitionMetadata {
// Use case categories that the audio recognition request can be described
Expand Down
Loading

0 comments on commit da53518

Please sign in to comment.