Skip to content

Commit

Permalink
AI-Talk: Refine the display of logs. v5.14.17
Browse files Browse the repository at this point in the history
  • Loading branch information
winlinvip committed Apr 19, 2024
1 parent 06d06d7 commit 28d1fef
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 79 deletions.
1 change: 1 addition & 0 deletions DEVELOPER.md
Original file line number Diff line number Diff line change
Expand Up @@ -1162,6 +1162,7 @@ The following are the update records for the Oryx server.
* Use fastfail for test and utest. v5.14.15
* Rename project to Oryx.[v5.14.15](https://github.com/ossrs/oryx/releases/tag/v5.14.15)
* API: Support kickoff stream by name. v5.14.16
* AI-Talk: Refine the delay of ASR to 3s. v5.14.17
* v5.13:
* Fix bug for vlive and transcript. v5.13.1
* Support AWS Lightsail install script. v5.13.2
Expand Down
10 changes: 5 additions & 5 deletions platform/ai-talk.go
Original file line number Diff line number Diff line change
Expand Up @@ -1367,9 +1367,6 @@ func (v *TTSWorker) SubmitSegment(ctx context.Context, stage *Stage, sreq *Stage
}()
}

// Merge ASR text of conversations, which is small duration audio segment.
const mergeConversations = 3

func handleAITalkService(ctx context.Context, handler *http.ServeMux) error {
// TODO: FIXME: Should use relative path, never expose absolute path to client.
aiTalkWorkDir = path.Join(conf.Pwd, "containers/data/ai-talk")
Expand Down Expand Up @@ -1608,6 +1605,7 @@ func handleAITalkService(ctx context.Context, handler *http.ServeMux) error {
var roomUUID, roomToken string
var userMayInput float64
var audioBase64Data, textMessage string
var mergeMessages int
if err := ParseBody(ctx, r.Body, &struct {
Token *string `json:"token"`
RoomUUID *string `json:"room"`
Expand All @@ -1618,10 +1616,12 @@ func handleAITalkService(ctx context.Context, handler *http.ServeMux) error {
UserMayInput *float64 `json:"umi"`
AudioData *string `json:"audio"`
TextMessage *string `json:"text"`
// Merge the ASR text of short audio segments: a request is deferred to the next one while fewer than this many unmerged requests are queued; a value <= 0 disables merging.
MergeMessages *int `json:"mergeMessages"`
}{
Token: &token, StageUUID: &sid, UserID: &userID, RequestUUID: &rid,
UserMayInput: &userMayInput, TextMessage: &textMessage, AudioData: &audioBase64Data,
RoomUUID: &roomUUID, RoomToken: &roomToken,
RoomUUID: &roomUUID, RoomToken: &roomToken, MergeMessages: &mergeMessages,
}); err != nil {
return errors.Wrapf(err, "parse body")
}
Expand Down Expand Up @@ -1726,7 +1726,7 @@ func handleAITalkService(ctx context.Context, handler *http.ServeMux) error {

// If this conversation is merged into the next one, we do not submit it to chat or post-processing.
conversations := stage.queryPreviousNotMergedRequests(sreq)
mergeToNextConversation := len(conversations) < mergeConversations
mergeToNextConversation := mergeMessages > 0 && len(conversations) < mergeMessages
if !mergeToNextConversation {
sreq.merged, user.previousAsrText = true, ""
// Generate the merged text for chat input.
Expand Down
70 changes: 43 additions & 27 deletions ui/src/components/AIDictation.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export function AITalkDictationPanel({roomUUID, roomToken, username, userLanguag
const timeoutWaitForMicrophoneToClose = 300;
const timeoutWaitForLastVoice = 200;
const maxSegmentTime = 3 * 1000; // in ms.
const mergeMessages = 3;

// The player ref, to access the audio player.
const playerRef = React.useRef(null);
Expand Down Expand Up @@ -63,21 +64,32 @@ export function AITalkDictationPanel({roomUUID, roomToken, username, userLanguag
setErrorLogs(ref.current.errorLogs);
}, [setErrorLogs, ref]);

const traceLog = React.useCallback((role, msg, variant, ignoreMerge) => {
setTraceCount(++ref.current.traceCount);

// Merge to last log with the same role.
if (ref.current.traceLogs.length > 0 && !ignoreMerge) {
const last = ref.current.traceLogs[ref.current.traceLogs.length - 1];
if (last.role === role) {
last.msg = `${last.msg}${msg}`;
setTraceLogs([...ref.current.traceLogs]);
return;
const traceLog = React.useCallback((mid, rid, role, msg, variant) => {
// Find the existing log group with the same rid (Array.prototype.find returns the first match, not the last).
const lastMatched = ref.current.traceLogs.find((log) => log.rid === rid);

// Create a new message object.
const newMessage = {id: mid, rid, role, msg, variant};

if (!lastMatched) {
// Create a new log if not found.
ref.current.traceLogs = [...ref.current.traceLogs, {
rid, messages: [newMessage],
}];
} else {
// Find the existing message with the same role in this group (find returns the first match, not the last).
const lastMessage = lastMatched.messages.find((m) => m.role === role);

if (!lastMessage) {
// Create a new message if not found.
lastMatched.messages = [...lastMatched.messages, newMessage];
} else {
// Merge to last log with the same role.
lastMessage.msg = `${lastMessage.msg}${msg}`;
}
}

const rid = `id-${Math.random().toString(16).slice(-4)}${new Date().getTime().toString(16).slice(-4)}`;
ref.current.traceLogs = [...ref.current.traceLogs, {id: rid, role, msg, variant}];
setTraceCount(++ref.current.traceCount);
setTraceLogs(ref.current.traceLogs);
}, [setTraceLogs, ref, setTraceCount]);

Expand Down Expand Up @@ -253,7 +265,7 @@ export function AITalkDictationPanel({roomUUID, roomToken, username, userLanguag

axios.post('/terraform/v1/ai-talk/stage/upload', {
room: roomUUID, roomToken, sid: stageUUID, rid: requestUUID, userId: userID,
umi: userMayInput, audio: audioBase64Data,
umi: userMayInput, audio: audioBase64Data, mergeMessages,
}, {
headers: Token.loadBearerHeader(),
}).then(res => {
Expand Down Expand Up @@ -374,7 +386,7 @@ export function AITalkDictationPanel({roomUUID, roomToken, username, userLanguag

const ts = new Date().toISOString().split('T')[1].split('Z')[0];
console.log(`${ts} Event: Recorder stopped, chunks=${artifact.audioChunks.length}, duration=${artifact.duration()}ms`);

resolve();
});

Expand Down Expand Up @@ -450,7 +462,7 @@ export function AITalkDictationPanel({roomUUID, roomToken, username, userLanguag
await new Promise((resolve, reject) => {
axios.post('/terraform/v1/ai-talk/stage/upload', {
room: roomUUID, roomToken, sid: stageUUID, rid: requestUUID, userId: userID,
text: text,
text: text, mergeMessages,
}, {
headers: Token.loadBearerHeader(),
}).then(res => {
Expand Down Expand Up @@ -512,12 +524,12 @@ export function AITalkDictationPanel({roomUUID, roomToken, username, userLanguag
for (let i = 0; i < msgs.length; i++) {
const msg = msgs[i];
if (msg.role === 'user') {
traceLog(msg.username || 'You', msg.msg, 'primary', true);
traceLog(msg.mid, msg.rid, msg.username || 'You', msg.msg, 'primary');
continue;
}

const audioSegmentUUID = msg.asid;
traceLog(msg.username || 'Bot', msg.msg, 'success', msg.sentence);
traceLog(msg.mid, msg.rid, msg.username || 'Bot', msg.msg, 'success');

// For dictation pattern, we always ignore TTS audio files.
// No audio file, skip it.
Expand Down Expand Up @@ -902,11 +914,13 @@ function AITalkTraceLogPC({traceLogs, traceCount, children, roomUUID, roomToken}
<div className='ai-talk-msgs-pc' ref={logPanelRef}>
{children}
{traceLogs.map((log) => {
return (
<Alert key={log.id} variant={log.variant} className='ai-talk-msgs-card'>
{log.role}: {log.msg}
</Alert>
);
return <>{log.messages.map((msg) => {
return (
<Alert key={msg.id} variant={msg.variant} className='ai-talk-msgs-card'>
{msg.role}: {msg.msg}
</Alert>
);
})}</>;
})}
</div>
</Card.Body>
Expand All @@ -927,11 +941,13 @@ function AITalkTraceLogMobile({traceLogs, traceCount}) {
return (
<div className='ai-talk-msgs-dictation-mobile' ref={logPanelRef}>
{traceLogs.map((log) => {
return (
<Alert key={log.id} variant={log.variant} className='ai-talk-msgs-card'>
{log.role}: {log.msg}
</Alert>
);
return <>{log.messages.map((msg) => {
return (
<Alert key={msg.id} variant={msg.variant} className='ai-talk-msgs-card'>
{msg.role}: {msg.msg}
</Alert>
);
})}</>;
})}
</div>
);
Expand Down
122 changes: 75 additions & 47 deletions ui/src/components/AITalk.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ export function AITalkAssistantPanel({roomUUID, roomToken, username, userLanguag

// The timeout in milliseconds.
const timeoutForMicrophoneTestToRun = 50;
const timeoutWaitForMicrophoneToClose = 900;
const timeoutWaitForLastVoice = 700;
const timeoutWaitForMicrophoneToClose = 300;
const timeoutWaitForLastVoice = 200;
const durationRequiredUserInput = 600;

// The player ref, to access the audio player.
Expand Down Expand Up @@ -68,21 +68,32 @@ export function AITalkAssistantPanel({roomUUID, roomToken, username, userLanguag
setErrorLogs(ref.current.errorLogs);
}, [setErrorLogs, ref]);

const traceLog = React.useCallback((role, msg, variant, ignoreMerge) => {
setTraceCount(++ref.current.traceCount);

// Merge to last log with the same role.
if (ref.current.traceLogs.length > 0 && !ignoreMerge) {
const last = ref.current.traceLogs[ref.current.traceLogs.length - 1];
if (last.role === role) {
last.msg = `${last.msg}${msg}`;
setTraceLogs([...ref.current.traceLogs]);
return;
const traceLog = React.useCallback((mid, rid, role, msg, variant) => {
// Find the existing log group with the same rid (Array.prototype.find returns the first match, not the last).
const lastMatched = ref.current.traceLogs.find((log) => log.rid === rid);

// Create a new message object.
const newMessage = {id: mid, rid, role, msg, variant};

if (!lastMatched) {
// Create a new log if not found.
ref.current.traceLogs = [...ref.current.traceLogs, {
rid, messages: [newMessage],
}];
} else {
// Find the last matched message with the same role.
const lastMessage = lastMatched.messages.find((m) => m.role === role);

if (!lastMessage) {
// Create a new message if not found.
lastMatched.messages = [...lastMatched.messages, newMessage];
} else {
// Merge to last log with the same role.
lastMessage.msg = `${lastMessage.msg}${msg}`;
}
}

const rid = `id-${Math.random().toString(16).slice(-4)}${new Date().getTime().toString(16).slice(-4)}`;
ref.current.traceLogs = [...ref.current.traceLogs, {id: rid, role, msg, variant}];
setTraceCount(++ref.current.traceCount);
setTraceLogs(ref.current.traceLogs);
}, [setTraceLogs, ref, setTraceCount]);

Expand Down Expand Up @@ -317,7 +328,7 @@ export function AITalkAssistantPanel({roomUUID, roomToken, username, userLanguag

axios.post('/terraform/v1/ai-talk/stage/upload', {
room: roomUUID, roomToken, sid: stageUUID, rid: requestUUID, userId: userID,
umi: userMayInput, audio: audioBase64Data,
umi: userMayInput, audio: audioBase64Data, mergeMessages: 0,
}, {
headers: Token.loadBearerHeader(),
}).then(res => {
Expand Down Expand Up @@ -437,7 +448,7 @@ export function AITalkAssistantPanel({roomUUID, roomToken, username, userLanguag
await new Promise((resolve, reject) => {
axios.post('/terraform/v1/ai-talk/stage/upload', {
room: roomUUID, roomToken, sid: stageUUID, rid: requestUUID, userId: userID,
text: text,
text: text, mergeMessages: 0,
}, {
headers: Token.loadBearerHeader(),
}).then(res => {
Expand Down Expand Up @@ -526,12 +537,12 @@ export function AITalkAssistantPanel({roomUUID, roomToken, username, userLanguag
for (let i = 0; i < msgs.length; i++) {
const msg = msgs[i];
if (msg.role === 'user') {
traceLog(msg.username || 'You', msg.msg, 'primary', msg.sentence);
traceLog(msg.mid, msg.rid, msg.username || 'You', msg.msg, 'primary');
continue;
}

const audioSegmentUUID = msg.asid;
traceLog(msg.username || 'Bot', msg.msg, 'success', msg.sentence);
traceLog(msg.mid, msg.rid, msg.username || 'Bot', msg.msg, 'success');

// No audio file, skip it.
if (!msg.hasAudio) {
Expand Down Expand Up @@ -701,21 +712,32 @@ export function AITalkChatOnlyPanel({roomUUID, roomToken}) {
setErrorLogs(ref.current.errorLogs);
}, [setErrorLogs, ref]);

const traceLog = React.useCallback((role, msg, variant, ignoreMerge) => {
setTraceCount(++ref.current.traceCount);

// Merge to last log with the same role.
if (ref.current.traceLogs.length > 0 && !ignoreMerge) {
const last = ref.current.traceLogs[ref.current.traceLogs.length - 1];
if (last.role === role) {
last.msg = `${last.msg}${msg}`;
setTraceLogs([...ref.current.traceLogs]);
return;
const traceLog = React.useCallback((mid, rid, role, msg, variant) => {
// Find the existing log group with the same rid (Array.prototype.find returns the first match, not the last).
const lastMatched = ref.current.traceLogs.find((log) => log.rid === rid);

// Create a new message object.
const newMessage = {id: mid, rid, role, msg, variant};

if (!lastMatched) {
// Create a new log if not found.
ref.current.traceLogs = [...ref.current.traceLogs, {
rid, messages: [newMessage],
}];
} else {
// Find the last matched message with the same role.
const lastMessage = lastMatched.messages.find((m) => m.role === role);

if (!lastMessage) {
// Create a new message if not found.
lastMatched.messages = [...lastMatched.messages, newMessage];
} else {
// Merge to last log with the same role.
lastMessage.msg = `${lastMessage.msg}${msg}`;
}
}

const rid = `id-${Math.random().toString(16).slice(-4)}${new Date().getTime().toString(16).slice(-4)}`;
ref.current.traceLogs = [...ref.current.traceLogs, {id: rid, role, msg, variant}];
setTraceCount(++ref.current.traceCount);
setTraceLogs(ref.current.traceLogs);
}, [setTraceLogs, ref, setTraceCount]);

Expand Down Expand Up @@ -848,12 +870,12 @@ export function AITalkChatOnlyPanel({roomUUID, roomToken}) {
for (let i = 0; i < msgs.length; i++) {
const msg = msgs[i];
if (msg.role === 'user') {
traceLog(msg.username, msg.msg, 'primary', msg.sentence);
traceLog(msg.mid, msg.rid, msg.username, msg.msg, 'primary');
return;
}

const audioSegmentUUID = msg.asid;
traceLog(msg.username, msg.msg, 'success', msg.sentence);
traceLog(msg.mid, msg.rid, msg.username, msg.msg, 'success');

// Play the AI generated audio.
await new Promise(resolve => {
Expand Down Expand Up @@ -1197,11 +1219,13 @@ function AITalkTraceLogPC({traceLogs, traceCount, children, roomUUID, roomToken}
<div className='ai-talk-msgs-pc' ref={logPanelRef}>
{children}
{traceLogs.map((log) => {
return (
<Alert key={log.id} variant={log.variant} className='ai-talk-msgs-card'>
{log.role}: {log.msg}
</Alert>
);
return <>{log.messages.map((msg) => {
return (
<Alert key={msg.id} variant={msg.variant} className='ai-talk-msgs-card'>
{msg.role}: {msg.msg}
</Alert>
);
})}</>;
})}
</div>
</Card.Body>
Expand All @@ -1222,11 +1246,13 @@ function AITalkTraceLogMobile({traceLogs, traceCount}) {
return (
<div className='ai-talk-msgs-chat-mobile' ref={logPanelRef}>
{traceLogs.map((log) => {
return (
<Alert key={log.id} variant={log.variant} className='ai-talk-msgs-card'>
{log.role}: {log.msg}
</Alert>
);
return <>{log.messages.map((msg) => {
return (
<Alert key={msg.id} variant={msg.variant} className='ai-talk-msgs-card'>
{msg.role}: {msg.msg}
</Alert>
);
})}</>;
})}
</div>
);
Expand All @@ -1244,11 +1270,13 @@ function AITalkTraceLogChatOnly({traceLogs, traceCount}) {
return (
<div className='ai-talk-msgs-text-only' ref={logPanelRef}>
{traceLogs.map((log) => {
return (
<Alert key={log.id} variant={log.variant}>
{log.role}: {log.msg}
</Alert>
);
return <>{log.messages.map((msg) => {
return (
<Alert key={msg.id} variant={msg.variant}>
{msg.role}: {msg.msg}
</Alert>
);
})}</>;
})}
</div>
);
Expand Down

0 comments on commit 28d1fef

Please sign in to comment.