mirror of
https://github.com/idootop/mi-gpt.git
synced 2025-04-07 22:49:21 +00:00
feat: support stream response
This commit is contained in:
parent
e948261e4e
commit
631876d2ac
|
@ -8,6 +8,7 @@ import {
|
|||
} from "mi-service-lite";
|
||||
import { sleep } from "../../utils/base";
|
||||
import { Http } from "../http";
|
||||
import { ResponseStream } from "./stream";
|
||||
|
||||
export type TTSProvider = "xiaoai" | "doubao";
|
||||
|
||||
|
@ -49,6 +50,7 @@ export class BaseSpeaker {
|
|||
}
|
||||
|
||||
/**
 * Cancels the speaker's wake-up state by switching the microphone off and
 * straight back on through two consecutive `MiIOT.setProperty(4, 1, …)` calls.
 *
 * The two calls are awaited in order; the second one restores the microphone.
 */
async unWakeUp() {
|
||||
// ! FIXME Newer XiaoAi speaker firmware plays a prompt tone whenever the microphone is switched off/on
|
||||
await this.MiIOT!.setProperty(4, 1, true); // Switch the microphone off
|
||||
await this.MiIOT!.setProperty(4, 1, false); // Switch the microphone back on
|
||||
}
|
||||
|
@ -57,6 +59,7 @@ export class BaseSpeaker {
|
|||
async response(options: {
|
||||
tts?: TTSProvider;
|
||||
text?: string;
|
||||
stream?: ResponseStream;
|
||||
audio?: string;
|
||||
speaker?: string;
|
||||
keepAlive?: boolean;
|
||||
|
@ -65,16 +68,98 @@ export class BaseSpeaker {
|
|||
let {
|
||||
text,
|
||||
audio,
|
||||
stream,
|
||||
playSFX = true,
|
||||
keepAlive = false,
|
||||
tts = this.tts,
|
||||
} = options ?? {};
|
||||
|
||||
const ttsNotXiaoai = (!!stream || !!text) && !audio && tts !== "xiaoai";
|
||||
playSFX = ttsNotXiaoai && playSFX;
|
||||
|
||||
if (ttsNotXiaoai && !stream) {
|
||||
// 长文本 TTS 转化成 stream 分段模式
|
||||
stream = ResponseStream.createResponseStream(text!);
|
||||
}
|
||||
|
||||
let res;
|
||||
this.responding = true;
|
||||
// 开始响应
|
||||
if (stream) {
|
||||
let _response = "";
|
||||
while (true) {
|
||||
const { nextSentence, noMore } = stream.getNextResponse();
|
||||
if (nextSentence) {
|
||||
if (_response.length < 1) {
|
||||
// 播放开始提示音
|
||||
if (playSFX) {
|
||||
await this.MiNA!.play({ url: process.env.AUDIO_BEEP });
|
||||
}
|
||||
// 在播放 TTS 语音之前,先取消小爱音箱的唤醒状态,防止将 TTS 语音识别成用户指令
|
||||
if (ttsNotXiaoai) {
|
||||
await this.unWakeUp();
|
||||
}
|
||||
}
|
||||
res = await this._response({
|
||||
...options,
|
||||
text: nextSentence,
|
||||
playSFX: false,
|
||||
keepAlive: false,
|
||||
});
|
||||
if (res === "break") {
|
||||
// 终止回复
|
||||
stream.cancel();
|
||||
break;
|
||||
}
|
||||
_response += nextSentence;
|
||||
}
|
||||
if (noMore) {
|
||||
if (_response.length > 0) {
|
||||
// 播放结束提示音
|
||||
if (playSFX) {
|
||||
await this.MiNA!.play({ url: process.env.AUDIO_BEEP });
|
||||
}
|
||||
}
|
||||
// 保持唤醒状态
|
||||
if (keepAlive) {
|
||||
await this.wakeUp();
|
||||
}
|
||||
// 播放完毕
|
||||
break;
|
||||
}
|
||||
await sleep(this.interval);
|
||||
}
|
||||
} else {
|
||||
res = await this._response(options);
|
||||
}
|
||||
this.responding = false;
|
||||
return res;
|
||||
}
|
||||
|
||||
private async _response(options: {
|
||||
tts?: TTSProvider;
|
||||
text?: string;
|
||||
stream?: ResponseStream;
|
||||
audio?: string;
|
||||
speaker?: string;
|
||||
keepAlive?: boolean;
|
||||
playSFX?: boolean;
|
||||
}) {
|
||||
let {
|
||||
text,
|
||||
audio,
|
||||
stream,
|
||||
playSFX = true,
|
||||
keepAlive = false,
|
||||
tts = this.tts,
|
||||
speaker = this._defaultSpeaker,
|
||||
} = options ?? {};
|
||||
|
||||
const ttsNotXiaoai = !stream && !!text && !audio && tts !== "xiaoai";
|
||||
playSFX = ttsNotXiaoai && playSFX;
|
||||
|
||||
// 播放回复
|
||||
const play = async (args?: { tts?: string; url?: string }) => {
|
||||
const ttsNotXiaoai = !audio && tts !== "xiaoai";
|
||||
playSFX = ttsNotXiaoai && playSFX;
|
||||
// 播放开始提示音
|
||||
if (playSFX) {
|
||||
await this.MiNA!.play({ url: process.env.AUDIO_BEEP });
|
||||
|
@ -112,7 +197,6 @@ export class BaseSpeaker {
|
|||
|
||||
// 开始响应
|
||||
let res;
|
||||
this.responding = true;
|
||||
if (audio) {
|
||||
// 音频回复
|
||||
res = await play({ url: audio });
|
||||
|
@ -120,18 +204,18 @@ export class BaseSpeaker {
|
|||
// 文字回复
|
||||
switch (tts) {
|
||||
case "doubao":
|
||||
text = encodeURIComponent(text);
|
||||
const _text = encodeURIComponent(text);
|
||||
const doubaoTTS = process.env.TTS_DOUBAO;
|
||||
const url = `${doubaoTTS}?speaker=${speaker}&text=${text}`;
|
||||
const url = `${doubaoTTS}?speaker=${speaker}&text=${_text}`;
|
||||
res = await play({ url });
|
||||
break;
|
||||
case "xiaoai":
|
||||
default:
|
||||
res = await play({ tts: text });
|
||||
break;
|
||||
}
|
||||
this.responding = false;
|
||||
return res;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
private _doubaoSpeakers?: Speaker[];
|
||||
|
|
|
@ -266,7 +266,7 @@ export class Speaker extends BaseSpeaker {
|
|||
const ttsAnswer = e.answers.find((e) => e.type === "TTS") as any;
|
||||
return {
|
||||
text: e.query,
|
||||
answer: ttsAnswer?.tts?.text,
|
||||
answer: ttsAnswer?.tts?.text?.trim(),
|
||||
timestamp: e.time,
|
||||
};
|
||||
});
|
||||
|
|
190
src/services/speaker/stream.ts
Normal file
190
src/services/speaker/stream.ts
Normal file
|
@ -0,0 +1,190 @@
|
|||
/** Lifecycle states of a {@link ResponseStream}. */
type ResponseStatus = "idle" | "responding" | "finished" | "canceled";

interface ResponseStreamOptions {
  /**
   * Maximum length of a single response sentence (in UTF-16 code units).
   *
   * Default: 100.
   */
  maxSentenceLength?: number;
  /**
   * Collection window for the first response sentence, in milliseconds.
   *
   * Example: 100 => starting from the first received text, everything that
   * arrives within the next 100ms is merged into the first response.
   *
   * Default: 200. Values below 100 switch the stream to immediate submission.
   */
  firstSubmitTimeout?: number;
  /**
   * Collection window for every subsequent batch, in milliseconds.
   *
   * Example: 1000 => text received within each 1s window is merged into one
   * response.
   *
   * Default: 1000. Values below 100 switch the stream to immediate submission.
   */
  batchSubmitTimeout?: number;
}

/**
 * Turns incrementally arriving text (for example a streamed AI answer) into a
 * queue of sentence-sized chunks that a consumer polls with
 * {@link ResponseStream.getNextResponse}.
 *
 * Producer side: call `addResponse(text)` for every new piece of text and
 * `finish()` once the source is exhausted; stop feeding when `status` is
 * `"canceled"`.
 *
 * Consumer side: poll `getNextResponse()` until it reports `noMore`; call
 * `cancel()` to abort.
 */
export class ResponseStream {
  /** Characters after which a sentence may be cut. */
  private static readonly _punctuations = ",。?!:;……,.?!:;…";

  /**
   * Wraps an already complete text into a finished stream so that long text
   * is delivered in sentence-sized chunks.
   *
   * @param text The complete text to chunk.
   * @param options Stream options; only `maxSentenceLength` affects whether a
   * stream is created.
   * @returns A finished stream, or `undefined` when `text` is not longer than
   * `maxSentenceLength` and therefore needs no chunking.
   */
  static createResponseStream(
    text: string,
    options?: ResponseStreamOptions
  ): ResponseStream | undefined {
    const { maxSentenceLength = 100 } = options ?? {};
    if (text.length <= maxSentenceLength) {
      return undefined;
    }
    const stream = new ResponseStream(options);
    stream.addResponse(text);
    stream.finish();
    return stream;
  }

  maxSentenceLength: number;
  firstSubmitTimeout: number;
  batchSubmitTimeout: number;

  /** @param options See {@link ResponseStreamOptions} for the defaults. */
  constructor(options?: ResponseStreamOptions) {
    const {
      maxSentenceLength = 100,
      firstSubmitTimeout = 200,
      batchSubmitTimeout = 1000,
    } = options ?? {};
    this.maxSentenceLength = maxSentenceLength;
    this.firstSubmitTimeout = firstSubmitTimeout;
    this.batchSubmitTimeout = batchSubmitTimeout;
  }

  /** Current lifecycle state; a new stream starts out as `"responding"`. */
  status: ResponseStatus = "responding";

  /**
   * Aborts the stream if it has not ended yet.
   *
   * @returns `true` when the stream is canceled after the call.
   */
  cancel() {
    if (["idle", "responding"].includes(this.status)) {
      this.status = "canceled";
    }
    return this.status === "canceled";
  }

  /**
   * Feeds newly received text into the stream. Text is ignored once the
   * stream has been finished or canceled.
   */
  addResponse(text: string) {
    if (this.status === "idle") {
      this.status = "responding";
    }
    if (this.status !== "responding") {
      return;
    }
    this._batchSubmit(text);
  }

  private _nextChunkIdx = 0;

  /**
   * Pops the next queued sentence, if any.
   *
   * @returns `nextSentence` is `undefined` when nothing is queued right now;
   * `noMore` is `true` once the queue is drained and the stream has been
   * finished or canceled.
   */
  getNextResponse() {
    const nextSentence = this._chunks[this._nextChunkIdx];
    if (nextSentence) {
      this._nextChunkIdx++;
    }
    const noMore =
      this._nextChunkIdx > this._chunks.length - 1 &&
      ["finished", "canceled"].includes(this.status);
    return { nextSentence, noMore };
  }

  /**
   * Marks the stream as complete and flushes all buffered text into the
   * queue. Has no effect on a stream that has already ended.
   *
   * @returns `true` when the stream is finished after the call.
   */
  finish() {
    if (["idle", "responding"].includes(this.status)) {
      if (this._tempText) {
        // Submit the text that is still waiting in the batch buffer.
        this._addResponse(this._tempText);
        this._tempText = "";
      }
      if (this._remainingText) {
        // Whatever is left has no trailing punctuation; emit it as the last sentence.
        this._chunks.push(this._remainingText);
        this._remainingText = "";
      }
      this.status = "finished";
    }
    return this.status === "finished";
  }

  /** Sentences ready to be consumed, in order. */
  private _chunks: string[] = [];
  /** Text received but not yet submitted for chunking (batch buffer). */
  private _tempText = "";
  /** Submitted text that has not yet been cut into a sentence. */
  private _remainingText: string = "";
  /** Time of the previous batch submission; 0 until the first text arrives. */
  private _preSubmitTimestamp = 0;

  /**
   * Collects received text and submits it in batches.
   *
   * The purpose is to keep the sentences produced from an AI text stream at a
   * moderate length (neither too long nor too short).
   *
   * @param text Newly received text.
   * @param immediately Forces (or suppresses) immediate submission; when
   * omitted, submission is immediate if either timeout is below 100ms.
   */
  private _batchSubmit(text: string, immediately?: boolean) {
    this._tempText += text;

    const submitImmediately = () => {
      // A timer may fire after cancel(); like addResponse(), do not queue
      // anything once the stream is no longer responding.
      if (this._tempText && this.status === "responding") {
        this._addResponse(this._tempText);
        this._tempText = "";
      }
      this._preSubmitTimestamp = Date.now();
    };

    const submitNow =
      immediately ??
      (this.firstSubmitTimeout < 100 || this.batchSubmitTimeout < 100);
    if (submitNow) {
      submitImmediately();
      return;
    }

    const isFirstSubmit = this._preSubmitTimestamp === 0;

    const batchSubmit = (timeout: number) => {
      // Submit once the window has elapsed or enough text has accumulated.
      if (
        Date.now() - this._preSubmitTimestamp > timeout ||
        this._tempText.length > this.maxSentenceLength
      ) {
        submitImmediately();
      }
    };

    const submit = (timeout: number) => {
      batchSubmit(timeout);
      // Re-check after the window so trailing text is not left in the buffer.
      setTimeout(() => {
        batchSubmit(timeout);
      }, timeout);
    };

    if (isFirstSubmit) {
      this._preSubmitTimestamp = Date.now();
      submit(this.firstSubmitTimeout);
    } else {
      submit(this.batchSubmitTimeout);
    }
  }

  /**
   * Appends text to the pending remainder and moves every complete sentence
   * into the queue. Text that is short and has no punctuation yet stays in
   * the remainder until more text arrives or the stream is finished.
   */
  private _addResponse(text: string) {
    this._remainingText += text;
    while (this._remainingText.length > 0) {
      const lastCutIndex = this._findLastCutIndex(this._remainingText);
      if (lastCutIndex <= 0) {
        // Nothing to cut yet.
        break;
      }
      this._chunks.push(this._remainingText.substring(0, lastCutIndex));
      this._remainingText = this._remainingText.substring(lastCutIndex);
    }
  }

  /**
   * Finds where the next sentence should end.
   *
   * @returns The index just after the last punctuation mark within the first
   * `maxSentenceLength` characters. When there is none and the text exceeds
   * `maxSentenceLength`, the text is cut at the limit so that a sentence can
   * never grow without bound. Returns -1 when the text should keep waiting.
   */
  private _findLastCutIndex(text: string): number {
    const limit = Math.min(text.length, Math.ceil(this.maxSentenceLength));
    for (let i = limit - 1; i >= 0; i--) {
      if (ResponseStream._punctuations.includes(text.charAt(i))) {
        return i + 1;
      }
    }
    if (limit > 0 && text.length > limit) {
      // Forced cut: avoid splitting a UTF-16 surrogate pair in half.
      const code = text.charCodeAt(limit - 1);
      const endsOnHighSurrogate = code >= 0xd800 && code <= 0xdbff;
      if (!endsOnHighSurrogate) {
        return limit;
      }
      return limit > 1 ? limit - 1 : limit + 1;
    }
    return -1;
  }
}
|
||||
|
||||
// NOTE(review): module-level instance created on import; in the visible code
// it is referenced only by the commented-out sketch below — confirm whether
// it is still needed.
const stream = new ResponseStream();
|
||||
|
||||
// Sketch: how an AI text callback (onNewText) is expected to feed the stream.
// ai onNewText
|
||||
// {
|
||||
// onNewText(text:string){
|
||||
// if(stream.status==='canceled'){
|
||||
// return 'canceled';
|
||||
// }
|
||||
// if(finished){
|
||||
// stream.finish()
|
||||
// }else{
|
||||
// stream.addResponse(text)
|
||||
// }
|
||||
// }
|
||||
// }
|
|
@ -3,12 +3,14 @@ import { println } from "../src/utils/base";
|
|||
import { kBannerASCII } from "../src/utils/string";
|
||||
import { runWithDB } from "../src/services/db";
|
||||
import { testDB } from "./db";
|
||||
import { testSpeaker } from "./speaker";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
/**
 * Test entry point: prints the ASCII banner and then starts the selected
 * test routines. It is handed to `runWithDB` below.
 */
async function main() {
|
||||
println(kBannerASCII);
|
||||
testDB();
|
||||
// testDB();
|
||||
// NOTE(review): testSpeaker is declared `async` but its promise is not
// awaited here, so main() resolves before the speaker test has run to
// completion — confirm runWithDB does not tear anything down in the meantime.
testSpeaker();
|
||||
}
|
||||
|
||||
runWithDB(main);
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import { AISpeaker } from "../src/services/speaker/ai";
|
||||
import { ResponseStream } from "../src/services/speaker/stream";
|
||||
import { sleep } from "../src/utils/base";
|
||||
|
||||
export async function main() {
|
||||
export async function testSpeaker() {
|
||||
const config: any = {
|
||||
userId: process.env.MI_USER!,
|
||||
password: process.env.MI_PASS!,
|
||||
|
@ -12,10 +13,11 @@ export async function main() {
|
|||
const speaker = new AISpeaker(config);
|
||||
await speaker.initMiServices();
|
||||
// await testSpeakerResponse(speaker);
|
||||
await testSpeakerStreamResponse(speaker);
|
||||
// await testSpeakerGetMessages(speaker);
|
||||
// await testSwitchSpeaker(speaker);
|
||||
// await testSpeakerUnWakeUp(speaker);
|
||||
await testAISpeaker(speaker);
|
||||
// await testAISpeaker(speaker);
|
||||
}
|
||||
|
||||
async function testAISpeaker(speaker: AISpeaker) {
|
||||
|
@ -51,8 +53,34 @@ async function testSpeakerGetMessages(speaker: AISpeaker) {
|
|||
/**
 * Manual smoke test for a plain text reply: logs the player status, lets the
 * speaker say a fixed greeting, waits one second and logs the status again.
 *
 * @param speaker An initialised speaker (Mi services already set up).
 */
async function testSpeakerResponse(speaker: AISpeaker) {
  let status = await speaker.MiNA!.getStatus();
  console.log("curent status", status);
  // Start exactly one reply and wait for it; a second, un-awaited call would
  // overlap with this one.
  await speaker.response({ text: "你好,我是豆包,很高兴认识你!" });
  // sleep() is awaited everywhere else in this project; without `await` the
  // pause is skipped and the status below is read immediately.
  await sleep(1000);
  status = await speaker.MiNA!.getStatus();
  console.log("tts status", status);
}
|
||||
|
||||
/**
 * Manual smoke test for streamed replies.
 *
 * A background task feeds a fixed answer into a ResponseStream piece by piece
 * (100ms apart), waits ten seconds and then finishes the stream, while the
 * speaker concurrently plays whatever the stream yields.
 *
 * @param speaker An initialised speaker (Mi services already set up).
 */
async function testSpeakerStreamResponse(speaker: AISpeaker) {
  const stream = new ResponseStream();

  // The answer, split the way a streaming AI backend might deliver it.
  const fragments = [
    `地球是圆的主要原因`,
    `是由于地球的引力和自转。`,
    `地球的引力使得地球在形成过程中变得更加圆滑,因为引力会使得地球`,
    `的物质向地心靠拢,从而使得地球的形状更接近于一个球体。此外,`,
    `地球的自转也会导致地球呈现出圆形,因为地球自转会使得地球的物质在赤道附近向外扩散,从而使得`,
    `地球在赤道处稍微膨胀,而在极地处稍微收缩,最终形成一个近似于球体的形状。因此,地球是圆的`,
    `主要原因是由于地球的引力和自转共同作用所致。`,
  ];

  // Producer: runs detached from the consumer below.
  const feed = async () => {
    for (const fragment of fragments) {
      stream.addResponse(fragment);
      await sleep(100);
    }
    await sleep(10 * 1000);
    console.log("finished!");
    stream.finish();
  };
  setTimeout(feed);

  // Consumer: resolves once the stream reports that nothing more will come.
  await speaker.response({ stream });
  console.log("hello!");
}
|
||||
|
|
|
@ -874,8 +874,10 @@ merge2@^1.3.0, merge2@^1.4.1:
|
|||
resolved "https://registry.yarnpkg.com/merge2/-/merge2-1.4.1.tgz#4368892f885e907455a6fd7dc55c0c9d404990ae"
|
||||
integrity sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==
|
||||
|
||||
"mi-service-lite@file:../mi-service-lite":
|
||||
mi-service-lite@^2.0.0:
|
||||
version "2.0.0"
|
||||
resolved "https://registry.yarnpkg.com/mi-service-lite/-/mi-service-lite-2.0.0.tgz#c043a931574011c154a3113ecabe4fc2a61b328a"
|
||||
integrity sha512-PqMWtvEHQ7a6mhKee9RAnT6Xh+rqf+RvhlCki/8VsSTnjREAzl/kxZh3U0ogFhN5iQzwlK4YC8Is0rnSljl2og==
|
||||
dependencies:
|
||||
axios "^1.6.5"
|
||||
pako "^2.1.0"
|
||||
|
|
Loading…
Reference in New Issue
Block a user