fix: 修复流式播放无声音问题(SSE 行缓冲 + 解析层级)
This commit is contained in:
153
src/main.rs
153
src/main.rs
@@ -150,13 +150,15 @@ async fn run(cli: Cli) -> Result<()> {
|
||||
// 守护进程模式(由 daemon start -d 自动调用)
|
||||
daemon::start_daemon(port).await
|
||||
}
|
||||
Some(Commands::Send { text, voice, format, style, port }) => {
|
||||
Some(Commands::Send { text, voice, format, style, stream, port }) => {
|
||||
// 发送文本到守护进程
|
||||
let stream_opt = if stream { Some(true) } else { None };
|
||||
client::send_to_daemon(
|
||||
&text,
|
||||
voice.as_deref(),
|
||||
format.as_deref(),
|
||||
style.as_deref(),
|
||||
stream_opt,
|
||||
port,
|
||||
)
|
||||
.await
|
||||
@@ -178,7 +180,21 @@ async fn run(cli: Cli) -> Result<()> {
|
||||
));
|
||||
}
|
||||
|
||||
// 执行语音合成
|
||||
// 流式播放走独立路径:边下边播,不等待全量下载
|
||||
if cli.stream && cli.play {
|
||||
ui::show_playback_start();
|
||||
handle_stream_play(
|
||||
cli.text,
|
||||
cli.file,
|
||||
&cli.voice,
|
||||
cli.style.as_deref(),
|
||||
)
|
||||
.await?;
|
||||
ui::show_playback_complete();
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// 执行语音合成(非流式播放场景:全量下载后输出/播放)
|
||||
let audio_data = synthesize(
|
||||
cli.text,
|
||||
cli.file,
|
||||
@@ -191,15 +207,7 @@ async fn run(cli: Cli) -> Result<()> {
|
||||
|
||||
// 根据参数决定处理方式
|
||||
if cli.play {
|
||||
// 播放音频(流式数据需要封装成 WAV 格式)
|
||||
ui::show_playback_start();
|
||||
if cli.stream {
|
||||
// 流式返回的是 PCM16 原始数据,需要添加 WAV 头
|
||||
let wav_data = pcm16_to_wav(&audio_data);
|
||||
play_audio(&wav_data)?;
|
||||
} else {
|
||||
play_audio(&audio_data)?;
|
||||
}
|
||||
play_audio(&audio_data)?;
|
||||
ui::show_playback_complete();
|
||||
} else if let Some(output_path) = cli.output {
|
||||
// 保存到文件
|
||||
@@ -488,6 +496,128 @@ fn play_audio(data: &[u8]) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 流式播放:边下载 PCM16 chunk 边播放
|
||||
///
|
||||
/// 先缓冲约 1 秒数据再开始播放,抗网络抖动
|
||||
/// 每收到一块就解码为 i16 采样,追加到 rodio Sink 队列
|
||||
async fn handle_stream_play(
|
||||
text: Option<String>,
|
||||
file: Option<std::path::PathBuf>,
|
||||
voice: &str,
|
||||
style: Option<&str>,
|
||||
) -> Result<()> {
|
||||
let content = if let Some(t) = text {
|
||||
tone::apply_tone(&t)
|
||||
} else if let Some(f) = file {
|
||||
let mut file = fs::File::open(&f)
|
||||
.with_context(|| format!("无法打开文件: {:?}", f))?;
|
||||
let mut content = String::new();
|
||||
file.read_to_string(&mut content)
|
||||
.with_context(|| format!("无法读取文件: {:?}", f))?;
|
||||
tone::apply_tone(&content)
|
||||
} else {
|
||||
return Err(anyhow::anyhow!("没有提供文本内容"));
|
||||
};
|
||||
|
||||
let validated_voice = validate_voice(voice);
|
||||
|
||||
let config_manager = ConfigManager::new()
|
||||
.context("无法加载配置")?;
|
||||
let config = config_manager.get_config();
|
||||
|
||||
if config.api_key.is_empty() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"API Key 未设置\n请使用: mimo-tts config set --api-key <YOUR_API_KEY>"
|
||||
));
|
||||
}
|
||||
|
||||
let client = api::TtsClient::builder()
|
||||
.base_url(config.base_url.clone())
|
||||
.api_key(config.api_key.clone())
|
||||
.build()
|
||||
.context("无法创建 TTS 客户端")?;
|
||||
|
||||
let mut builder = api::TtsRequest::builder()
|
||||
.audio(api::AudioConfig {
|
||||
format: "pcm16".to_string(),
|
||||
voice: validated_voice,
|
||||
});
|
||||
|
||||
if let Some(s) = style {
|
||||
builder = builder.add_message(api::Message {
|
||||
role: "user".to_string(),
|
||||
content: s.to_string(),
|
||||
});
|
||||
}
|
||||
builder = builder.add_message(api::Message {
|
||||
role: "assistant".to_string(),
|
||||
content,
|
||||
});
|
||||
builder = builder.stream(true);
|
||||
|
||||
let request = builder.build();
|
||||
|
||||
let (_stream, stream_handle) = rodio::OutputStream::try_default()
|
||||
.context("无法创建音频输出流")?;
|
||||
let sink = std::sync::Arc::new(rodio::Sink::try_new(&stream_handle)
|
||||
.context("无法创建音频播放器")?);
|
||||
|
||||
let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<Vec<u8>>();
|
||||
|
||||
// 接收任务:先缓冲 ~1 秒 PCM16 再开始播放,后续逐块追加
|
||||
let play_sink = sink.clone();
|
||||
let play_handle = tokio::spawn(async move {
|
||||
let mut buffer = Vec::new();
|
||||
// 24000Hz * 16bit * 1ch = 48000 字节/秒
|
||||
let threshold = 48000;
|
||||
let mut started = false;
|
||||
|
||||
while let Some(chunk) = rx.recv().await {
|
||||
if !started {
|
||||
buffer.extend_from_slice(&chunk);
|
||||
if buffer.len() >= threshold {
|
||||
let samples: Vec<i16> = buffer.chunks(2)
|
||||
.filter(|c| c.len() == 2)
|
||||
.map(|c| i16::from_le_bytes([c[0], c[1]]))
|
||||
.collect();
|
||||
if !samples.is_empty() {
|
||||
play_sink.append(rodio::buffer::SamplesBuffer::new(1, 24000, samples));
|
||||
}
|
||||
buffer.clear();
|
||||
started = true;
|
||||
}
|
||||
} else {
|
||||
let samples: Vec<i16> = chunk.chunks(2)
|
||||
.filter(|c| c.len() == 2)
|
||||
.map(|c| i16::from_le_bytes([c[0], c[1]]))
|
||||
.collect();
|
||||
if !samples.is_empty() {
|
||||
play_sink.append(rodio::buffer::SamplesBuffer::new(1, 24000, samples));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 刷新剩余缓冲(文本较短下次未达到阈值)
|
||||
if !buffer.is_empty() {
|
||||
let samples: Vec<i16> = buffer.chunks(2)
|
||||
.filter(|c| c.len() == 2)
|
||||
.map(|c| i16::from_le_bytes([c[0], c[1]]))
|
||||
.collect();
|
||||
if !samples.is_empty() {
|
||||
play_sink.append(rodio::buffer::SamplesBuffer::new(1, 24000, samples));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
client.synthesize_stream_to_channel(&request, tx).await?;
|
||||
|
||||
play_handle.await.context("流式播放任务异常")?;
|
||||
|
||||
sink.sleep_until_end();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 将 PCM16 原始数据转换为 WAV 格式
|
||||
///
|
||||
/// # 参数
|
||||
@@ -495,6 +625,7 @@ fn play_audio(data: &[u8]) -> Result<()> {
|
||||
///
|
||||
/// # 返回
|
||||
/// 完整的 WAV 格式数据(包含 44 字节头部)
|
||||
#[allow(dead_code)]
|
||||
fn pcm16_to_wav(pcm_data: &[u8]) -> Vec<u8> {
|
||||
let sample_rate: u32 = 24000; // Mimo-TTS PCM16 输出通常是 24kHz
|
||||
let bits_per_sample: u16 = 16;
|
||||
|
||||
Reference in New Issue
Block a user