Files
haibao-tts-cli/src/main.rs

662 lines
20 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
mod cli;
mod config;
mod api;
mod ui;
mod tone;
mod daemon;
mod client;
/// 启动守护进程(后台运行)
///
/// 通过启动新进程来实现后台运行
fn spawn_daemon_process(port: u16) -> Result<()> {
// 获取当前可执行文件路径
let exe_path = std::env::current_exe()
.context("无法获取当前可执行文件路径")?;
// 启动新进程,执行 ttsd 命令
// 使用 nohup 实现后台运行Unix
#[cfg(unix)]
{
std::process::Command::new("nohup")
.arg(&exe_path)
.arg("ttsd")
.arg("--port")
.arg(port.to_string())
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.stdin(std::process::Stdio::null())
.spawn()
.context("无法启动守护进程")?;
}
#[cfg(windows)]
{
std::process::Command::new("cmd")
.args(["/C", "start", "", &exe_path.to_string_lossy(), "ttsd", "--port", &port.to_string()])
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.stdin(std::process::Stdio::null())
.spawn()
.context("无法启动守护进程")?;
}
Ok(())
}
use anyhow::{Context, Result};
use clap::Parser;
use cli::{Cli, Commands, ConfigAction};
use config::ConfigManager;
use rodio;
use std::fs;
use std::io::Read;
use std::process;
/// 程序退出码定义
///
/// 遵循 agents.md 中定义的退出码规范
#[derive(Debug)]
enum ExitCode {
Success = 0,
ArgumentError = 1,
ConfigError = 2,
ApiError = 3,
FileError = 4,
}
impl From<ExitCode> for i32 {
fn from(code: ExitCode) -> Self {
code as i32
}
}
/// 主函数
///
/// 使用 tokio 运行时处理异步 API 调用
#[tokio::main]
async fn main() {
// 解析命令行参数
let cli = Cli::parse();
// 执行程序逻辑,如果出错则处理错误并返回对应退出码
let exit_code = match run(cli).await {
Ok(_) => ExitCode::Success,
Err(e) => {
// 根据错误类型返回对应的退出码
eprintln!("错误: {:#}", e);
// 简化错误处理,根据错误信息判断类型
let error_msg = e.to_string();
if error_msg.contains("API") || error_msg.contains("请求") {
ExitCode::ApiError
} else if error_msg.contains("配置") {
ExitCode::ConfigError
} else if error_msg.contains("文件") || error_msg.contains("读取") || error_msg.contains("写入") {
ExitCode::FileError
} else {
ExitCode::ArgumentError
}
}
};
process::exit(exit_code.into());
}
/// 程序主逻辑
async fn run(cli: Cli) -> Result<()> {
match cli.command {
// 处理子命令
Some(Commands::Voices) => {
list_voices();
Ok(())
}
Some(Commands::ShowConfig) => {
show_config()
}
Some(Commands::Config { action }) => {
handle_config_command(action)
}
Some(Commands::Onboard) => {
// 引导式配置初始化
onboard().await
}
Some(Commands::Daemon(daemon_cmd)) => {
// 处理守护进程命令
match daemon_cmd.action {
cli::DaemonAction::Start { port, daemonize } => {
if daemonize {
// 后台运行:启动新进程执行 tt sd 命令
spawn_daemon_process(port)?;
println!("守护进程已在后台启动");
Ok(())
} else {
// 前台运行
daemon::start_daemon(port).await
}
}
cli::DaemonAction::Stop => {
daemon::stop_daemon()
}
cli::DaemonAction::Status => {
daemon::show_status()
}
cli::DaemonAction::Logs { lines } => {
daemon::show_logs(lines)
}
}
}
Some(Commands::DaemonMode { port }) => {
// 守护进程模式(由 daemon start -d 自动调用)
daemon::start_daemon(port).await
}
Some(Commands::Send { text, voice, format, style, stream, port }) => {
// 发送文本到守护进程
let stream_opt = if stream { Some(true) } else { None };
client::send_to_daemon(
&text,
voice.as_deref(),
format.as_deref(),
style.as_deref(),
stream_opt,
port,
)
.await
.map(|msg| {
println!("{}", msg);
})
}
// 没有子命令时,执行语音合成
None => {
// 检查参数组合
if cli.play && cli.output.is_some() {
return Err(anyhow::anyhow!("--play 和 --output 不能同时使用"));
}
// 检查是否有输入text 或 file
if cli.text.is_none() && cli.file.is_none() {
return Err(anyhow::anyhow!(
"必须提供 --text 或 --file 参数\n使用 --help 查看帮助信息"
));
}
// 流式播放走独立路径:边下边播,不等待全量下载
if cli.stream && cli.play {
ui::show_playback_start();
handle_stream_play(
cli.text,
cli.file,
&cli.voice,
cli.style.as_deref(),
)
.await?;
ui::show_playback_complete();
return Ok(());
}
// 执行语音合成(非流式播放场景:全量下载后输出/播放)
let audio_data = synthesize(
cli.text,
cli.file,
&cli.voice,
&cli.format,
cli.style.as_deref(),
cli.stream,
)
.await?;
// 根据参数决定处理方式
if cli.play {
play_audio(&audio_data)?;
ui::show_playback_complete();
} else if let Some(output_path) = cli.output {
// 保存到文件
fs::write(&output_path, &audio_data)
.with_context(|| format!("无法写入文件: {:?}", output_path))?;
ui::show_save_complete(&output_path.to_string_lossy());
} else {
// 输出到 stdout二进制流
let stdout = std::io::stdout();
let mut handle = stdout.lock();
use std::io::Write;
handle.write_all(&audio_data)
.context("无法写入标准输出")?;
handle.flush()
.context("无法刷新标准输出")?;
}
Ok(())
}
}
}
/// 列出所有可用的音色
///
/// 显示详细的音色信息,包括 Voice ID、语言、性别
fn list_voices() {
ui::show_voices();
}
/// 合法的音色列表mimo-v2.5-tts 支持)
const VALID_VOICES: &[&str] = &[
"mimo_default",
"冰糖",
"茉莉",
"苏打",
"白桦",
"Mia",
"Chloe",
"Milo",
"Dean",
];
/// 验证音色是否合法
///
/// 如果音色不在合法列表中,输出警告并使用默认音色 mimo_default
fn validate_voice(voice: &str) -> String {
if VALID_VOICES.contains(&voice) {
voice.to_string()
} else {
eprintln!("警告:无效音色 '{}',使用默认音色 'mimo_default'", voice);
"mimo_default".to_string()
}
}
/// 显示当前配置
fn show_config() -> Result<()> {
let config_manager = ConfigManager::new()
.context("无法加载配置")?;
let config = config_manager.get_config();
ui::show_config(
&config.api_key,
&config.default_voice,
&config_manager.get_config_path().to_string_lossy(),
);
Ok(())
}
/// 处理配置相关子命令
fn handle_config_command(action: ConfigAction) -> Result<()> {
match action {
ConfigAction::Set { api_key, voice, .. } => {
let mut config_manager = ConfigManager::new()
.context("无法加载配置")?;
if let Some(key) = api_key {
config_manager.set_api_key(key);
ui::show_success("API Key 已更新");
}
if let Some(v) = voice {
config_manager.set_default_voice(v);
ui::show_success("默认音色已更新");
}
config_manager.save()
.context("无法保存配置")?;
ui::show_info("📁 配置已保存到:", &config_manager.get_config_path().to_string_lossy());
}
ConfigAction::Show => {
show_config()?;
}
ConfigAction::Init => {
// 交互式初始化
ui::show_info("初始化配置...", "");
let config_manager = ConfigManager::new()
.context("无法创建配置")?;
ui::show_info("请使用以下命令设置 API Key:", "");
println!(" mimo-tts config set --api-key <YOUR_API_KEY>");
ui::show_info("配置文件将保存在:", &config_manager.get_config_path().to_string_lossy());
}
}
Ok(())
}
/// 引导式配置初始化
///
/// 交互式引导用户完成配置设置
async fn onboard() -> Result<()> {
let config_manager = ConfigManager::new()
.context("无法创建配置管理器")?;
let current_config = config_manager.get_config();
// 使用 UI 模块显示交互式表单
let result = ui::show_onboard_form(
&current_config.api_key,
&current_config.default_voice,
);
let (api_key, default_voice) = result
.map_err(|e| anyhow::anyhow!("表单输入错误: {}", e))?;
// 保存配置
let mut config_manager = ConfigManager::new()
.context("无法创建配置管理器")?;
if !api_key.is_empty() {
config_manager.set_api_key(api_key);
}
if !default_voice.is_empty() {
config_manager.set_default_voice(default_voice);
}
config_manager.save()
.context("无法保存配置")?;
ui::show_info("📁 配置已保存到:", &config_manager.get_config_path().to_string_lossy());
Ok(())
}
/// 执行语音合成
///
/// # 参数
/// - text: 直接提供的文本(可选)
/// - file: 文本文件路径(可选)
/// - voice: 音色名称
/// - format: 音频格式
/// - style: 风格描述(可选,会放在 user 消息中)
/// - stream: 是否使用流式输出
///
/// # 返回
/// 返回合成的音频数据WAV 或 PCM16 格式)
async fn synthesize(
text: Option<String>,
file: Option<std::path::PathBuf>,
voice: &str,
format: &str,
style: Option<&str>,
stream: bool,
) -> Result<Vec<u8>> {
// 获取要合成的文本
let content = if let Some(t) = text {
tone::apply_tone(&t)
} else if let Some(f) = file {
// 从文件读取文本
let mut file = fs::File::open(&f)
.with_context(|| format!("无法打开文件: {:?}", f))?;
let mut content = String::new();
file.read_to_string(&mut content)
.with_context(|| format!("无法读取文件: {:?}", f))?;
tone::apply_tone(&content)
} else {
return Err(anyhow::anyhow!("没有提供文本内容"));
};
// 验证音色是否合法,不合法则使用默认值
let validated_voice = validate_voice(voice);
// 加载配置
let config_manager = ConfigManager::new()
.context("无法加载配置")?;
let config = config_manager.get_config();
// 检查 API Key 是否设置
if config.api_key.is_empty() {
return Err(anyhow::anyhow!(
"API Key 未设置\n请使用: mimo-tts config set --api-key <YOUR_API_KEY>"
));
}
// 创建 TTS 客户端
let client = api::TtsClient::builder()
.base_url(config.base_url.clone())
.api_key(config.api_key.clone())
.build()
.context("无法创建 TTS 客户端")?;
// 流式输出时自动使用 pcm16 格式
let actual_format = if stream { "pcm16" } else { format };
// 构建请求(如果指定了风格,添加到 user 消息)
let mut builder = api::TtsRequest::builder()
.audio(api::AudioConfig {
format: actual_format.to_string(),
voice: validated_voice,
});
// 添加消息:如果指定了风格,先添加 user 消息描述风格
if let Some(s) = style {
builder = builder.add_message(api::Message {
role: "user".to_string(),
content: s.to_string(),
});
}
// 添加 assistant 消息(实际要合成的文本)
builder = builder.add_message(api::Message {
role: "assistant".to_string(),
content: content.clone(),
});
let request = builder.build();
// 调用 API 合成语音
let audio_data = if stream {
// 流式请求已在 api.rs 中处理
client
.synthesize_with_request(&request)
.await
.context("流式语音合成失败")?
} else {
client
.synthesize_with_request(&request)
.await
.context("语音合成失败")?
};
Ok(audio_data)
}
/// 单元测试模块
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_exit_code() {
assert_eq!(i32::from(ExitCode::Success), 0);
assert_eq!(i32::from(ExitCode::ArgumentError), 1);
assert_eq!(i32::from(ExitCode::ConfigError), 2);
assert_eq!(i32::from(ExitCode::ApiError), 3);
assert_eq!(i32::from(ExitCode::FileError), 4);
}
}
/// 播放音频数据
///
/// 使用 rodio 直接从内存播放 WAV 音频
/// # 参数
/// - data: WAV 格式的音频数据
fn play_audio(data: &[u8]) -> Result<()> {
// 创建 rodio 音频输出流
let (_stream, stream_handle) = rodio::OutputStream::try_default()
.context("无法创建音频输出流")?;
// 从内存数据创建音频源
let cursor = std::io::Cursor::new(data.to_vec());
let source = rodio::Decoder::new(cursor)
.context("无法解码音频数据")?;
// 创建播放器并播放(单次播放,不循环)
let sink = rodio::Sink::try_new(&stream_handle)
.context("无法创建音频播放器")?;
sink.append(source);
// 等待播放完成
sink.sleep_until_end();
Ok(())
}
/// 流式播放:边下载 PCM16 chunk 边播放
///
/// 先缓冲约 1 秒数据再开始播放,抗网络抖动
/// 每收到一块就解码为 i16 采样,追加到 rodio Sink 队列
async fn handle_stream_play(
text: Option<String>,
file: Option<std::path::PathBuf>,
voice: &str,
style: Option<&str>,
) -> Result<()> {
let content = if let Some(t) = text {
tone::apply_tone(&t)
} else if let Some(f) = file {
let mut file = fs::File::open(&f)
.with_context(|| format!("无法打开文件: {:?}", f))?;
let mut content = String::new();
file.read_to_string(&mut content)
.with_context(|| format!("无法读取文件: {:?}", f))?;
tone::apply_tone(&content)
} else {
return Err(anyhow::anyhow!("没有提供文本内容"));
};
let validated_voice = validate_voice(voice);
let config_manager = ConfigManager::new()
.context("无法加载配置")?;
let config = config_manager.get_config();
if config.api_key.is_empty() {
return Err(anyhow::anyhow!(
"API Key 未设置\n请使用: mimo-tts config set --api-key <YOUR_API_KEY>"
));
}
let client = api::TtsClient::builder()
.base_url(config.base_url.clone())
.api_key(config.api_key.clone())
.build()
.context("无法创建 TTS 客户端")?;
let mut builder = api::TtsRequest::builder()
.audio(api::AudioConfig {
format: "pcm16".to_string(),
voice: validated_voice,
});
if let Some(s) = style {
builder = builder.add_message(api::Message {
role: "user".to_string(),
content: s.to_string(),
});
}
builder = builder.add_message(api::Message {
role: "assistant".to_string(),
content,
});
builder = builder.stream(true);
let request = builder.build();
let (_stream, stream_handle) = rodio::OutputStream::try_default()
.context("无法创建音频输出流")?;
let sink = std::sync::Arc::new(rodio::Sink::try_new(&stream_handle)
.context("无法创建音频播放器")?);
let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<Vec<u8>>();
// 接收任务:先缓冲 ~1 秒 PCM16 再开始播放,后续逐块追加
let play_sink = sink.clone();
let play_handle = tokio::spawn(async move {
let mut buffer = Vec::new();
// 24000Hz * 16bit * 1ch = 48000 字节/秒
let threshold = 48000;
let mut started = false;
while let Some(chunk) = rx.recv().await {
if !started {
buffer.extend_from_slice(&chunk);
if buffer.len() >= threshold {
let samples: Vec<i16> = buffer.chunks(2)
.filter(|c| c.len() == 2)
.map(|c| i16::from_le_bytes([c[0], c[1]]))
.collect();
if !samples.is_empty() {
play_sink.append(rodio::buffer::SamplesBuffer::new(1, 24000, samples));
}
buffer.clear();
started = true;
}
} else {
let samples: Vec<i16> = chunk.chunks(2)
.filter(|c| c.len() == 2)
.map(|c| i16::from_le_bytes([c[0], c[1]]))
.collect();
if !samples.is_empty() {
play_sink.append(rodio::buffer::SamplesBuffer::new(1, 24000, samples));
}
}
}
// 刷新剩余缓冲(文本较短下次未达到阈值)
if !buffer.is_empty() {
let samples: Vec<i16> = buffer.chunks(2)
.filter(|c| c.len() == 2)
.map(|c| i16::from_le_bytes([c[0], c[1]]))
.collect();
if !samples.is_empty() {
play_sink.append(rodio::buffer::SamplesBuffer::new(1, 24000, samples));
}
}
});
client.synthesize_stream_to_channel(&request, tx).await?;
play_handle.await.context("流式播放任务异常")?;
sink.sleep_until_end();
Ok(())
}
/// 将 PCM16 原始数据转换为 WAV 格式
///
/// # 参数
/// - pcm_data: PCM16 原始音频数据16bit, 单声道, 24000Hz
///
/// # 返回
/// 完整的 WAV 格式数据(包含 44 字节头部)
#[allow(dead_code)]
fn pcm16_to_wav(pcm_data: &[u8]) -> Vec<u8> {
let sample_rate: u32 = 24000; // Mimo-TTS PCM16 输出通常是 24kHz
let bits_per_sample: u16 = 16;
let channels: u16 = 1;
let byte_rate = sample_rate * channels as u32 * bits_per_sample as u32 / 8;
let block_align = channels * bits_per_sample / 8;
let data_size = pcm_data.len() as u32;
let file_size = 36 + data_size;
let mut wav = Vec::with_capacity(44 + pcm_data.len());
// RIFF 头
wav.extend_from_slice(b"RIFF");
wav.extend_from_slice(&file_size.to_le_bytes());
wav.extend_from_slice(b"WAVE");
// fmt 子块
wav.extend_from_slice(b"fmt ");
wav.extend_from_slice(&16u32.to_le_bytes()); // PCM 格式大小
wav.extend_from_slice(&1u16.to_le_bytes()); // PCM 格式
wav.extend_from_slice(&channels.to_le_bytes());
wav.extend_from_slice(&sample_rate.to_le_bytes());
wav.extend_from_slice(&byte_rate.to_le_bytes());
wav.extend_from_slice(&block_align.to_le_bytes());
wav.extend_from_slice(&bits_per_sample.to_le_bytes());
// data 子块
wav.extend_from_slice(b"data");
wav.extend_from_slice(&data_size.to_le_bytes());
wav.extend_from_slice(pcm_data);
wav
}