feat: 添加 TTS 语音朗读功能 (v0.2.0)
Some checks failed
Release / build (push) Failing after 6m27s

This commit is contained in:
2026-04-26 03:01:28 +08:00
parent 3f9443c14b
commit e4e5cd82c3
6 changed files with 167 additions and 9 deletions

View File

@@ -2,7 +2,19 @@
## 版本记录
### v0.1.0 (规划中)
### v0.2.0
- 新增 TTS 语音朗读功能
- 集成 mimo-tts client 功能,通过 TCP 连接本地 daemon
- 支持配置文件开关(tts.enabled)
- 支持命令行切换(/tts on/off/status)
- 支持临时 TTS 前缀(`T 消息` 临时开启)
- 动态提示符显示 TTS 状态(👀 🔊)
- 静默失败处理(网络异常时警告日志)
---
### v0.1.0
- 创建 hxclaw 项目
- 实现流式输出功能

View File

@@ -12,6 +12,7 @@ type ProjectConfig struct {
Streaming StreamingConfig `yaml:"streaming"`
Markdown MarkdownConfig `yaml:"markdown"`
UI UIConfig `yaml:"ui"`
TTS TTSConfig `yaml:"tts"`
}
type StreamingConfig struct {
@@ -29,6 +30,12 @@ type UIConfig struct {
UserPrefix string `yaml:"user_prefix"`
}
// TTSConfig holds the text-to-speech settings read from the "tts" section of
// the project configuration.
type TTSConfig struct {
// Enabled is the global TTS switch (yaml key "enabled").
Enabled bool `yaml:"enabled"`
// Port is the TCP port of the TTS daemon (yaml key "port").
Port int `yaml:"port"`
// Auto is loaded from the yaml key "auto".
// NOTE(review): presumably controls automatic read-aloud of replies, but no
// code visible here reads it — confirm where it is consumed.
Auto bool `yaml:"auto"`
}
var (
defaultCfg = ProjectConfig{
Streaming: StreamingConfig{
@@ -43,6 +50,11 @@ var (
Logo: "🦐",
UserPrefix: "👀 ",
},
TTS: TTSConfig{
Enabled: false,
Port: 9876,
Auto: true,
},
}
projCfg *ProjectConfig
projCfgLock sync.RWMutex
@@ -90,6 +102,9 @@ func LoadProjectConfig() error {
if cfg.UI.UserPrefix == "" {
cfg.UI.UserPrefix = defaultCfg.UI.UserPrefix
}
if cfg.TTS.Port <= 0 {
cfg.TTS.Port = defaultCfg.TTS.Port
}
projCfg = &cfg
return nil

View File

@@ -49,7 +49,8 @@ func GetConfigPath() string {
// Readline 实例包装
type Readline struct {
rl *readline.Instance
rl *readline.Instance
basePrompt string
}
// NewReadline 创建一个新的 Readline 实例
@@ -68,7 +69,18 @@ func NewReadline(prompt string) (*Readline, error) {
if err != nil {
return nil, err
}
return &Readline{rl: rl}, nil
return &Readline{rl: rl, basePrompt: prompt}, nil
}
// SetPrompt updates the prompt on the underlying readline instance and stores
// the same string as the value later returned by GetBasePrompt.
// NOTE(review): a caller that passes a decorated prompt (for example one with a
// status suffix) also overwrites basePrompt with it — confirm that is intended.
func (r *Readline) SetPrompt(prompt string) {
r.basePrompt = prompt
r.rl.SetPrompt(prompt)
}
// GetBasePrompt returns the prompt string currently stored in basePrompt,
// i.e. the value most recently recorded by the constructor or by SetPrompt.
func (r *Readline) GetBasePrompt() string {
return r.basePrompt
}
// Readline 读取一行输入

View File

@@ -66,7 +66,8 @@ func main() {
}
func interactiveMode(agentLoop *agent.AgentLoop, sessionKey string) {
prompt := internal.GetProjectConfig().UI.UserPrefix
basePrompt := internal.GetProjectConfig().UI.UserPrefix
prompt := internal.GetTTSPrompt(basePrompt)
rl, err := internal.NewReadline(prompt)
if err != nil {
@@ -77,6 +78,11 @@ func interactiveMode(agentLoop *agent.AgentLoop, sessionKey string) {
}
defer rl.Close()
ttsCfg := internal.GetProjectConfig().TTS
if ttsCfg.Enabled {
internal.SetTTSEnabled(true)
}
for {
line, err := rl.Readline()
if err != nil {
@@ -98,14 +104,37 @@ func interactiveMode(agentLoop *agent.AgentLoop, sessionKey string) {
return
}
runWithStreaming(agentLoop, input, sessionKey)
isTempTTS := false
if len(input) > 0 && input[0] == 'T' && (len(input) == 1 || input[1] == ' ') {
input = strings.TrimPrefix(input, "T")
input = strings.TrimPrefix(input, " ")
isTempTTS = true
}
if strings.HasPrefix(input, "/tts") {
handleTTSCommand(input, rl, basePrompt)
continue
}
if isTempTTS {
enabled := internal.ToggleTTS()
if enabled {
rl.SetPrompt(internal.GetTTSPrompt(basePrompt))
}
}
runWithStreaming(agentLoop, input, sessionKey, isTempTTS)
}
}
func simpleInteractiveMode(agentLoop *agent.AgentLoop, sessionKey string) {
reader := internal.NewSimpleReader()
ttsCfg := internal.GetProjectConfig().TTS
if ttsCfg.Enabled {
internal.SetTTSEnabled(true)
}
for {
fmt.Print(internal.GetProjectConfig().UI.UserPrefix)
fmt.Print(internal.GetTTSPrompt(internal.GetProjectConfig().UI.UserPrefix))
line, err := reader.ReadString()
if err != nil {
if err == internal.ErrEOF {
@@ -126,12 +155,28 @@ func simpleInteractiveMode(agentLoop *agent.AgentLoop, sessionKey string) {
return
}
runWithStreaming(agentLoop, input, sessionKey)
isTempTTS := false
if len(input) > 0 && input[0] == 'T' && (len(input) == 1 || input[1] == ' ') {
input = strings.TrimPrefix(input, "T")
input = strings.TrimPrefix(input, " ")
isTempTTS = true
}
if strings.HasPrefix(input, "/tts") {
handleTTSCommandSimple(input)
continue
}
if isTempTTS {
internal.ToggleTTS()
}
runWithStreaming(agentLoop, input, sessionKey, isTempTTS)
}
}
// runWithStreaming 使用 ProcessDirect 处理请求,支持工具调用和结果显示
func runWithStreaming(agentLoop *agent.AgentLoop, input, sessionKey string) {
func runWithStreaming(agentLoop *agent.AgentLoop, input, sessionKey string, tempTTS bool) {
startTime := time.Now()
spinner := internal.NewSpinner("思考中...")
@@ -150,6 +195,11 @@ func runWithStreaming(agentLoop *agent.AgentLoop, input, sessionKey string) {
clearSpinnerLine()
outputLineByLine(rendered)
ttsCfg := internal.GetProjectConfig().TTS
if ttsCfg.Enabled || tempTTS || internal.IsTTSEnabled() {
go internal.SpeakText(resp)
}
elapsed := time.Since(startTime)
printElapsed(elapsed)
}
@@ -218,3 +268,66 @@ func formatDuration(s float64) string {
}
return fmt.Sprintf("%.1fs", s)
}
// handleTTSCommand runs a "/tts" command entered in the readline-based
// interactive mode and keeps the prompt in sync with the TTS state.
//
// Accepted forms:
//
//	/tts         toggle TTS
//	/tts on      enable TTS
//	/tts off     disable TTS
//	/tts status  report the current state without changing it
//
// Anything else prints the usage line and leaves the state untouched.
//
// input is the raw command line, rl is the readline wrapper whose prompt is
// refreshed after every state change, and basePrompt is the undecorated prompt
// handed to internal.GetTTSPrompt.
func handleTTSCommand(input string, rl *internal.Readline, basePrompt string) {
	const usage = "用法: /tts [on|off|status]"

	args := strings.Fields(input)
	// strings.Fields yields an empty slice for blank input, in which case the
	// args[1] lookup below would panic. Requiring an exact "/tts" token also
	// keeps look-alikes such as "/ttsx" (which pass a plain prefix check) from
	// silently toggling TTS.
	if len(args) == 0 || args[0] != "/tts" {
		fmt.Println(usage)
		return
	}

	// Bare "/tts": flip the current state.
	if len(args) == 1 {
		enabled := internal.ToggleTTS()
		rl.SetPrompt(internal.GetTTSPrompt(basePrompt))
		status := "关闭"
		if enabled {
			status = "开启"
		}
		fmt.Printf("TTS 已%s\n", status)
		return
	}

	switch args[1] {
	case "on":
		internal.SetTTSEnabled(true)
		rl.SetPrompt(internal.GetTTSPrompt(basePrompt))
		fmt.Println("TTS 已开启")
	case "off":
		internal.SetTTSEnabled(false)
		rl.SetPrompt(internal.GetTTSPrompt(basePrompt))
		fmt.Println("TTS 已关闭")
	case "status":
		// Read-only query: the prompt is left as is.
		status := "关闭"
		if internal.IsTTSEnabled() {
			status = "开启"
		}
		fmt.Printf("TTS 状态: %s\n", status)
	default:
		fmt.Println(usage)
	}
}
// handleTTSCommandSimple runs a "/tts" command entered in the simple
// (non-readline) interactive mode. There is no prompt object to refresh here;
// the function only changes or reports the TTS state.
//
// Accepted forms:
//
//	/tts         toggle TTS
//	/tts on      enable TTS
//	/tts off     disable TTS
//	/tts status  report the current state without changing it
//
// Anything else prints the usage line and leaves the state untouched.
func handleTTSCommandSimple(input string) {
	const usage = "用法: /tts [on|off|status]"

	args := strings.Fields(input)
	// strings.Fields yields an empty slice for blank input, in which case the
	// args[1] lookup below would panic. Requiring an exact "/tts" token also
	// keeps look-alikes such as "/ttsx" (which pass a plain prefix check) from
	// silently toggling TTS.
	if len(args) == 0 || args[0] != "/tts" {
		fmt.Println(usage)
		return
	}

	// Bare "/tts": flip the current state and report the state the toggle
	// itself returned, rather than re-reading it afterwards.
	if len(args) == 1 {
		status := "关闭"
		if internal.ToggleTTS() {
			status = "开启"
		}
		fmt.Printf("TTS 已%s\n", status)
		return
	}

	switch args[1] {
	case "on":
		internal.SetTTSEnabled(true)
		fmt.Println("TTS 已开启")
	case "off":
		internal.SetTTSEnabled(false)
		fmt.Println("TTS 已关闭")
	case "status":
		status := "关闭"
		if internal.IsTTSEnabled() {
			status = "开启"
		}
		fmt.Printf("TTS 状态: %s\n", status)
	default:
		fmt.Println(usage)
	}
}

BIN
hxclaw

Binary file not shown.

View File

@@ -13,4 +13,10 @@ markdown:
# UI 配置
ui:
logo: "🦐"
user_prefix: "👀 "
user_prefix: "👀 "
# TTS 语音配置
tts:
enabled: false # 全局开关(默认关闭)
port: 9876 # mimo-tts daemon 端口
auto: true # AI 回复后自动朗读