Files
HxClaw/cmd/hxclaw/internal/tts.go

145 lines
2.8 KiB
Go
Raw Normal View History

package internal
import (
"bufio"
"context"
"encoding/json"
"fmt"
"net"
"os/exec"
"sync"
"time"
"github.com/sipeed/picoclaw/pkg/logger"
)
// Runtime text-to-speech switch shared by the accessor functions in this file.
var (
	// ttsEnabledMu guards ttsEnabled.
	ttsEnabledMu sync.RWMutex
	// ttsEnabled is the in-process on/off flag for speech output; it starts
	// out false (the zero value).
	ttsEnabled bool
)
// TTSRequest is the JSON payload sent to the TTS service. Text is always
// encoded; the pointer fields are left out of the encoded object when nil.
type TTSRequest struct {
	// Text is the content to be spoken.
	Text string `json:"text"`
	// Voice optionally names the voice to use.
	Voice *string `json:"voice,omitempty"`
	// Format optionally names the audio format to produce.
	Format *string `json:"format,omitempty"`
	// Style optionally names a speaking style.
	Style *string `json:"style,omitempty"`
}
// TTSResponse is the JSON reply read back from the TTS service.
type TTSResponse struct {
	// Status is compared against "ok" by speakTextWithContext; any other
	// value is treated as a failure.
	Status string `json:"status"`
	// Message is used as the audio file path when Status is "ok" and as the
	// error description otherwise.
	Message string `json:"message"`
}
// SetTTSEnabled stores enabled as the runtime TTS flag while holding the
// write lock.
func SetTTSEnabled(enabled bool) {
	ttsEnabledMu.Lock()
	ttsEnabled = enabled
	ttsEnabledMu.Unlock()
}
// IsTTSEnabled reports the current value of the runtime TTS flag, read under
// the read lock.
func IsTTSEnabled() bool {
	ttsEnabledMu.RLock()
	current := ttsEnabled
	ttsEnabledMu.RUnlock()
	return current
}
// ToggleTTS inverts the runtime TTS flag and returns the new value. The read
// and the write happen under one write lock, so the flip is atomic with
// respect to the other accessors.
func ToggleTTS() bool {
	ttsEnabledMu.Lock()
	flipped := !ttsEnabled
	ttsEnabled = flipped
	ttsEnabledMu.Unlock()
	return flipped
}
// GetTTSPrompt returns basePrompt unchanged when the runtime TTS flag is off,
// and basePrompt followed by a speaker marker ("🔊 ") when it is on.
func GetTTSPrompt(basePrompt string) string {
	if !IsTTSEnabled() {
		return basePrompt
	}
	return basePrompt + "🔊 "
}
// SpeakText asks the local TTS service to speak text.
//
// It does nothing unless TTS is enabled in the project configuration or via
// the runtime flag. The service is reached on 127.0.0.1 at the configured
// port, falling back to 9876 when the configured port is not positive. The
// request runs under a 10-second context; a failure is logged as a warning
// and never returned to the caller.
func SpeakText(text string) {
	cfg := GetProjectConfig()
	if !(cfg.TTS.Enabled || IsTTSEnabled()) {
		return
	}

	port := cfg.TTS.Port
	if port <= 0 {
		// No usable port configured: use the default.
		port = 9876
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	if err := speakTextWithContext(ctx, text, fmt.Sprintf("127.0.0.1:%d", port)); err != nil {
		logger.WarnCF("tts", "网络语音暂时异常", map[string]any{
			"error": err.Error(),
		})
	}
}
// speakTextWithContext sends text to the TTS service at addr and plays the
// audio file the service reports back.
//
// Protocol, as implemented here: one JSON-encoded TTSRequest (voice
// "mimo_default", format "wav") terminated by '\n' is written to a TCP
// connection, then one '\n'-terminated JSON TTSResponse is read. A status
// other than "ok" is returned as an error carrying the response message;
// otherwise the message is taken as the path of the audio file to play.
//
// If ctx carries a deadline it bounds the dial and also the request write and
// response read. Playback via playAudio is not bounded by ctx. Cancelling a
// ctx that has no deadline only affects the dial.
//
// It returns a wrapped error describing the step that failed, or nil once
// playback has finished successfully.
func speakTextWithContext(ctx context.Context, text, addr string) error {
	conn, err := dialWithContext(ctx, addr)
	if err != nil {
		return fmt.Errorf("连接失败: %w", err)
	}
	defer conn.Close()

	// DialContext only bounds connection establishment. Propagate the context
	// deadline to the socket so a stalled service cannot block the write or
	// the response read indefinitely.
	if deadline, ok := ctx.Deadline(); ok {
		if err := conn.SetDeadline(deadline); err != nil {
			return fmt.Errorf("设置超时失败: %w", err)
		}
	}

	reader := bufio.NewReader(conn)

	voiceStr := "mimo_default"
	formatStr := "wav"
	request := TTSRequest{
		Text:   text,
		Voice:  &voiceStr,
		Format: &formatStr,
	}
	requestData, err := json.Marshal(request)
	if err != nil {
		return fmt.Errorf("序列化请求失败: %w", err)
	}

	// The request is newline-delimited: one JSON object per line.
	if _, err = conn.Write(append(requestData, '\n')); err != nil {
		return fmt.Errorf("发送请求失败: %w", err)
	}

	responseLine, err := reader.ReadString('\n')
	if err != nil {
		return fmt.Errorf("读取响应失败: %w", err)
	}

	var response TTSResponse
	if err := json.Unmarshal([]byte(responseLine), &response); err != nil {
		return fmt.Errorf("解析响应失败: %w", err)
	}
	if response.Status != "ok" {
		return fmt.Errorf("服务错误: %s", response.Message)
	}

	// On success the message field carries the path of the generated audio.
	audioFile := response.Message
	if audioFile == "" {
		return fmt.Errorf("未收到音频文件路径")
	}
	if err := playAudio(audioFile); err != nil {
		return fmt.Errorf("播放失败: %w", err)
	}
	return nil
}
// dialWithContext opens a TCP connection to addr using a default net.Dialer.
// The dial is abandoned when ctx is cancelled or its deadline passes.
func dialWithContext(ctx context.Context, addr string) (net.Conn, error) {
	var dialer net.Dialer
	return dialer.DialContext(ctx, "tcp", addr)
}
// playAudio runs the external "afplay" command (the command-line audio player
// shipped with macOS) on filePath and blocks until it exits. It returns the
// error from running the command, e.g. when afplay is missing from PATH or
// exits with a non-zero status.
func playAudio(filePath string) error {
	return exec.Command("afplay", filePath).Run()
}