145 lines
2.8 KiB
Go
145 lines
2.8 KiB
Go
|
|
package internal
|
||
|
|
|
||
|
|
import (
|
||
|
|
"bufio"
|
||
|
|
"context"
|
||
|
|
"encoding/json"
|
||
|
|
"fmt"
|
||
|
|
"net"
|
||
|
|
"os/exec"
|
||
|
|
"sync"
|
||
|
|
"time"
|
||
|
|
|
||
|
|
"github.com/sipeed/picoclaw/pkg/logger"
|
||
|
|
)
|
||
|
|
|
||
|
|
// Runtime TTS switch shared by the helpers in this file.
var (
	// ttsEnabledMu guards ttsEnabled: writers take Lock, readers take RLock.
	ttsEnabledMu sync.RWMutex
	// ttsEnabled is the current state of the switch; its zero value is off.
	ttsEnabled bool
)
|
||
|
|
|
||
|
|
// TTSRequest is the JSON message sent to the TTS service, one object per line.
//
// Text is always encoded. Voice, Format and Style are pointers so that a nil
// value is left out of the encoded object entirely.
type TTSRequest struct {
	// Text is the content to be spoken.
	Text string `json:"text"`
	// Voice optionally names the voice to use.
	Voice *string `json:"voice,omitempty"`
	// Format optionally names the audio format to produce.
	Format *string `json:"format,omitempty"`
	// Style is an optional style value; nothing in this file sets it.
	Style *string `json:"style,omitempty"`
}
|
||
|
|
|
||
|
|
// TTSResponse is the JSON reply read back from the TTS service.
type TTSResponse struct {
	// Status is "ok" on success; speakTextWithContext treats any other value
	// as a service error.
	Status string `json:"status"`
	// Message is used as the audio file path when Status is "ok", and as the
	// error text otherwise.
	Message string `json:"message"`
}
|
||
|
|
|
||
|
|
func SetTTSEnabled(enabled bool) {
|
||
|
|
ttsEnabledMu.Lock()
|
||
|
|
defer ttsEnabledMu.Unlock()
|
||
|
|
ttsEnabled = enabled
|
||
|
|
}
|
||
|
|
|
||
|
|
func IsTTSEnabled() bool {
|
||
|
|
ttsEnabledMu.RLock()
|
||
|
|
defer ttsEnabledMu.RUnlock()
|
||
|
|
return ttsEnabled
|
||
|
|
}
|
||
|
|
|
||
|
|
func ToggleTTS() bool {
|
||
|
|
ttsEnabledMu.Lock()
|
||
|
|
defer ttsEnabledMu.Unlock()
|
||
|
|
ttsEnabled = !ttsEnabled
|
||
|
|
return ttsEnabled
|
||
|
|
}
|
||
|
|
|
||
|
|
func GetTTSPrompt(basePrompt string) string {
|
||
|
|
if IsTTSEnabled() {
|
||
|
|
return basePrompt + "🔊 "
|
||
|
|
}
|
||
|
|
return basePrompt
|
||
|
|
}
|
||
|
|
|
||
|
|
func SpeakText(text string) {
|
||
|
|
cfg := GetProjectConfig()
|
||
|
|
if !cfg.TTS.Enabled && !IsTTSEnabled() {
|
||
|
|
return
|
||
|
|
}
|
||
|
|
|
||
|
|
port := cfg.TTS.Port
|
||
|
|
if port <= 0 {
|
||
|
|
port = 9876
|
||
|
|
}
|
||
|
|
|
||
|
|
addr := fmt.Sprintf("127.0.0.1:%d", port)
|
||
|
|
|
||
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||
|
|
defer cancel()
|
||
|
|
|
||
|
|
err := speakTextWithContext(ctx, text, addr)
|
||
|
|
if err != nil {
|
||
|
|
logger.WarnCF("tts", "网络语音暂时异常", map[string]any{
|
||
|
|
"error": err.Error(),
|
||
|
|
})
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func speakTextWithContext(ctx context.Context, text, addr string) error {
|
||
|
|
conn, err := dialWithContext(ctx, addr)
|
||
|
|
if err != nil {
|
||
|
|
return fmt.Errorf("连接失败: %w", err)
|
||
|
|
}
|
||
|
|
defer conn.Close()
|
||
|
|
|
||
|
|
reader := bufio.NewReader(conn)
|
||
|
|
|
||
|
|
voiceStr := "mimo_default"
|
||
|
|
formatStr := "wav"
|
||
|
|
|
||
|
|
request := TTSRequest{
|
||
|
|
Text: text,
|
||
|
|
Voice: &voiceStr,
|
||
|
|
Format: &formatStr,
|
||
|
|
}
|
||
|
|
|
||
|
|
requestData, err := json.Marshal(request)
|
||
|
|
if err != nil {
|
||
|
|
return fmt.Errorf("序列化请求失败: %w", err)
|
||
|
|
}
|
||
|
|
|
||
|
|
_, err = conn.Write(append(requestData, '\n'))
|
||
|
|
if err != nil {
|
||
|
|
return fmt.Errorf("发送请求失败: %w", err)
|
||
|
|
}
|
||
|
|
|
||
|
|
responseLine, err := reader.ReadString('\n')
|
||
|
|
if err != nil {
|
||
|
|
return fmt.Errorf("读取响应失败: %w", err)
|
||
|
|
}
|
||
|
|
|
||
|
|
var response TTSResponse
|
||
|
|
if err := json.Unmarshal([]byte(responseLine), &response); err != nil {
|
||
|
|
return fmt.Errorf("解析响应失败: %w", err)
|
||
|
|
}
|
||
|
|
|
||
|
|
if response.Status != "ok" {
|
||
|
|
return fmt.Errorf("服务错误: %s", response.Message)
|
||
|
|
}
|
||
|
|
|
||
|
|
audioFile := response.Message
|
||
|
|
if audioFile == "" {
|
||
|
|
return fmt.Errorf("未收到音频文件路径")
|
||
|
|
}
|
||
|
|
|
||
|
|
if err := playAudio(audioFile); err != nil {
|
||
|
|
return fmt.Errorf("播放失败: %w", err)
|
||
|
|
}
|
||
|
|
|
||
|
|
return nil
|
||
|
|
}
|
||
|
|
|
||
|
|
// dialWithContext opens a TCP connection to addr using a zero-value net.Dialer
// and passes ctx to DialContext. It returns whatever DialContext returns.
func dialWithContext(ctx context.Context, addr string) (net.Conn, error) {
	var dialer net.Dialer
	return dialer.DialContext(ctx, "tcp", addr)
}
|
||
|
|
|
||
|
|
// playAudio runs the external afplay program with filePath as its only
// argument and waits for it to exit. It returns the error from Run, which is
// non-nil if the command cannot be started or does not complete successfully.
func playAudio(filePath string) error {
	return exec.Command("afplay", filePath).Run()
}
|