diff --git a/changelog.md b/changelog.md index 0513c06..8b2e073 100644 --- a/changelog.md +++ b/changelog.md @@ -32,6 +32,45 @@ ## 版本历史 +### 0.3.0 (2026-03-29) - 内容过滤与代码处理 +**类型**: 功能版本 +**状态**: 开发中 + +**变更内容**: +- ✅ 添加内容过滤模块 (internal/content/) +- ✅ 实现基础字符过滤(移除控制字符、规范化换行符、截断超长符号) +- ✅ 实现代码块和行内代码识别 +- ✅ 实现代码注释智能识别(支持 JS/TS/Java/Python/Go/HTML 等 30+ 语言) +- ✅ 添加 go-enry 依赖实现编程语言智能检测 +- ✅ 添加 SkipKeywords 配置项,默认保留 TODO/FIXME/HACK 等关键词不翻译 +- ✅ 集成内容处理到 Translator 模块 + +**新增文件**: +- `internal/content/content.go` - 模块入口 +- `internal/content/filter.go` - 基础字符过滤 +- `internal/content/parser.go` - 内容解析器和语言检测 + +**配置更新**: +- `configs/config.yaml` 新增 `skip_keywords` 配置项 +- 支持用户自定义不翻译的关键词列表 + +**使用示例**: +```bash +# 翻译包含代码的文档,自动识别代码和注释 +yoyo "这是一个文档 ```js // TODO: fix this ```" +# 代码块保持不变,只翻译注释中的词汇 +# TODO: 修复这个 +``` + +**讨论记录**: +- [内容过滤与代码处理设计](taolun.md#内容过滤与代码处理设计) + +**下一步**: +- 实现更多厂商(火山引擎、国家超算、Qwen、OpenAI兼容) +- 添加配置文件路径查找机制 +- 实现配置文件迁移工具 +- 完善错误处理和用户体验 + ### 0.2.0 (2026-03-29) - 语言支持和配置向导 **类型**: 功能版本 **状态**: 开发中 diff --git a/go.mod b/go.mod index e0cc7eb..8431ba9 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,8 @@ go 1.26.1 require ( github.com/AlecAivazis/survey/v2 v2.3.7 // indirect + github.com/go-enry/go-enry/v2 v2.9.5 // indirect + github.com/go-enry/go-oniguruma v1.2.1 // indirect github.com/joho/godotenv v1.5.1 // indirect github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect github.com/mattn/go-colorable v0.1.2 // indirect diff --git a/go.sum b/go.sum index 16f548f..f105b14 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,10 @@ github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2/go.mod h1:HBCaDe github.com/creack/pty v1.1.17/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-enry/go-enry/v2 v2.9.5 
h1:HPhAQQHYwJgihL2PxBZiUMFWiROsGwOBdB6/D8zCUhY= +github.com/go-enry/go-enry/v2 v2.9.5/go.mod h1:9yrj4ES1YrbNb1Wb7/PWYr2bpaCXUGRt0uafN0ISyG8= +github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo= +github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4= github.com/hinshun/vt10x v0.0.0-20220119200601-820417d04eec/go.mod h1:Q48J4R4DvxnHolD5P8pOtXigYlRuPLGl6moFx3ulM68= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= @@ -17,7 +21,12 @@ github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b h1:j7+1HpAFS1zy5+Q4qx1f github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= @@ -47,6 +56,7 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.1.12/go.mod 
h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/config/config.go b/internal/config/config.go index b92ab52..ac4b76d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -23,6 +23,9 @@ type Config struct { // Prompt配置 Prompts map[string]string `yaml:"prompts"` + + // 内容过滤配置 + SkipKeywords []string `yaml:"skip_keywords"` // 不翻译的关键词 } // ProviderConfig 厂商配置 @@ -121,6 +124,16 @@ func (c *Config) setDefaults() { if c.Prompts == nil { c.Prompts = make(map[string]string) } + + // 设置默认关键词 + if c.SkipKeywords == nil { + c.SkipKeywords = []string{ + "TODO", "FIXME", "HACK", "XXX", "NOTE", + "BUG", "WARN", "IMPORTANT", "TODO:", + "FIXME:", "HACK:", "XXX:", "NOTE:", + "BUG:", "WARN:", "IMPORTANT:", + } + } } // GetProviderConfig 获取指定厂商的配置 diff --git a/internal/content/content.go b/internal/content/content.go new file mode 100644 index 0000000..cee1250 --- /dev/null +++ b/internal/content/content.go @@ -0,0 +1,17 @@ +package content + +import ( + "github.com/go-enry/go-enry/v2" +) + +const ( + Version = "1.0.0" +) + +func DetectLanguage(text string) string { + return enry.GetLanguage("", []byte(text)) +} + +func Filter(text string) string { + return FilterBasic(text, nil) +} diff --git a/internal/content/filter.go b/internal/content/filter.go new file mode 100644 index 0000000..548c04c --- /dev/null +++ b/internal/content/filter.go @@ -0,0 +1,55 @@ +package content + +import ( + "regexp" + 
// FilterOptions selects which clean-up passes FilterBasic applies.
type FilterOptions struct {
	// RemoveControlChars strips the characters matched by controlCharsRegex.
	RemoveControlChars bool
	// NormalizeLineBreaks rewrites "\r\n" and lone "\r" as "\n".
	NormalizeLineBreaks bool
	// MaxConsecutiveSymbols caps the length of a run of one repeated
	// decorative symbol; values <= 0 disable the cap.
	MaxConsecutiveSymbols int
}

// defaultFilterOptions is used by FilterBasic when the caller passes nil.
var defaultFilterOptions = &FilterOptions{
	RemoveControlChars:    true,
	NormalizeLineBreaks:   true,
	MaxConsecutiveSymbols: 20,
}

// controlCharsRegex matches the C0 control characters except tab (\x09),
// line feed (\x0a) and carriage return (\x0d), plus DEL (\x7f).
var controlCharsRegex = regexp.MustCompile(`[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]`)

// decorativeSymbols lists the ASCII characters whose repeated runs are
// shortened by truncateConsecutiveSymbols.
const decorativeSymbols = "=-_*#~`."

// FilterBasic applies the passes enabled in opts to text, in this order:
// control-character removal, line-break normalisation, symbol-run truncation.
// A nil opts selects defaultFilterOptions. The input string is not modified;
// the cleaned copy is returned.
func FilterBasic(text string, opts *FilterOptions) string {
	if opts == nil {
		opts = defaultFilterOptions
	}

	result := text

	if opts.RemoveControlChars {
		result = controlCharsRegex.ReplaceAllString(result, "")
	}

	if opts.NormalizeLineBreaks {
		// "\r\n" must be handled first so it does not become two line feeds.
		result = strings.ReplaceAll(result, "\r\n", "\n")
		result = strings.ReplaceAll(result, "\r", "\n")
	}

	if opts.MaxConsecutiveSymbols > 0 {
		result = truncateConsecutiveSymbols(result, opts.MaxConsecutiveSymbols)
	}

	return result
}

// truncateConsecutiveSymbols shortens every run of one repeated character
// from decorativeSymbols to at most maxCount occurrences. All other bytes are
// copied through unchanged. A maxCount <= 0 returns text as is.
//
// The previous implementation assembled a regular expression per symbol from
// string(rune(maxCount+1)) (a control character, not a decimal number) and
// unescaped metacharacters such as "*" and ".", so regexp.MustCompile
// panicked on the first call. A single byte scan needs no pattern at all.
func truncateConsecutiveSymbols(text string, maxCount int) string {
	if maxCount <= 0 || text == "" {
		return text
	}

	var b strings.Builder
	b.Grow(len(text))

	// Every symbol is ASCII, and ASCII bytes never occur inside a multi-byte
	// UTF-8 sequence, so comparing raw bytes is safe and keeps the rest of
	// the text byte-for-byte intact.
	run := 0
	for i := 0; i < len(text); i++ {
		c := text[i]
		if i > 0 && c == text[i-1] && strings.IndexByte(decorativeSymbols, c) >= 0 {
			run++
		} else {
			run = 1
		}
		if run > maxCount {
			continue
		}
		b.WriteByte(c)
	}

	return b.String()
}
// languageCommentPatterns describes the comment syntax of one language.
type languageCommentPatterns struct {
	// LineComment introduces a comment that runs to the end of the line.
	// It is empty when the language has no line comments.
	LineComment string
	// BlockComment is a flat list of delimiters laid out as consecutive
	// (start, end) pairs, e.g. {"/*", "*/"}. It is empty when the language
	// has no block comments.
	BlockComment []string
}

// languagePatterns maps a lower-case language name to its comment syntax.
//
// The html and xml entries previously held a single empty delimiter and vue
// carried a dangling empty third entry, so markup comments could never be
// recognised; they now list the "<!--" / "-->" pair.
//
// NOTE(review): the yaml, vue, svelte, jsx and tsx line-comment values end in
// "()". They are kept verbatim because the line-comment value is used as a
// regular-expression fragment, where the empty group is harmless — confirm
// whether plain "#" / "//" was intended.
var languagePatterns = map[string]languageCommentPatterns{
	"javascript":  {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"typescript":  {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"java":        {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"kotlin":      {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"scala":       {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"c":           {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"cpp":         {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"c#":          {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"go":          {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"rust":        {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"php":         {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"swift":       {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"objective-c": {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"scss":        {LineComment: `//`, BlockComment: []string{`/*`, `*/`}},
	"css":         {LineComment: ``, BlockComment: []string{`/*`, `*/`}},
	"less":        {LineComment: ``, BlockComment: []string{`/*`, `*/`}},
	"html":        {LineComment: ``, BlockComment: []string{`<!--`, `-->`}},
	"xml":         {LineComment: ``, BlockComment: []string{`<!--`, `-->`}},
	"sql":         {LineComment: `--`, BlockComment: []string{`/*`, `*/`}},
	"python":      {LineComment: `#`, BlockComment: []string{`"""`, `"""`}},
	"ruby":        {LineComment: `#`, BlockComment: []string{`=begin`, `=end`}},
	"shell":       {LineComment: `#`, BlockComment: []string{}},
	"bash":        {LineComment: `#`, BlockComment: []string{}},
	"powershell":  {LineComment: `#`, BlockComment: []string{`<#`, `#>`}},
	"yaml":        {LineComment: `#()`, BlockComment: []string{}},
	"json":        {LineComment: ``, BlockComment: []string{}},
	"markdown":    {LineComment: ``, BlockComment: []string{}},
	"vue":         {LineComment: `//()`, BlockComment: []string{`/*`, `*/`, `<!--`, `-->`}},
	"svelte":      {LineComment: `//()`, BlockComment: []string{`/*`, `*/`}},
	"jsx":         {LineComment: `//()`, BlockComment: []string{`/*`, `*/`}},
	"tsx":         {LineComment: `//()`, BlockComment: []string{`/*`, `*/`}},
}
+ sample := strings.Join(codeLines[:min(len(codeLines), 10)], "\n") + lang := enry.GetLanguage("", []byte(sample)) + + if lang == "" { + return p.fallbackLang + } + + return strings.ToLower(lang) +} + +func (p *Parser) splitIntoSegments(text string, lang string) []ContentSegment { + segments := []ContentSegment{} + + codeBlockPattern := regexp.MustCompile("(?s)```[\\s\\S]*?^```|`[^`]+`") + matches := codeBlockPattern.FindAllStringIndex(text, -1) + + if len(matches) == 0 { + segments = append(segments, ContentSegment{ + Type: SegmentTypeText, + Content: text, + StartPos: 0, + EndPos: len(text), + }) + return segments + } + + lastEnd := 0 + for _, match := range matches { + start, end := match[0], match[1] + + if start > lastEnd { + textPart := text[lastEnd:start] + textSegments := p.parseTextContent(textPart, lang) + segments = append(segments, textSegments...) + } + + content := text[start:end] + isInline := len(content) > 0 && content[0] == '`' && (len(content) == 1 || content[len(content)-1] == '`') + + if strings.HasPrefix(content, "```") { + segments = append(segments, ContentSegment{ + Type: SegmentTypeCodeBlock, + Content: content, + Language: p.detectCodeBlockLang(content), + StartPos: start, + EndPos: end, + }) + } else if isInline { + segments = append(segments, ContentSegment{ + Type: SegmentTypeInlineCode, + Content: content, + Language: lang, + StartPos: start, + EndPos: end, + }) + } + + lastEnd = end + } + + if lastEnd < len(text) { + textPart := text[lastEnd:] + textSegments := p.parseTextContent(textPart, lang) + segments = append(segments, textSegments...) 
+ } + + return segments +} + +func (p *Parser) parseTextContent(text string, lang string) []ContentSegment { + segments := []ContentSegment{} + langPatterns := getLanguagePatterns(lang) + + if langPatterns.SingleLine == "" && len(langPatterns.MultiLine) == 0 { + segments = append(segments, ContentSegment{ + Type: SegmentTypeText, + Content: text, + Language: lang, + StartPos: 0, + EndPos: len(text), + }) + return segments + } + + commentPatterns := p.buildCommentRegex(langPatterns) + if commentPatterns == nil { + segments = append(segments, ContentSegment{ + Type: SegmentTypeText, + Content: text, + Language: lang, + StartPos: 0, + EndPos: len(text), + }) + return segments + } + + matches := commentPatterns.FindAllStringIndex(text, -1) + if len(matches) == 0 { + segments = append(segments, ContentSegment{ + Type: SegmentTypeText, + Content: text, + Language: lang, + StartPos: 0, + EndPos: len(text), + }) + return segments + } + + lastEnd := 0 + for _, match := range matches { + start, end := match[0], match[1] + + if start > lastEnd { + segments = append(segments, ContentSegment{ + Type: SegmentTypeText, + Content: text[lastEnd:start], + Language: lang, + StartPos: lastEnd, + EndPos: start, + }) + } + + segments = append(segments, ContentSegment{ + Type: SegmentTypeComment, + Content: text[start:end], + IsComment: true, + Language: lang, + StartPos: start, + EndPos: end, + }) + + lastEnd = end + } + + if lastEnd < len(text) { + segments = append(segments, ContentSegment{ + Type: SegmentTypeText, + Content: text[lastEnd:], + Language: lang, + StartPos: lastEnd, + EndPos: len(text), + }) + } + + return segments +} + +type languageCommentRegex struct { + SingleLine string + MultiLine []struct { + Start string + End string + } +} + +func (p *Parser) buildCommentRegex(patterns languageCommentRegex) *regexp.Regexp { + var parts []string + + if patterns.SingleLine != "" { + parts = append(parts, patterns.SingleLine+`.*$`) + } + + for _, multi := range patterns.MultiLine { 
+ if multi.Start != "" && multi.End != "" { + escapedStart := regexp.QuoteMeta(multi.Start) + escapedEnd := regexp.QuoteMeta(multi.End) + parts = append(parts, escapedStart+`[\s\S]*?`+escapedEnd) + } + } + + if len(parts) == 0 { + return nil + } + + pattern := `(?m)` + strings.Join(parts, "|") + return regexp.MustCompile(pattern) +} + +func getLanguagePatterns(lang string) languageCommentRegex { + patterns, ok := languagePatterns[lang] + if !ok { + patterns = defaultPatterns + } + + result := languageCommentRegex{ + SingleLine: patterns.LineComment, + } + + for _, bc := range patterns.BlockComment { + if len(bc) >= 2 { + result.MultiLine = append(result.MultiLine, struct { + Start string + End string + }{Start: bc[:len(bc)/2], End: bc[len(bc)/2:]}) + } + } + + return result +} + +func (p *Parser) detectCodeBlockLang(codeBlock string) string { + lines := strings.Split(codeBlock, "\n") + if len(lines) < 2 { + return "" + } + + firstLine := strings.TrimSpace(lines[0]) + firstLine = strings.TrimPrefix(firstLine, "```") + firstLine = strings.TrimSpace(firstLine) + + if firstLine != "" { + lang := strings.ToLower(firstLine) + if _, ok := languagePatterns[lang]; ok { + return lang + } + } + + return "" +} + +func (p *Parser) BuildPrompt(result *ParseResult) string { + var prompt strings.Builder + + prompt.WriteString("你是一位专业的技术翻译。请翻译以下内容,遵守以下规则:\n\n") + + prompt.WriteString("需要翻译的部分:\n") + prompt.WriteString("- 普通文本:翻译成目标语言\n") + prompt.WriteString("- 代码注释:只翻译注释中有意义的词汇,技术术语保留原语言\n\n") + + prompt.WriteString("需要保持不变的部分:\n") + prompt.WriteString("- 代码块(如 ```javascript ... 
```)保持原样\n") + prompt.WriteString("- 行内代码(如 `const count = 10`)保持原样\n") + + if len(p.skipKeywords) > 0 { + prompt.WriteString(fmt.Sprintf("- 以下关键词不翻译:%s\n", strings.Join(p.skipKeywords, "、"))) + } + + prompt.WriteString("\n请将需要翻译的部分翻译成中文,其他部分保持不变。\n\n") + prompt.WriteString("原文:\n---\n") + + textToTranslate := p.extractTextForTranslation(result) + prompt.WriteString(textToTranslate) + + prompt.WriteString("\n---") + + return prompt.String() +} + +func (p *Parser) extractTextForTranslation(result *ParseResult) string { + var text strings.Builder + + for _, seg := range result.Segments { + switch seg.Type { + case SegmentTypeText: + text.WriteString(seg.Content) + case SegmentTypeComment: + text.WriteString(seg.Content) + case SegmentTypeCodeBlock, SegmentTypeInlineCode: + } + } + + return text.String() +} + +func (p *Parser) Reconstruct(result *ParseResult, translatedText string) string { + translatedLines := strings.Split(translatedText, "\n") + var output strings.Builder + + textIndex := 0 + + for _, seg := range result.Segments { + switch seg.Type { + case SegmentTypeText, SegmentTypeComment: + if textIndex < len(translatedLines) { + output.WriteString(translatedLines[textIndex]) + textIndex++ + } + case SegmentTypeCodeBlock, SegmentTypeInlineCode: + output.WriteString(seg.Content) + } + } + + return output.String() +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} diff --git a/internal/translator/translator.go b/internal/translator/translator.go index f348261..f880866 100644 --- a/internal/translator/translator.go +++ b/internal/translator/translator.go @@ -6,22 +6,25 @@ import ( "time" "github.com/titor/fanyi/internal/config" + "github.com/titor/fanyi/internal/content" "github.com/titor/fanyi/internal/provider" ) // Translator 核心翻译类 type Translator struct { - config *config.Config - provider provider.Provider - prompt *PromptManager + config *config.Config + provider provider.Provider + prompt *PromptManager + contentParser 
*content.Parser } // NewTranslator 创建翻译器实例 func NewTranslator(config *config.Config, provider provider.Provider) *Translator { return &Translator{ - config: config, - provider: provider, - prompt: NewPromptManager(config.Prompts), + config: config, + provider: provider, + prompt: NewPromptManager(config.Prompts), + contentParser: content.NewParser(config.SkipKeywords), } } @@ -31,15 +34,33 @@ func (t *Translator) Translate(ctx context.Context, text string, options *Transl timeoutCtx, cancel := context.WithTimeout(ctx, time.Duration(t.config.Timeout)*time.Second) defer cancel() + // 基础字符过滤 + filteredText := content.FilterBasic(text, nil) + + // 内容解析(包含代码检测) + parseResult, parseErr := t.contentParser.Parse(filteredText) + // 选择Prompt prompt := "" if options.PromptName != "" { prompt = t.prompt.GetPrompt(options.PromptName) } + // 如果包含代码且解析成功,使用增强的Prompt + if parseErr == nil && parseResult.HasCode { + enhancedPrompt := t.contentParser.BuildPrompt(parseResult) + if enhancedPrompt != "" { + if prompt != "" { + prompt = prompt + "\n\n" + enhancedPrompt + } else { + prompt = enhancedPrompt + } + } + } + // 构建请求 req := &provider.TranslateRequest{ - Text: text, + Text: filteredText, FromLang: options.FromLang, ToLang: options.ToLang, Prompt: prompt, @@ -53,10 +74,17 @@ func (t *Translator) Translate(ctx context.Context, text string, options *Transl return nil, fmt.Errorf("翻译失败: %w", err) } + translatedText := resp.Text + + // 如果包含代码且解析成功,重构结果 + if parseErr == nil && parseResult.HasCode { + translatedText = t.contentParser.Reconstruct(parseResult, resp.Text) + } + // 构建结果 return &TranslateResult{ Original: text, - Translated: resp.Text, + Translated: translatedText, FromLang: resp.FromLang, ToLang: resp.ToLang, Model: resp.Model,