namespace Service;

/// <summary>
/// AI-assisted classification of transaction records.
/// </summary>
public interface ISmartHandleService
{
    /// <summary>
    /// Classifies the given transactions with the AI service and reports progress through
    /// <paramref name="chunkAction"/>.
    /// </summary>
    /// <param name="transactionIds">IDs of the transactions to classify.</param>
    /// <param name="chunkAction">
    /// Callback receiving (event name, payload) pairs. Event names emitted are
    /// "start", "data", "end" and "error".
    /// </param>
    Task SmartClassifyAsync(long[] transactionIds, Action<(string , string)> chunkAction);
}

/// <summary>
/// Groups transactions by their summary text, asks the AI service to pick a category for
/// each group and streams the per-transaction results back to the caller.
/// </summary>
public class SmartHandleService(
    ITransactionRecordRepository transactionRepository,
    ITextSegmentService textSegmentService,
    ILogger logger,
    ITransactionCategoryRepository categoryRepository,
    IOpenAiService openAiService
) : ISmartHandleService
{
    /// <summary>Minimum share of keywords a classified record must match to count as similar.</summary>
    private const double MinKeywordMatchRate = 0.4;

    /// <summary>Maximum number of similar records requested from the repository per group.</summary>
    private const int SimilarRecordLimit = 10;

    /// <summary>Maximum number of reference records listed in the prompt per group.</summary>
    private const int MaxReferencesPerGroup = 3;

    /// <summary>
    /// Classifies the given transactions and streams the results through <paramref name="chunkAction"/>.
    /// </summary>
    /// <param name="transactionIds">IDs of the transactions to classify.</param>
    /// <param name="chunkAction">
    /// Callback receiving (event name, payload) pairs: "start" once before the AI call,
    /// "data" once per transaction with a JSON payload containing <c>id</c>, <c>Classify</c>
    /// and <c>Type</c>, "end" when the stream is finished, and "error" when no record was
    /// found or an exception occurred.
    /// </param>
    /// <remarks>
    /// Exceptions raised while classifying are logged and reported as an "error" event
    /// instead of being rethrown.
    /// </remarks>
    public async Task SmartClassifyAsync(long[] transactionIds, Action<(string , string)> chunkAction)
    {
        try
        {
            // Load the requested transactions; they are the records to classify.
            var sampleRecords = await transactionRepository.GetByIdsAsync(transactionIds);
            if (sampleRecords.Length == 0)
            {
                chunkAction(("error", "找不到指定的账单"));
                return;
            }

            // Group by summary text so the AI classifies each distinct summary only once.
            // Groups with the largest absolute total amount come first.
            var groupedRecords = sampleRecords
                .GroupBy(r => r.Reason)
                .Select(g => new
                {
                    Reason = g.Key,
                    Ids = g.Select(r => r.Id).ToList(),
                    TotalAmount = g.Sum(r => r.Amount),
                    SampleType = g.First().Type,
                    SampleClassify = g.First().Classify
                })
                .OrderByDescending(g => Math.Abs(g.TotalAmount))
                .ToList();

            // Describe every group for the prompt. Where similar, already classified
            // records exist, they are listed right below the group as reference.
            var billsInfo = new StringBuilder();
            for (var index = 0; index < groupedRecords.Count; index++)
            {
                var group = groupedRecords[index];

                // The current category comes from the record's category, not from its type.
                var currentClassify = string.IsNullOrEmpty(group.SampleClassify) ? "未分类" : group.SampleClassify;
                billsInfo.AppendLine($"{index + 1}. 摘要={group.Reason}, 当前类型={GetTypeName(group.SampleType)}, 当前分类={currentClassify}, 涉及金额={group.TotalAmount}");

                // Segment the summary into keywords and look up classified records matching them.
                var keywords = textSegmentService.ExtractKeywords(group.Reason);
                if (keywords.Count == 0)
                {
                    continue;
                }

                var similarClassifiedWithScore = await transactionRepository.GetClassifiedByKeywordsWithScoreAsync(
                    keywords,
                    minMatchRate: MinKeywordMatchRate,
                    limit: SimilarRecordLimit);

                if (similarClassifiedWithScore.Count == 0)
                {
                    logger.LogDebug("摘要 '{Reason}' 提取关键词: {Keywords}, 未找到高相关度的相似账单",
                        group.Reason,
                        string.Join(", ", keywords));
                    continue;
                }

                logger.LogDebug("摘要 '{Reason}' 提取关键词: {Keywords}, 找到 {Count} 个相似账单,相关度分数: {Scores}",
                    group.Reason,
                    string.Join(", ", keywords),
                    similarClassifiedWithScore.Count,
                    string.Join(", ", similarClassifiedWithScore.Select(x => $"{x.record.Reason}({x.relevanceScore:F2})")));

                billsInfo.AppendLine(" 【参考】相似且已分类的账单:");
                foreach (var similar in similarClassifiedWithScore.Take(MaxReferencesPerGroup))
                {
                    var refer = similar.record;
                    billsInfo.AppendLine($" - 摘要={refer.Reason}, 分类={refer.Classify}, 类型={GetTypeName(refer.Type)}, 金额={refer.Amount}");
                }
            }

            // List the available categories, grouped by transaction type.
            var categories = await categoryRepository.GetAllAsync();
            var categoryInfo = new StringBuilder();
            foreach (var type in new[] { 0, 1, 2 })
            {
                var typeName = GetTypeName((TransactionType)type);
                categoryInfo.AppendLine($"{typeName}: ");
                foreach (var category in categories.Where(c => (int)c.Type == type))
                {
                    categoryInfo.AppendLine($"- {category.Name}");
                }
            }

            var systemPrompt = $$"""
                你是一个专业的账单分类助手。请根据提供的账单分组信息和分类列表,为每个分组选择最合适的分类。
                可用的分类列表:
                {{categoryInfo}}
                分类规则:
                1. 根据账单的摘要和涉及金额,选择最匹配的分类
                2. 如果提供了【参考】信息,优先参考相似账单的分类,这些是历史上已分类的相似账单
                3. 如果无法确定分类,可以选择"其他"
                4. 每个分组可能包含多条账单,你需要为整个分组选择一个分类
                请对每个分组进行分类,每次输出一个分组的分类结果,格式如下:
                {"reason": "交易摘要", "type": 0:支出/1:收入/2:不计入收支(Type为Number枚举值) ,"classify": "分类名称"}
                只输出JSON,不要有其他文字说明。
                """;

            var userPrompt = $$"""
                请为以下账单分组进行分类:
                {{billsInfo}}
                请逐个输出分类结果。
                """;

            chunkAction(("start", $"开始分类,共 {sampleRecords.Length} 条账单"));

            // Text received from the AI that has not yet formed a complete JSON object.
            var buffer = new StringBuilder();

            // IDs already reported, so a summary the AI repeats is not reported twice.
            var notifiedIds = new HashSet<long>();

            await foreach (var chunk in openAiService.ChatStreamAsync(systemPrompt, userPrompt))
            {
                buffer.Append(chunk);

                // Each complete object is taken out of the buffer exactly once, so it is
                // neither deserialized nor logged again when later chunks arrive.
                foreach (var jsonStr in DrainCompleteJsonObjects(buffer))
                {
                    try
                    {
                        var result = JsonSerializer.Deserialize<GroupClassifyResult>(jsonStr);
                        if (result == null || string.IsNullOrEmpty(result.Reason))
                        {
                            continue;
                        }

                        var group = groupedRecords.FirstOrDefault(g => g.Reason == result.Reason);
                        if (group == null)
                        {
                            continue;
                        }

                        // The group's category applies to every transaction in the group.
                        foreach (var id in group.Ids)
                        {
                            if (notifiedIds.Add(id))
                            {
                                var resultJson = JsonSerializer.Serialize(new { id, result.Classify, result.Type });
                                chunkAction(("data", resultJson));
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // One unusable object must not abort the rest of the stream.
                        logger.LogWarning(ex, "解析AI分类结果失败: {JsonStr}", jsonStr);
                    }
                }
            }

            chunkAction(("end", "分类完成"));
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "智能分类失败");
            chunkAction(("error", $"智能分类失败: {ex.Message}"));
        }
    }

    /// <summary>
    /// Removes every complete top-level JSON object from the start of <paramref name="buffer"/>
    /// and returns them in order of appearance.
    /// </summary>
    /// <param name="buffer">
    /// Accumulated stream text. After the call it holds only the text from the first
    /// unfinished object onwards, or nothing when no object is pending.
    /// </param>
    /// <returns>The text of each complete object, including its outer braces.</returns>
    private static List<string> DrainCompleteJsonObjects(StringBuilder buffer)
    {
        var text = buffer.ToString();
        var objects = new List<string>();
        var consumed = 0;

        while (consumed < text.Length)
        {
            var openBrace = text.IndexOf('{', consumed);
            if (openBrace == -1)
            {
                // Nothing left that could start an object; the remaining text is discarded.
                consumed = text.Length;
                break;
            }

            var closeBrace = FindMatchingBrace(text, openBrace);
            if (closeBrace == -1)
            {
                // The object is still incomplete; keep it for the next chunk.
                consumed = openBrace;
                break;
            }

            objects.Add(text.Substring(openBrace, closeBrace - openBrace + 1));
            consumed = closeBrace + 1;
        }

        buffer.Remove(0, consumed);
        return objects;
    }

    /// <summary>
    /// Finds the closing brace that matches the opening brace at <paramref name="startPos"/>.
    /// </summary>
    /// <param name="str">Text to scan.</param>
    /// <param name="startPos">Index of the opening brace.</param>
    /// <returns>Index of the matching closing brace, or -1 when the object is not closed yet.</returns>
    private static int FindMatchingBrace(string str, int startPos)
    {
        var braceCount = 0;
        var inString = false;
        var escaped = false;

        for (var i = startPos; i < str.Length; i++)
        {
            var ch = str[i];

            if (inString)
            {
                // Braces inside a JSON string are data, not structure.
                if (escaped)
                {
                    escaped = false;
                }
                else if (ch == '\\')
                {
                    escaped = true;
                }
                else if (ch == '"')
                {
                    inString = false;
                }

                continue;
            }

            if (ch == '"')
            {
                inString = true;
            }
            else if (ch == '{')
            {
                braceCount++;
            }
            else if (ch == '}')
            {
                braceCount--;
                if (braceCount == 0)
                {
                    return i;
                }
            }
        }

        return -1;
    }

    /// <summary>
    /// Returns the display name used in the prompt for a transaction type.
    /// </summary>
    private static string GetTypeName(TransactionType type)
    {
        return type switch
        {
            TransactionType.Expense => "支出",
            TransactionType.Income => "收入",
            TransactionType.None => "不计入收支",
            _ => "未知"
        };
    }
}

/// <summary>
/// Classification result for one group, as parsed from the AI response.
/// </summary>
public record GroupClassifyResult
{
    /// <summary>Summary text identifying the group.</summary>
    [JsonPropertyName("reason")]
    public string Reason { get; set; } = string.Empty;

    /// <summary>Category name chosen for the group.</summary>
    [JsonPropertyName("classify")]
    public string? Classify { get; set; }

    /// <summary>Transaction type chosen for the group.</summary>
    [JsonPropertyName("type")]
    public TransactionType Type { get; set; }
}