Files
EmailBill/Service/SmartClassify.cs

247 lines
10 KiB
C#
Raw Normal View History

2025-12-30 18:49:46 +08:00
namespace Service;
/// <summary>
/// Contract for the AI-assisted classification of transaction records.
/// </summary>
public interface ISmartHandleService
{
    /// <summary>
    /// Classifies the transactions identified by <paramref name="transactionIds"/> and reports
    /// progress and results through <paramref name="chunkAction"/>.
    /// </summary>
    /// <param name="transactionIds">IDs of the transaction records to classify.</param>
    /// <param name="chunkAction">
    /// Callback invoked with a pair of strings. The implementation in this file passes an event
    /// name ("start", "data", "end" or "error") as the first item and the payload as the second.
    /// </param>
    /// <returns>A task that completes when the classification run has finished.</returns>
    Task SmartClassifyAsync(long[] transactionIds, Action<(string, string)> chunkAction);
}
/// <summary>
/// Classifies transaction records with the help of an AI chat model.
/// Records are grouped by their summary text, enriched with similar already-classified
/// records as references, and sent to the model; results are streamed back to the caller.
/// </summary>
public class SmartHandleService(
    ITransactionRecordRepository transactionRepository,
    ITextSegmentService textSegmentService,
    ILogger<SmartHandleService> logger,
    ITransactionCategoryRepository categoryRepository,
    IOpenAiService openAiService
) : ISmartHandleService
{
    /// <summary>
    /// Classifies the given transactions and streams the outcome through <paramref name="chunkAction"/>.
    /// </summary>
    /// <param name="transactionIds">IDs of the transaction records to classify.</param>
    /// <param name="chunkAction">
    /// Callback receiving (event, payload) pairs: "start" once before the AI call, "data" once per
    /// transaction ID with a JSON payload containing id, Classify and Type, "end" on completion,
    /// and "error" when no record is found or an exception occurs.
    /// </param>
    /// <returns>A task that completes when the run has finished. Exceptions are reported as an "error" event rather than thrown.</returns>
    public async Task SmartClassifyAsync(long[] transactionIds, Action<(string , string)> chunkAction)
    {
        try
        {
            // Load the requested transactions; these are the records to classify.
            var sampleRecords = await transactionRepository.GetByIdsAsync(transactionIds);
            if (sampleRecords.Length == 0)
            {
                chunkAction(("error", "找不到指定的账单"));
                return;
            }

            // Group by summary text so each distinct summary is sent to the model only once.
            // Largest absolute amounts come first.
            var groupedRecords = sampleRecords
                .GroupBy(r => r.Reason)
                .Select(g => new
                {
                    Reason = g.Key,
                    Ids = g.Select(r => r.Id).ToList(),
                    Count = g.Count(),
                    TotalAmount = g.Sum(r => r.Amount),
                    SampleType = g.First().Type,
                    // Category currently stored on the first record of the group.
                    SampleClassify = g.First().Classify
                })
                .OrderByDescending(g => Math.Abs(g.TotalAmount))
                .ToList();

            // For every group, extract keywords from the summary and look up similar,
            // already-classified records to give the model some reference examples.
            var referenceRecords = new Dictionary<string, List<TransactionRecord>>();
            foreach (var billGroup in groupedRecords)
            {
                var keywords = textSegmentService.ExtractKeywords(billGroup.Reason);
                if (keywords.Count == 0)
                {
                    continue;
                }

                // minMatchRate: 0.4 => at least 40% of the keywords must match.
                var similarClassifiedWithScore = await transactionRepository.GetClassifiedByKeywordsWithScoreAsync(keywords, minMatchRate: 0.4, limit: 10);
                if (similarClassifiedWithScore.Count > 0)
                {
                    // Keep only the first five results as references.
                    referenceRecords[billGroup.Reason] = similarClassifiedWithScore.Take(5).Select(x => x.record).ToList();

                    logger.LogDebug("摘要 '{Reason}' 提取关键词: {Keywords}, 找到 {Count} 个相似账单,相关度分数: {Scores}",
                        billGroup.Reason,
                        string.Join(", ", keywords),
                        similarClassifiedWithScore.Count,
                        string.Join(", ", similarClassifiedWithScore.Select(x => $"{x.record.Reason}({x.relevanceScore:F2})")));
                }
                else
                {
                    logger.LogDebug("摘要 '{Reason}' 提取关键词: {Keywords}, 未找到高相关度的相似账单",
                        billGroup.Reason,
                        string.Join(", ", keywords));
                }
            }

            // List the available categories per transaction type for the system prompt.
            var categories = await categoryRepository.GetAllAsync();
            var categoryInfo = new StringBuilder();
            foreach (var type in new[] { 0, 1, 2 })
            {
                var typeName = GetTypeName((TransactionType)type);
                categoryInfo.AppendLine($"{typeName}: ");
                foreach (var category in categories.Where(c => (int)c.Type == type))
                {
                    categoryInfo.AppendLine($"- {category.Name}");
                }
            }

            // Describe each group (plus up to three reference records) for the user prompt.
            var billsInfo = new StringBuilder();
            for (var index = 0; index < groupedRecords.Count; index++)
            {
                var billGroup = groupedRecords[index];
                // "当前分类" reports the record's stored category, not its transaction type.
                billsInfo.AppendLine($"{index + 1}. 摘要={billGroup.Reason}, 当前类型={GetTypeName(billGroup.SampleType)}, 当前分类={billGroup.SampleClassify}, 涉及金额={billGroup.TotalAmount}");

                if (referenceRecords.TryGetValue(billGroup.Reason, out var references))
                {
                    billsInfo.AppendLine(" 【参考】相似且已分类的账单:");
                    foreach (var refer in references.Take(3))
                    {
                        billsInfo.AppendLine($" - 摘要={refer.Reason}, 分类={refer.Classify}, 类型={GetTypeName(refer.Type)}, 金额={refer.Amount}");
                    }
                }
            }

            // NOTE(review): rules 1, 2 and 4 below carry no text in this copy of the file;
            // confirm against the repository that the prompt has not been truncated.
            var systemPrompt = $$"""
                {{categoryInfo}}
                1.
                2.
                3. "其他"
                4.
                {"reason": "交易摘要", "type": 0:/1:/2:(Type为Number枚举值) ,"classify": "分类名称"}
                JSON
                """;
            var userPrompt = $$"""
                {{billsInfo}}
                """;

            chunkAction(("start", $"开始分类,共 {sampleRecords.Length} 条账单"));

            // Accumulates streamed text; complete JSON objects are removed as soon as they are handled.
            var buffer = new StringBuilder();
            // IDs that have already been reported, so each transaction is notified at most once.
            var notifiedIds = new HashSet<long>();

            await foreach (var chunk in openAiService.ChatStreamAsync(systemPrompt, userPrompt))
            {
                buffer.Append(chunk);

                foreach (var jsonStr in DrainCompleteJsonObjects(buffer))
                {
                    try
                    {
                        var result = JsonSerializer.Deserialize<GroupClassifyResult>(jsonStr);
                        if (result == null || string.IsNullOrEmpty(result.Reason))
                        {
                            continue;
                        }

                        // Map the model's answer back to the group with the same summary text.
                        var matchedGroup = groupedRecords.FirstOrDefault(g => g.Reason == result.Reason);
                        if (matchedGroup == null)
                        {
                            continue;
                        }

                        // Emit one "data" event per transaction ID in the group.
                        foreach (var id in matchedGroup.Ids)
                        {
                            if (notifiedIds.Add(id))
                            {
                                var resultJson = JsonSerializer.Serialize(new { id, result.Classify, result.Type });
                                chunkAction(("data", resultJson));
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a single bad object must not abort the whole stream.
                        logger.LogWarning(ex, "解析AI分类结果失败: {JsonStr}", jsonStr);
                    }
                }
            }

            chunkAction(("end", "分类完成"));
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "智能分类失败");
            chunkAction(("error", $"智能分类失败: {ex.Message}"));
        }
    }

    /// <summary>
    /// Extracts every complete top-level JSON object currently in <paramref name="buffer"/> and
    /// removes the consumed text, so each object is returned exactly once across calls.
    /// A trailing incomplete object is left in the buffer for the next chunk.
    /// </summary>
    /// <param name="buffer">Accumulated stream text; modified in place.</param>
    /// <returns>The complete JSON object texts, in the order they appear.</returns>
    private static List<string> DrainCompleteJsonObjects(StringBuilder buffer)
    {
        var completed = new List<string>();
        var text = buffer.ToString();
        var consumed = 0;

        while (consumed < text.Length)
        {
            var openBrace = text.IndexOf('{', consumed);
            if (openBrace == -1)
            {
                // Nothing left that could start an object; the remainder can be dropped.
                consumed = text.Length;
                break;
            }

            var closeBrace = FindMatchingBrace(text, openBrace);
            if (closeBrace == -1)
            {
                // Object is still incomplete; keep it (from its opening brace) for the next chunk.
                consumed = openBrace;
                break;
            }

            completed.Add(text.Substring(openBrace, closeBrace - openBrace + 1));
            consumed = closeBrace + 1;
        }

        buffer.Remove(0, consumed);
        return completed;
    }

    /// <summary>
    /// Finds the closing brace that matches the opening brace at <paramref name="startPos"/>.
    /// Braces inside JSON string literals are ignored.
    /// </summary>
    /// <param name="str">Text to scan.</param>
    /// <param name="startPos">Index of the opening brace.</param>
    /// <returns>Index of the matching closing brace, or -1 if the object is not complete yet.</returns>
    private static int FindMatchingBrace(string str, int startPos)
    {
        var depth = 0;
        var inString = false;
        var escaped = false;

        for (var i = startPos; i < str.Length; i++)
        {
            var c = str[i];

            if (inString)
            {
                if (escaped)
                {
                    escaped = false;
                }
                else if (c == '\\')
                {
                    escaped = true;
                }
                else if (c == '"' || c == '\n' || c == '\r')
                {
                    // A quote ends the string. JSON strings cannot contain raw line breaks,
                    // so a line break means a malformed string; leave string mode to recover.
                    inString = false;
                }

                continue;
            }

            switch (c)
            {
                case '"':
                    inString = true;
                    break;
                case '{':
                    depth++;
                    break;
                case '}':
                    depth--;
                    if (depth == 0)
                    {
                        return i;
                    }

                    break;
            }
        }

        return -1;
    }

    /// <summary>
    /// Returns the display name used in prompts for a transaction type.
    /// </summary>
    private static string GetTypeName(TransactionType type) => type switch
    {
        TransactionType.Expense => "支出",
        TransactionType.Income => "收入",
        TransactionType.None => "不计入收支",
        _ => "未知"
    };
}
/// <summary>
/// DTO for a single grouped classification result, used to parse the AI response.
/// </summary>
public record GroupClassifyResult
{
    /// <summary>Transaction summary text; bound to the JSON property "reason".</summary>
    [JsonPropertyName("reason")] public string Reason { get; set; } = "";

    /// <summary>Category name; bound to the JSON property "classify". May be null.</summary>
    [JsonPropertyName("classify")] public string? Classify { get; set; }

    /// <summary>Transaction type; bound to the JSON property "type".</summary>
    [JsonPropertyName("type")] public TransactionType Type { get; set; }
}