Files
EmailBill/Service/SmartClassify.cs
孙诚 4b322494ba
Some checks failed
Docker Build & Deploy / Build Docker Image (push) Failing after 6s
Docker Build & Deploy / Deploy to Production (push) Has been skipped
fix
2025-12-30 18:49:46 +08:00

247 lines
10 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
namespace Service;
public interface ISmartHandleService
{
Task SmartClassifyAsync(long[] transactionIds, Action<(string , string)> chunkAction);
}
public class SmartHandleService(
ITransactionRecordRepository transactionRepository,
ITextSegmentService textSegmentService,
ILogger<SmartHandleService> logger,
ITransactionCategoryRepository categoryRepository,
IOpenAiService openAiService
) : ISmartHandleService
{
public async Task SmartClassifyAsync(long[] transactionIds, Action<(string , string)> chunkAction)
{
try
{
// 获取指定ID的账单作为样本
var sampleRecords = await transactionRepository.GetByIdsAsync(transactionIds);
if (sampleRecords.Length == 0)
{
// await WriteEventAsync("error", "找不到指定的账单");
chunkAction(("error", "找不到指定的账单"));
return;
}
// 重新按Reason分组所有待分类账单
var groupedRecords = sampleRecords
.GroupBy(r => r.Reason)
.Select(g => new
{
Reason = g.Key,
Ids = g.Select(r => r.Id).ToList(),
Count = g.Count(),
TotalAmount = g.Sum(r => r.Amount),
SampleType = g.First().Type
})
.OrderByDescending(g => Math.Abs(g.TotalAmount))
.ToList();
// 【增强功能】对每个分组的摘要进行分词,查询已分类的相似账单
var referenceRecords = new Dictionary<string, List<TransactionRecord>>();
foreach (var group in groupedRecords)
{
// 使用专业分词库提取关键词
var keywords = textSegmentService.ExtractKeywords(group.Reason);
if (keywords.Count > 0)
{
// 查询包含这些关键词且已分类的账单(带相关度评分)
// minMatchRate=0.4 表示至少匹配40%的关键词才被认为是相似的
var similarClassifiedWithScore = await transactionRepository.GetClassifiedByKeywordsWithScoreAsync(keywords, minMatchRate: 0.4, limit: 10);
if (similarClassifiedWithScore.Count > 0)
{
// 只取前5个最相关的
var topSimilar = similarClassifiedWithScore.Take(5).Select(x => x.record).ToList();
referenceRecords[group.Reason] = topSimilar;
// 记录调试信息
logger.LogDebug("摘要 '{Reason}' 提取关键词: {Keywords}, 找到 {Count} 个相似账单,相关度分数: {Scores}",
group.Reason,
string.Join(", ", keywords),
similarClassifiedWithScore.Count,
string.Join(", ", similarClassifiedWithScore.Select(x => $"{x.record.Reason}({x.relevanceScore:F2})")));
}
else
{
logger.LogDebug("摘要 '{Reason}' 提取关键词: {Keywords}, 未找到高相关度的相似账单",
group.Reason,
string.Join(", ", keywords));
}
}
}
// 获取所有分类
var categories = await categoryRepository.GetAllAsync();
// 构建分类信息
var categoryInfo = new StringBuilder();
foreach (var type in new[] { 0, 1, 2 })
{
var typeName = GetTypeName((TransactionType)type);
categoryInfo.AppendLine($"{typeName}: ");
var categoriesOfType = categories.Where(c => (int)c.Type == type).ToList();
foreach (var category in categoriesOfType)
{
categoryInfo.AppendLine($"- {category.Name}");
}
}
// 构建账单分组信息
var billsInfo = new StringBuilder();
foreach (var (group, index) in groupedRecords.Select((g, i) => (g, i)))
{
billsInfo.AppendLine($"{index + 1}. 摘要={group.Reason}, 当前类型={GetTypeName(group.SampleType)}, 当前分类={(string.IsNullOrEmpty(group.SampleType.ToString()) ? "" : group.SampleType.ToString())}, 涉及金额={group.TotalAmount}");
// 如果有相似的已分类账单,添加参考信息
if (referenceRecords.TryGetValue(group.Reason, out var references))
{
billsInfo.AppendLine(" 【参考】相似且已分类的账单:");
foreach (var refer in references.Take(3)) // 最多显示3个参考
{
billsInfo.AppendLine($" - 摘要={refer.Reason}, 分类={refer.Classify}, 类型={GetTypeName(refer.Type)}, 金额={refer.Amount}");
}
}
}
var systemPrompt = $$"""
你是一个专业的账单分类助手。请根据提供的账单分组信息和分类列表,为每个分组选择最合适的分类。
可用的分类列表:
{{categoryInfo}}
分类规则:
1. 根据账单的摘要和涉及金额,选择最匹配的分类
2. 如果提供了【参考】信息,优先参考相似账单的分类,这些是历史上已分类的相似账单
3. 如果无法确定分类,可以选择""
4.
{"reason": "交易摘要", "type": 0:/1:/2:(Type为Number枚举值) ,"classify": "分类名称"}
JSON
""";
var userPrompt = $$"""
请为以下账单分组进行分类:
{{billsInfo}}
请逐个输出分类结果。
""";
// 流式调用AI
chunkAction(("start", $"开始分类,共 {sampleRecords.Length} 条账单"));
// 用于存储AI返回的分组分类结果
var classifyResults = new List<(string Reason, string Classify, TransactionType Type)>();
var buffer = new StringBuilder();
var sendedIds = new HashSet<long>();
await foreach (var chunk in openAiService.ChatStreamAsync(systemPrompt, userPrompt))
{
buffer.Append(chunk);
// 尝试解析完整的JSON对象
var bufferStr = buffer.ToString();
var startIdx = 0;
while (startIdx < bufferStr.Length)
{
var openBrace = bufferStr.IndexOf('{', startIdx);
if (openBrace == -1) break;
var closeBrace = FindMatchingBrace(bufferStr, openBrace);
if (closeBrace == -1) break;
var jsonStr = bufferStr.Substring(openBrace, closeBrace - openBrace + 1);
try
{
var result = JsonSerializer.Deserialize<GroupClassifyResult>(jsonStr);
if (result != null && !string.IsNullOrEmpty(result.Reason))
{
classifyResults.Add((result.Reason, result.Classify ?? "", result.Type));
// 每一条结果单独通知
var group = groupedRecords.FirstOrDefault(g => g.Reason == result.Reason);
if (group != null)
{
// 为该分组的所有账单ID返回分类结果
foreach (var id in group.Ids)
{
if (!sendedIds.Contains(id))
{
sendedIds.Add(id);
var resultJson = JsonSerializer.Serialize(new { id, result.Classify, result.Type });
chunkAction(("data", resultJson));
}
}
}
}
}
catch (Exception ex)
{
logger.LogWarning(ex, "解析AI分类结果失败: {JsonStr}", jsonStr);
}
startIdx = closeBrace + 1;
}
}
chunkAction(("end", "分类完成"));
}
catch (Exception ex)
{
logger.LogError(ex, "智能分类失败");
chunkAction(("error", $"智能分类失败: {ex.Message}"));
}
}
/// <summary>
/// 查找匹配的右括号
/// </summary>
private static int FindMatchingBrace(string str, int startPos)
{
int braceCount = 0;
for (int i = startPos; i < str.Length; i++)
{
if (str[i] == '{') braceCount++;
else if (str[i] == '}')
{
braceCount--;
if (braceCount == 0) return i;
}
}
return -1;
}
private static string GetTypeName(TransactionType type)
{
return type switch
{
TransactionType.Expense => "支出",
TransactionType.Income => "收入",
TransactionType.None => "不计入收支",
_ => "未知"
};
}
}
/// <summary>
/// 分组分类结果DTO用于AI返回结果解析
/// </summary>
public record GroupClassifyResult
{
[JsonPropertyName("reason")]
public string Reason { get; set; } = string.Empty;
[JsonPropertyName("classify")]
public string? Classify { get; set; }
[JsonPropertyName("type")]
public TransactionType Type { get; set; }
}