Files
EmailBill/Service/SmartHandleService.cs

476 lines
21 KiB
C#
Raw Normal View History

2025-12-30 18:49:46 +08:00
namespace Service;
/// <summary>
/// AI-assisted bill handling: streaming classification, free-form bill analysis and
/// parsing of a one-line bill description.
/// </summary>
public interface ISmartHandleService
{
2025-12-31 11:10:10 +08:00
/// <summary>
/// Classifies the transactions with the given IDs and reports progress through
/// <paramref name="chunkAction"/>.
/// </summary>
/// <param name="transactionIds">IDs of the transactions to classify.</param>
/// <param name="chunkAction">
/// Callback receiving (type, data) pairs. The implementation in this file emits the
/// types "start", "data", "end" and "error".
/// </param>
Task SmartClassifyAsync(long[] transactionIds, Action<(string type, string data)> chunkAction);
/// <summary>
/// Answers a free-form question about the bills and streams the answer through
/// <paramref name="chunkAction"/>.
/// </summary>
/// <param name="userInput">The user's question or request.</param>
/// <param name="chunkAction">
/// Callback receiving each output chunk. The implementation in this file sends JSON
/// objects with a "content" property, followed by "[DONE]" on success.
/// </param>
Task AnalyzeBillAsync(string userInput, Action<string> chunkAction);
/// <summary>
/// Parses a one-line bill description into a structured result.
/// </summary>
/// <param name="text">The bill text to parse.</param>
/// <returns>
/// The parsed result, or null; the implementation in this file returns null when the
/// AI reply is blank or cannot be deserialized.
/// </returns>
Task<TransactionParseResult?> ParseOneLineBillAsync(string text);
2025-12-30 18:49:46 +08:00
}
public class SmartHandleService(
ITransactionRecordRepository transactionRepository,
ITextSegmentService textSegmentService,
ILogger<SmartHandleService> logger,
ITransactionCategoryRepository categoryRepository,
IOpenAiService openAiService
) : ISmartHandleService
{
2025-12-31 11:10:10 +08:00
/// <summary>
/// Classifies the given transactions with the AI service and streams the outcome
/// through <paramref name="chunkAction"/>.
/// </summary>
/// <remarks>
/// The records are grouped by Reason, each group is optionally enriched with similar
/// already-classified records, and the AI reply is parsed incrementally while it streams.
/// Events emitted: ("start", message) before the AI call; ("data", json) once per record
/// ID of every group the AI returned a result for, where json has the properties
/// id, Classify and Type; then ("end", message), or ("error", message) when no record
/// was found, the AI returned no result, or an exception was thrown. Exceptions are
/// logged and reported as an "error" event instead of being rethrown.
/// </remarks>
/// <param name="transactionIds">IDs of the transactions to classify.</param>
/// <param name="chunkAction">Callback receiving (event type, payload) pairs.</param>
public async Task SmartClassifyAsync(long[] transactionIds, Action<(string, string)> chunkAction)
2025-12-30 18:49:46 +08:00
{
try
{
// Load the bills with the given IDs; these are the records to classify.
var sampleRecords = await transactionRepository.GetByIdsAsync(transactionIds);
if (sampleRecords.Length == 0)
{
chunkAction(("error", "找不到指定的账单"));
return;
}
// Group all bills awaiting classification by Reason.
var groupedRecords = sampleRecords
.GroupBy(r => r.Reason)
.Select(g => new
{
Reason = g.Key,
Ids = g.Select(r => r.Id).ToList(),
Count = g.Count(),
TotalAmount = g.Sum(r => r.Amount),
SampleType = g.First().Type,
// Category of the group's first record; reported to the AI as the current category.
SampleClassify = g.First().Classify
})
.OrderByDescending(g => Math.Abs(g.TotalAmount))
.ToList();
// [Enhancement] Extract keywords from each group's summary and look up similar,
// already-classified bills to use as references.
var referenceRecords = new Dictionary<string, List<TransactionRecord>>();
foreach (var group in groupedRecords)
{
// Extract keywords with the text segmentation service.
var keywords = textSegmentService.ExtractKeywords(group.Reason);
if (keywords.Count > 0)
{
// Query classified bills containing these keywords (with a relevance score).
// minMatchRate=0.4: at least 40% of the keywords must match to count as similar.
var similarClassifiedWithScore = await transactionRepository.GetClassifiedByKeywordsWithScoreAsync(keywords, minMatchRate: 0.4, limit: 10);
if (similarClassifiedWithScore.Count > 0)
{
// Keep only the first 5 (presumably returned in relevance order — TODO confirm).
var topSimilar = similarClassifiedWithScore.Take(5).Select(x => x.record).ToList();
referenceRecords[group.Reason] = topSimilar;
// Log debugging details.
logger.LogDebug("摘要 '{Reason}' 提取关键词: {Keywords}, 找到 {Count} 个相似账单,相关度分数: {Scores}",
group.Reason,
string.Join(", ", keywords),
similarClassifiedWithScore.Count,
string.Join(", ", similarClassifiedWithScore.Select(x => $"{x.record.Reason}({x.relevanceScore:F2})")));
}
else
{
logger.LogDebug("摘要 '{Reason}' 提取关键词: {Keywords}, 未找到高相关度的相似账单",
group.Reason,
string.Join(", ", keywords));
}
}
}
// Load all categories.
var categories = await categoryRepository.GetAllAsync();
// Build the category list for the prompt, one section per transaction type.
var categoryInfo = new StringBuilder();
foreach (var type in new[] { 0, 1, 2 })
{
var typeName = GetTypeName((TransactionType)type);
categoryInfo.AppendLine($"{typeName}: ");
var categoriesOfType = categories.Where(c => (int)c.Type == type).ToList();
foreach (var category in categoriesOfType)
{
categoryInfo.AppendLine($"- {category.Name}");
}
}
// Build the grouped bill information for the prompt.
var billsInfo = new StringBuilder();
foreach (var (group, index) in groupedRecords.Select((g, i) => (g, i)))
{
// "当前分类" must carry the record's category (Classify); the transaction type is
// already reported under "当前类型". A null category renders as an empty string.
billsInfo.AppendLine($"{index + 1}. 摘要={group.Reason}, 当前类型={GetTypeName(group.SampleType)}, 当前分类={group.SampleClassify}, 涉及金额={group.TotalAmount}");
// If similar classified bills exist, add them as reference information.
if (referenceRecords.TryGetValue(group.Reason, out var references))
{
billsInfo.AppendLine(" 【参考】相似且已分类的账单:");
foreach (var refer in references.Take(3)) // show at most 3 references
{
billsInfo.AppendLine($" - 摘要={refer.Reason}, 分类={refer.Classify}, 类型={GetTypeName(refer.Type)}, 金额={refer.Amount}");
}
}
}
var systemPrompt = $$"""
2026-01-01 12:32:08 +08:00
2025-12-30 18:49:46 +08:00
2026-01-01 12:32:08 +08:00
{{categoryInfo}}
2025-12-30 18:49:46 +08:00
2026-01-01 12:32:08 +08:00
1.
2.
3. "其他"
4.
2025-12-30 18:49:46 +08:00
2026-01-01 12:32:08 +08:00
- 使 NDJSON JSON
- JSON格式严格为{"reason": "交易摘要", "type": 0, "classify": "分类名称"}
-
- "classify" "其他" JSON
2025-12-30 18:49:46 +08:00
2026-01-01 12:32:08 +08:00
JSON对象NDJSON
""";
2025-12-30 18:49:46 +08:00
var userPrompt = $$"""
2026-01-01 12:32:08 +08:00
2025-12-30 18:49:46 +08:00
2026-01-01 12:32:08 +08:00
{{billsInfo}}
2025-12-30 18:49:46 +08:00
2026-01-01 12:32:08 +08:00
""";
2025-12-30 18:49:46 +08:00
// Call the AI in streaming mode.
chunkAction(("start", $"开始分类,共 {sampleRecords.Length} 条账单"));
var classifyResults = new List<(string Reason, string Classify, TransactionType Type)>();
var sendedIds = new HashSet<long>();
2026-01-01 11:58:21 +08:00
// The stream parsing logic lives in local functions to reduce nesting.
// Records one AI result and emits a "data" event for every not-yet-sent ID of the matching group.
void HandleResult(GroupClassifyResult? result)
{
if (result is null || string.IsNullOrEmpty(result.Reason)) return;
classifyResults.Add((result.Reason, result.Classify ?? string.Empty, result.Type));
var group = groupedRecords.FirstOrDefault(g => g.Reason == result.Reason);
if (group == null) return;
foreach (var id in group.Ids)
2025-12-30 18:49:46 +08:00
{
2026-01-01 11:58:21 +08:00
if (sendedIds.Add(id))
{
var resultJson = JsonSerializer.Serialize(new { id, result.Classify, result.Type });
chunkAction(("data", resultJson));
}
}
}
2025-12-30 18:49:46 +08:00
2026-01-01 11:58:21 +08:00
// Parses every complete JSON object or array currently in the buffer and removes the consumed text.
void FlushBuffer(StringBuilder buffer)
{
var buf = buffer.ToString();
if (string.IsNullOrWhiteSpace(buf)) return;
2025-12-30 18:49:46 +08:00
2026-01-01 11:58:21 +08:00
// Try to parse a complete array first.
var trimmed = buf.TrimStart();
if (trimmed.Length > 0 && trimmed[0] == '[')
{
var lastArrEnd = buf.LastIndexOf(']');
if (lastArrEnd > -1)
2025-12-30 18:49:46 +08:00
{
2026-01-01 11:58:21 +08:00
var arrJson = buf.Substring(0, lastArrEnd + 1);
try
2025-12-30 18:49:46 +08:00
{
2026-01-01 11:58:21 +08:00
var results = JsonSerializer.Deserialize<GroupClassifyResult[]>(arrJson);
if (results != null)
2025-12-30 18:49:46 +08:00
{
2026-01-01 11:58:21 +08:00
foreach (var r in results) HandleResult(r);
2025-12-30 18:49:46 +08:00
}
2026-01-01 11:58:21 +08:00
buffer.Remove(0, lastArrEnd + 1);
buf = buffer.ToString();
2025-12-30 18:49:46 +08:00
}
2026-01-01 11:58:21 +08:00
catch (Exception exArr)
{
logger.LogDebug(exArr, "按数组解析AI返回失败回退到逐对象解析。预览: {Preview}", arrJson?.Length > 200 ? arrJson.Substring(0, 200) + "..." : arrJson);
}
}
}
// Parse object by object; stop at the first object that is not complete yet.
var startIdx = 0;
while (startIdx < buf.Length)
{
var openBrace = buf.IndexOf('{', startIdx);
if (openBrace == -1) break;
var closeBrace = FindMatchingBrace(buf, openBrace);
if (closeBrace == -1) break;
var jsonStr = buf.Substring(openBrace, closeBrace - openBrace + 1);
try
{
var result = JsonSerializer.Deserialize<GroupClassifyResult>(jsonStr);
HandleResult(result);
2025-12-30 18:49:46 +08:00
}
catch (Exception ex)
{
2026-01-01 11:58:21 +08:00
logger.LogWarning(ex, "解析AI分类结果失败: {JsonStr}", jsonStr.Length > 200 ? jsonStr.Substring(0, 200) + "..." : jsonStr);
2025-12-30 18:49:46 +08:00
}
startIdx = closeBrace + 1;
}
2026-01-01 11:58:21 +08:00
// Drop everything up to the end of the last complete object.
if (startIdx > 0)
{
buffer.Remove(0, startIdx);
}
2025-12-30 18:49:46 +08:00
}
2026-01-01 11:58:21 +08:00
var buffer = new StringBuilder();
await foreach (var chunk in openAiService.ChatStreamAsync(systemPrompt, userPrompt))
{
buffer.Append(chunk);
FlushBuffer(buffer);
}
// If the AI stream ended without producing any classification result, report an error.
if (classifyResults.Count == 0)
{
logger.LogWarning("AI未返回任何分类结果buffer最终内容: {BufferPreview}", buffer.ToString().Length > 500 ? buffer.ToString().Substring(0, 500) + "..." : buffer.ToString());
chunkAction(("error", "智能分类未返回任何结果,请重试或手动分类"));
}
else
{
chunkAction(("end", "分类完成"));
}
2025-12-30 18:49:46 +08:00
}
catch (Exception ex)
{
logger.LogError(ex, "智能分类失败");
chunkAction(("error", $"智能分类失败: {ex.Message}"));
}
}
2025-12-31 11:10:10 +08:00
/// <summary>
/// Answers a free-form question about the bills in four steps: the AI generates a SQL
/// query, the query is executed, the rows are serialized to JSON, and the AI streams
/// an HTML analysis of that data.
/// </summary>
/// <remarks>
/// Every chunk passed to <paramref name="chunkAction"/> is a JSON object with a
/// "content" property, except the final "[DONE]" marker, which is only sent when the
/// whole flow succeeds. When the SQL fails or any other exception is thrown, a single
/// error chunk is sent and no "[DONE]" marker follows. Exceptions are logged, not rethrown.
/// </remarks>
/// <param name="userInput">The user's question; it is embedded in both AI prompts.</param>
/// <param name="chunkAction">Callback receiving each output chunk.</param>
public async Task AnalyzeBillAsync(string userInput, Action<string> chunkAction)
{
try
{
// Step 1: ask the AI to generate an aggregate SQL query.
var now = DateTime.Now;
var sqlPrompt = $"""
2026-01-01 12:32:08 +08:00
{now:yyyy年M月d日}{now:yyyy-MM-dd}
{userInput}
SQLite
TransactionRecord
- Id: bigint
- Card: nvarchar
- Reason: nvarchar /
- Amount: decimal
- OccurredAt: datetime TEXT类型'2025-12-26 10:30:00'
- Type: int 0=, 1=, 2=
- Classify: nvarchar
SQL
1.
2. 使 GROUP BY
3. 使SUM(ABS(Amount)) COUNT(*) AVG()MAX()MIN()
4. 使 OccurredAt "最近X个月/天"
5. Type = 0 Type = 1
6. TotalAmount, TransactionCount, AvgAmount
7. 使 ORDER BY
8. SQL语句
SQLite日期函数
- strftime('%Y', OccurredAt)
- strftime('%m', OccurredAt)
- strftime('%Y-%m-%d', OccurredAt)
- 使 YEAR()MONTH()DAY() SQLite不支持
1
SELECT Classify, COUNT(*) as TransactionCount, SUM(ABS(Amount)) as TotalAmount, AVG(ABS(Amount)) as AvgAmount FROM TransactionRecord WHERE Type = 0 AND OccurredAt >= '2025-10-01' AND OccurredAt < '2026-01-01' AND (Classify LIKE '%%' OR Reason LIKE '%%' OR Reason LIKE '%%' OR Reason LIKE '%%') GROUP BY Classify ORDER BY TotalAmount DESC
2
SELECT strftime('%Y', OccurredAt) as Year, strftime('%m', OccurredAt) as Month, COUNT(*) as TransactionCount, SUM(ABS(Amount)) as TotalAmount FROM TransactionRecord WHERE Type = 0 AND OccurredAt >= '2025-06-01' GROUP BY strftime('%Y', OccurredAt), strftime('%m', OccurredAt) ORDER BY Year, Month
3
SELECT COUNT(*) as TransactionCount, SUM(ABS(Amount)) as TotalAmount, AVG(ABS(Amount)) as AvgAmount, MAX(ABS(Amount)) as MaxAmount FROM TransactionRecord WHERE Type = 0 AND OccurredAt >= '2025-12-01' AND OccurredAt < '2026-01-01'
4 - 使
1000
SELECT OccurredAt, Classify, Reason, ABS(Amount) as Amount FROM TransactionRecord WHERE Type = 0 AND ABS(Amount) > 1000 ORDER BY Amount DESC LIMIT 50
SQL语句
""";
2025-12-31 11:10:10 +08:00
var sqlText = await openAiService.ChatAsync(sqlPrompt);
// Clean up the SQL text: strip surrounding backticks and a leading "sql" language tag
// left over from a markdown code fence.
sqlText = sqlText?.Trim() ?? "";
sqlText = sqlText.TrimStart('`').TrimEnd('`');
if (sqlText.StartsWith("sql", StringComparison.OrdinalIgnoreCase))
{
sqlText = sqlText.Substring(3).Trim();
}
logger.LogInformation("AI生成的SQL: {Sql}", sqlText);
// Step 2: run the dynamic SQL query.
// NOTE(review): sqlText is model output derived from userInput and is passed on as-is;
// no validation is visible in this method. Confirm that ExecuteDynamicSqlAsync restricts
// execution to read-only statements.
List<dynamic> queryResults;
try
{
queryResults = await transactionRepository.ExecuteDynamicSqlAsync(sqlText);
}
catch (Exception ex)
{
logger.LogError(ex, "执行AI生成的SQL失败: {Sql}", sqlText);
// If the SQL fails, send an error chunk and stop.
var errorData = JsonSerializer.Serialize(new { content = "<div class='error-message'>SQL执行失败请重新描述您的问题</div>" });
chunkAction(errorData);
return;
}
// Step 3: serialize the query results to JSON and hand them to the AI to generate the analysis report.
var dataJson = JsonSerializer.Serialize(queryResults, new JsonSerializerOptions
{
WriteIndented = true,
Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping
});
var dataPrompt = $"""
2026-01-01 12:32:08 +08:00
{DateTime.Now:yyyy年M月d日}
{userInput}
JSON格式
{dataJson}
1. 使HTML格式H5页面风格
2.
3. 使table > thead/tbody > tr > th/td
4. 使HTML标签h2h3ptableul/listrong
5. <span class='expense-value'></span>
6. <span class='income-value'></span>
7. <span class='highlight'></span>
8. htmlbodyhead
9. 使 style <style>
10. backgroundbackground-colorcolor
11. 使 div
12. JSON数据转换为易读的表格和文字说明
13.
14.
15.
HTML内容markdown代码块标记
""";
2025-12-31 11:10:10 +08:00
// Step 4: stream the AI analysis, wrapping each chunk in a JSON object.
await foreach (var chunk in openAiService.ChatStreamAsync(dataPrompt))
{
var sseData = JsonSerializer.Serialize(new { content = chunk });
chunkAction(sseData);
}
// Send the completion marker.
chunkAction("[DONE]");
}
catch (Exception ex)
{
logger.LogError(ex, "智能分析账单失败");
var errorData = JsonSerializer.Serialize(new { content = $"<div class='error-message'>分析失败:{ex.Message}</div>" });
chunkAction(errorData);
}
}
2025-12-30 18:49:46 +08:00
/// <summary>
/// Finds the index of the '}' that closes the '{' at <paramref name="startPos"/>.
/// Braces inside JSON string literals (including strings containing escaped quotes)
/// are ignored, so a value such as "a}b" does not end the object early.
/// </summary>
/// <param name="str">Text to scan.</param>
/// <param name="startPos">Index to start scanning from; expected to point at a '{'.</param>
/// <returns>The index of the matching '}', or -1 when the object is not complete yet.</returns>
private static int FindMatchingBrace(string str, int startPos)
{
    var depth = 0;
    var inString = false;
    var escaped = false;
    for (var i = startPos; i < str.Length; i++)
    {
        var c = str[i];
        if (inString)
        {
            // Inside a string only an unescaped quote matters; everything else,
            // braces included, is plain text.
            if (escaped)
            {
                escaped = false;
            }
            else if (c == '\\')
            {
                escaped = true;
            }
            else if (c == '"')
            {
                inString = false;
            }
            continue;
        }

        switch (c)
        {
            case '"':
                inString = true;
                break;
            case '{':
                depth++;
                break;
            case '}':
                depth--;
                if (depth == 0)
                {
                    return i;
                }
                break;
        }
    }

    return -1;
}
/// <summary>
/// Maps a transaction type to the Chinese label used in prompts; values without a
/// dedicated label map to "未知".
/// </summary>
private static string GetTypeName(TransactionType type) => type switch
{
    TransactionType.Expense => "支出",
    TransactionType.Income => "收入",
    TransactionType.None => "不计入收支",
    _ => "未知",
};
/// <summary>
/// Asks the AI service to turn a one-line bill description into a structured result.
/// </summary>
/// <param name="text">The bill text; sent to the AI as the user message.</param>
/// <returns>
/// The deserialized result, or null when the AI reply is blank or cannot be
/// deserialized (the failure is logged).
/// </returns>
public async Task<TransactionParseResult?> ParseOneLineBillAsync(string text)
{
    // Render every category as "<type label>-<category name>" for the prompt.
    var allCategories = await categoryRepository.GetAllAsync();
    var categoryList = string.Join("、", allCategories.Select(c => $"{GetTypeName(c.Type)}-{c.Name}"));
    var sysPrompt = $"""
JSON
- OccurredAt: yyyy-MM-dd HH:mm:ss{DateTime.Now:yyyy-MM-dd HH:mm:ss}
- Amount:
- Reason: /
- Type: 0=1=2=
- Classify: {categoryList}""""
JSON markdown
""";

    var reply = await openAiService.ChatAsync(sysPrompt, text);
    if (string.IsNullOrWhiteSpace(reply))
    {
        return null;
    }

    try
    {
        // Strip any markdown code-fence markers before deserializing.
        var payload = reply.Replace("```json", "").Replace("```", "").Trim();
        return JsonSerializer.Deserialize<TransactionParseResult>(
            payload,
            new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
    }
    catch (Exception ex)
    {
        logger.LogError(ex, "解析账单失败");
        return null;
    }
}
2025-12-30 18:49:46 +08:00
}
/// <summary>
/// DTO for one group classification result; used to deserialize the JSON objects
/// returned by the AI.
/// </summary>
public record GroupClassifyResult
{
/// <summary>Transaction summary (JSON "reason"); matched against the Reason of the grouped records.</summary>
[JsonPropertyName("reason")]
public string Reason { get; set; } = string.Empty;
/// <summary>Category name (JSON "classify"); may be null.</summary>
[JsonPropertyName("classify")]
public string? Classify { get; set; }
/// <summary>Transaction type (JSON "type").</summary>
[JsonPropertyName("type")]
public TransactionType Type { get; set; }
}
/// <summary>
/// Structured bill that ParseOneLineBillAsync deserializes from the AI reply.
/// </summary>
/// <param name="OccurredAt">Occurrence time as text; the prompt asks for yyyy-MM-dd HH:mm:ss.</param>
/// <param name="Classify">Category name.</param>
/// <param name="Amount">Bill amount.</param>
/// <param name="Reason">Bill summary.</param>
/// <param name="Type">Transaction type.</param>
public record TransactionParseResult(string OccurredAt, string Classify, decimal Amount, string Reason, TransactionType Type);