All checks were successful
Docker Build & Deploy / Build Docker Image (push) Successful in 27s
Docker Build & Deploy / Deploy to Production (push) Successful in 9s
Docker Build & Deploy / Cleanup Dangling Images (push) Successful in 2s
Docker Build & Deploy / WeChat Notification (push) Successful in 2s
558 lines
23 KiB
C#
558 lines
23 KiB
C#
using Service.Transaction;
|
||
|
||
namespace Service.AI;
|
||
|
||
/// <summary>
/// AI-assisted bill handling: batch classification, free-form bill analysis, and
/// parsing of a single line of text into a transaction.
/// </summary>
public interface ISmartHandleService
{
    /// <summary>
    /// Classifies the given transactions with the AI model and reports progress through a callback.
    /// </summary>
    /// <param name="transactionIds">Ids of the transactions to classify.</param>
    /// <param name="chunkAction">Callback receiving an event type and its payload for each streamed event.</param>
    Task SmartClassifyAsync(long[] transactionIds, Action<(string type, string data)> chunkAction);

    /// <summary>
    /// Answers a free-form question about the user's bills and streams the answer through a callback.
    /// </summary>
    /// <param name="userInput">The user's question.</param>
    /// <param name="chunkAction">Callback receiving each streamed chunk as a string.</param>
    Task AnalyzeBillAsync(string userInput, Action<string> chunkAction);

    /// <summary>
    /// Extracts one transaction from a line of free text.
    /// </summary>
    /// <param name="text">The text describing the transaction.</param>
    /// <returns>The parsed transaction, or <c>null</c> when nothing could be parsed.</returns>
    Task<TransactionParseResult?> ParseOneLineBillAsync(string text);
}
|
||
|
||
public class SmartHandleService(
|
||
ITransactionRecordRepository transactionRepository,
|
||
ITransactionStatisticsService transactionStatisticsService,
|
||
ITextSegmentService textSegmentService,
|
||
ILogger<SmartHandleService> logger,
|
||
ITransactionCategoryRepository categoryRepository,
|
||
IOpenAiService openAiService,
|
||
IConfigService configService
|
||
) : ISmartHandleService
|
||
{
|
||
/// <summary>
/// Classifies the still-unclassified transactions among <paramref name="transactionIds"/> by
/// streaming an AI completion and forwarding every parsed result through <paramref name="chunkAction"/>.
/// </summary>
/// <param name="transactionIds">
/// Ids of the transactions to classify. Records that already carry a classification are skipped.
/// </param>
/// <param name="chunkAction">
/// Receives (event, payload) pairs: <c>start</c> once before streaming; <c>data</c> with a JSON object
/// (<c>id</c>, <c>Classify</c>, <c>Type</c>) for each classified transaction; finally <c>end</c>,
/// or <c>error</c> with a message.
/// </param>
/// <remarks>
/// Exceptions raised while classifying are logged and reported as an <c>error</c> event instead of being rethrown.
/// </remarks>
public async Task SmartClassifyAsync(long[] transactionIds, Action<(string, string)> chunkAction)
{
    try
    {
        // Load the requested records and keep only the ones without a classification.
        var sampleRecords = await transactionRepository.GetByIdsAsync(transactionIds);

        sampleRecords = sampleRecords
            .Where(x => string.IsNullOrEmpty(x.Classify))
            .ToArray();

        if (sampleRecords.Length == 0)
        {
            chunkAction(("error", "找不到指定的账单"));
            return;
        }

        // Group by summary so the model classifies each distinct summary once;
        // groups with the largest absolute amount come first.
        var groupedRecords = sampleRecords
            .GroupBy(r => r.Reason)
            .Select(g => new
            {
                Reason = g.Key,
                Ids = g.Select(r => r.Id).ToList(),
                TotalAmount = g.Sum(r => r.Amount),
                SampleType = g.First().Type
            })
            .OrderByDescending(g => Math.Abs(g.TotalAmount))
            .ToList();

        // For each group, segment the summary into keywords and look up already-classified
        // records that match, to give the model concrete precedents.
        var referenceRecords = new Dictionary<string, List<TransactionRecord>>();
        foreach (var group in groupedRecords)
        {
            var keywords = textSegmentService.ExtractKeywords(group.Reason);
            if (keywords.Count == 0)
            {
                continue;
            }

            // minMatchRate 0.4: a record must match at least 40% of the keywords to count as similar.
            var similarClassifiedWithScore = await transactionStatisticsService.GetClassifiedByKeywordsWithScoreAsync(keywords, minMatchRate: 0.4, limit: 10);

            if (similarClassifiedWithScore.Count > 0)
            {
                // Keep only the first five results as references.
                var topSimilar = similarClassifiedWithScore.Take(5).Select(x => x.record).ToList();
                referenceRecords[group.Reason] = topSimilar;

                logger.LogDebug("摘要 '{Reason}' 提取关键词: {Keywords}, 找到 {Count} 个相似账单,相关度分数: {Scores}",
                    group.Reason,
                    string.Join(", ", keywords),
                    similarClassifiedWithScore.Count,
                    string.Join(", ", similarClassifiedWithScore.Select(x => $"{x.record.Reason}({x.relevanceScore:F2})")));
            }
            else
            {
                logger.LogDebug("摘要 '{Reason}' 提取关键词: {Keywords}, 未找到高相关度的相似账单",
                    group.Reason,
                    string.Join(", ", keywords));
            }
        }

        // Category list offered to the model.
        var categoryInfo = await GetCategoryInfoAsync();

        // Describe every group (plus up to three references each) for the user prompt.
        var billsInfo = new StringBuilder();
        foreach (var (group, index) in groupedRecords.Select((g, i) => (g, i)))
        {
            // Every record in this method is unclassified (filtered above), so the current
            // classification is always reported as "未分类". The previous code printed the
            // transaction-type enum name here by mistake.
            billsInfo.AppendLine($"{index + 1}. 摘要={group.Reason}, 当前类型={GetTypeName(group.SampleType)}, 当前分类=未分类, 涉及金额={group.TotalAmount}");

            if (referenceRecords.TryGetValue(group.Reason, out var references))
            {
                billsInfo.AppendLine(" 【参考】相似且已分类的账单:");
                foreach (var refer in references.Take(3))
                {
                    billsInfo.AppendLine($" - 摘要={refer.Reason}, 分类={refer.Classify}, 类型={GetTypeName(refer.Type)}, 金额={refer.Amount}");
                }
            }
        }

        var systemPrompt = $$"""
            你是一个专业的账单分类助手。请根据提供的账单分组信息和分类列表,为每个分组选择最合适的分类。

            可用的分类列表:
            {{categoryInfo}}

            分类规则:
            1. 根据账单的摘要和涉及金额,选择最匹配的分类
            2. 如果提供了【参考】信息,优先参考相似账单的分类,这些是历史上已分类的相似账单
            3. 如果无法确定分类,可以选择"其他"
            4. 每个分组可能包含多条账单,你需要为整个分组选择一个分类

            输出格式要求(强制):
            - 请使用 NDJSON(每行一个独立的 JSON 对象,末尾以换行符分隔),不要输出数组。
            - 每行的JSON格式严格为:
            {
            "reason": "交易摘要",
            "type": Number, // 交易类型,0=支出,1=收入,2=不计入收支
            "classify": "分类名称"
            }
            - 不要输出任何解释性文字、编号、标点或多余的文本
            - 如果无法判断分类,请不要输出该行的JSON对象

            只输出按行的JSON对象(NDJSON),不要有其他文字说明。
            """;

        var userPrompt = $$"""
            请为以下账单分组进行分类:

            {{billsInfo}}

            请逐个输出分类结果。
            """;

        chunkAction(("start", $"开始分类,共 {sampleRecords.Length} 条账单"));

        var classifyResults = new List<(string Reason, string Classify, TransactionType Type)>();
        var sentIds = new HashSet<long>();

        // Shortens text for log output.
        static string Preview(string text, int maxLength) =>
            text.Length > maxLength ? text.Substring(0, maxLength) + "..." : text;

        // Records one parsed result and emits a "data" event for every not-yet-reported
        // transaction id in the group whose summary matches the result.
        void HandleResult(GroupClassifyResult? result)
        {
            if (result is null || string.IsNullOrEmpty(result.Reason))
            {
                return;
            }

            classifyResults.Add((result.Reason, result.Classify ?? string.Empty, result.Type));

            var group = groupedRecords.FirstOrDefault(g => g.Reason == result.Reason);
            if (group == null)
            {
                return;
            }

            foreach (var id in group.Ids)
            {
                // sentIds guards against reporting the same transaction twice.
                if (!sentIds.Add(id))
                {
                    continue;
                }

                var resultJson = JsonSerializer.Serialize(new
                {
                    id,
                    result.Classify,
                    result.Type
                });
                chunkAction(("data", resultJson));
            }
        }

        // Parses every complete JSON array/object currently in the buffer and removes
        // the consumed prefix, leaving any incomplete tail for the next chunk.
        void FlushBuffer(StringBuilder pending)
        {
            var buf = pending.ToString();
            if (string.IsNullOrWhiteSpace(buf))
            {
                return;
            }

            // The model is told to emit NDJSON but may still answer with an array; try that first.
            var trimmed = buf.TrimStart();
            if (trimmed.Length > 0 && trimmed[0] == '[')
            {
                var lastArrEnd = buf.LastIndexOf(']');
                if (lastArrEnd > -1)
                {
                    var arrJson = buf.Substring(0, lastArrEnd + 1);
                    try
                    {
                        var results = JsonSerializer.Deserialize<GroupClassifyResult[]>(arrJson);
                        if (results != null)
                        {
                            foreach (var r in results)
                            {
                                HandleResult(r);
                            }
                        }

                        pending.Remove(0, lastArrEnd + 1);
                        buf = pending.ToString();
                    }
                    catch (Exception exArr)
                    {
                        // Expected while the array is still incomplete; fall back to per-object parsing.
                        logger.LogDebug(exArr, "按数组解析AI返回失败,回退到逐对象解析。预览: {Preview}", Preview(arrJson, 200));
                    }
                }
            }

            // Object-by-object parsing.
            var startIdx = 0;
            while (startIdx < buf.Length)
            {
                var openBrace = buf.IndexOf('{', startIdx);
                if (openBrace == -1)
                {
                    break;
                }

                var closeBrace = FindMatchingBrace(buf, openBrace);
                if (closeBrace == -1)
                {
                    // Object not complete yet; wait for more data.
                    break;
                }

                var jsonStr = buf.Substring(openBrace, closeBrace - openBrace + 1);
                try
                {
                    var result = JsonSerializer.Deserialize<GroupClassifyResult>(jsonStr);
                    HandleResult(result);
                }
                catch (Exception ex)
                {
                    logger.LogWarning(ex, "解析AI分类结果失败: {JsonStr}", Preview(jsonStr, 200));
                }

                startIdx = closeBrace + 1;
            }

            if (startIdx > 0)
            {
                pending.Remove(0, startIdx);
            }
        }

        var buffer = new StringBuilder();
        await foreach (var chunk in openAiService.ChatStreamAsync(systemPrompt, userPrompt))
        {
            buffer.Append(chunk);
            FlushBuffer(buffer);
        }

        // The stream ended: report an error when nothing at all could be parsed.
        if (classifyResults.Count == 0)
        {
            logger.LogWarning("AI未返回任何分类结果,buffer最终内容: {BufferPreview}", Preview(buffer.ToString(), 500));
            chunkAction(("error", "智能分类未返回任何结果,请重试或手动分类"));
        }
        else
        {
            chunkAction(("end", "分类完成"));
        }
    }
    catch (Exception ex)
    {
        logger.LogError(ex, "智能分类失败");
        chunkAction(("error", $"智能分类失败: {ex.Message}"));
    }
}
|
||
|
||
/// <summary>
/// Answers a free-form question about the user's bills: asks the AI for an aggregate SQL query,
/// runs it, then streams an AI-written HTML report based on the query result.
/// </summary>
/// <param name="userInput">The user's question.</param>
/// <param name="chunkAction">
/// Receives JSON strings of the form <c>{"content": "..."}</c> (the generated SQL preview, report
/// chunks, or an error message) and finally the literal <c>[DONE]</c> after a successful run.
/// </param>
/// <remarks>
/// The generated SQL is derived from user input, so it is only executed when it is a single
/// read-only query (see <c>IsReadOnlyQuery</c>). Exceptions are logged and reported through
/// <paramref name="chunkAction"/> instead of being rethrown.
/// </remarks>
public async Task AnalyzeBillAsync(string userInput, Action<string> chunkAction)
{
    try
    {
        // Category list offered to the model.
        var categoryInfo = await GetCategoryInfoAsync();

        // Step 1: let the AI generate an aggregate SQL query.
        var now = DateTime.Now;
        var sqlPrompt = $$"""
            当前日期:{{now:yyyy年M月d日}}({{now:yyyy-MM-dd}})
            用户问题:{{userInput}}

            数据库类型:SQLite
            数据库表名:TransactionRecord
            字段说明:
            - Id: bigint 主键
            - Card: nvarchar 卡号
            - Reason: nvarchar 交易原因/摘要
            - Amount: decimal 交易金额(支出为负数,收入为正数)
            - OccurredAt: datetime 交易发生时间(TEXT类型,格式:'2025-12-26 10:30:00')
            - Type: int 交易类型(0=支出, 1=收入, 2=不计入收支)
            - Classify: nvarchar 交易分类(如:交通、餐饮、购物等)

            【核心原则】直接生成用户所需的聚合统计SQL,而不是查询原始记录后再统计

            要求:
            1. 根据用户问题判断需要什么维度的聚合数据
            2. 使用 GROUP BY 按分类、时间等维度分组
            3. 使用聚合函数:SUM(ABS(Amount)) 计算金额总和、COUNT(*) 计数、AVG()平均、MAX()最大、MIN()最小
            4. 时间范围使用 OccurredAt 字段,"最近X个月/天"基于当前日期计算
            5. 支出用 Type = 0,收入用 Type = 1
            6. 给聚合字段起有意义的别名(如 TotalAmount, TransactionCount, AvgAmount)
            7. 使用 ORDER BY 对结果排序(通常按金额降序)
            8. 只返回SQL语句,不要解释

            【重要】SQLite日期函数:
            - 提取年份:strftime('%Y', OccurredAt)
            - 提取月份:strftime('%m', OccurredAt)
            - 提取日期:strftime('%Y-%m-%d', OccurredAt)
            - 不要使用 YEAR()、MONTH()、DAY() 函数,SQLite不支持

            【重要】最终的SQL会被以下DOTNET代码执行, 请确保你生成的代码可执行,不报错
            ```C#
            public async Task<List<dynamic>> ExecuteDynamicSqlAsync(string completeSql)
            {
            var dt = await FreeSql.Ado.ExecuteDataTableAsync(completeSql);
            var result = new List<dynamic>();

            foreach (System.Data.DataRow row in dt.Rows)
            {
            var expando = new System.Dynamic.ExpandoObject() as IDictionary<string, object>;
            foreach (System.Data.DataColumn column in dt.Columns)
            {
            expando[column.ColumnName] = row[column];
            }
            result.Add(expando);
            }

            return result;
            }
            ```

            【重要】必须从以下分类列表中选择分类:
            {{categoryInfo}}

            只返回SQL语句。
            """;

        var sqlText = await openAiService.ChatAsync(sqlPrompt);

        // Strip markdown code-fence residue (backticks and a leading "sql" language tag).
        sqlText = sqlText?.Trim() ?? "";
        sqlText = sqlText.TrimStart('`').TrimEnd('`');
        if (sqlText.StartsWith("sql", StringComparison.OrdinalIgnoreCase))
        {
            sqlText = sqlText.Substring(3).Trim();
        }

        logger.LogInformation("AI生成的SQL: {Sql}", sqlText);

        // Show the generated SQL to the user (HTML-encoded).
        chunkAction(
            JsonSerializer.Serialize(new
            {
                content = $"""
                    <pre style="max-height: 80px; font-size: 8px; overflow-y: auto; padding: 8px; border: 1px solid #3c3c3c">
                    {WebUtility.HtmlEncode(sqlText)}
                    </pre>
                    """
            })
        );

        // Reports the generic "SQL failed" message used for both rejected and failing queries.
        void ReportSqlFailure()
        {
            var errorData = JsonSerializer.Serialize(new { content = "<div class='error-message'>SQL执行失败,请重新描述您的问题</div>" });
            chunkAction(errorData);
        }

        // Step 2: execute the query, but only if it cannot modify data. The SQL comes from
        // a model prompted with untrusted user text, so anything but a single read-only
        // statement is refused.
        if (!IsReadOnlyQuery(sqlText))
        {
            logger.LogWarning("拒绝执行非只读的AI生成SQL: {Sql}", sqlText);
            ReportSqlFailure();
            return;
        }

        List<dynamic> queryResults;
        try
        {
            queryResults = await transactionRepository.ExecuteDynamicSqlAsync(sqlText);
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "执行AI生成的SQL失败: {Sql}", sqlText);
            ReportSqlFailure();
            return;
        }

        // Step 3: serialize the rows as JSON and hand them to the AI for the report.
        var dataJson = JsonSerializer.Serialize(queryResults, new JsonSerializerOptions
        {
            WriteIndented = true,
            Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping
        });

        var userPromptExtra = await configService.GetConfigByKeyAsync<string>("BillAnalysisPrompt");

        var dataPrompt = $"""
            当前日期:{now:yyyy年M月d日}
            用户问题:{userInput}

            【用户要求(重要)】
            {userInput}

            查询结果数据(JSON格式):
            {dataJson}

            说明:以上数据是根据用户问题查询出的聚合统计结果,请基于这些数据生成分析报告。

            请生成一份专业的数据分析报告,严格遵守以下要求:

            【格式要求】
            1. 使用HTML格式(移动端H5页面风格)
            2. 生成清晰的报告标题(基于用户问题)
            3. 使用表格展示统计数据(table > thead/tbody > tr > th/td)
            4. 使用合适的HTML标签:h2(标题)、h3(小节)、p(段落)、table(表格)、ul/li(列表)、strong(强调)
            5. 支出金额用 <span class='expense-value'>金额</span> 包裹
            6. 收入金额用 <span class='income-value'>金额</span> 包裹
            7. 重要结论用 <span class='highlight'>内容</span> 高亮

            【样式限制(重要)】
            8. 不要包含 html、body、head 标签
            9. 不要使用任何 style 属性或 <style> 标签
            10. 不要设置 background、background-color、color 等样式属性
            11. 不要使用 div 包裹大段内容

            【内容要求】
            12. 准确解读数据:将JSON数据转换为易读的表格和文字说明
            13. 提供洞察分析:根据数据给出有价值的发现和趋势分析
            14. 给出实用建议:基于数据提供合理的财务建议
            15. 语言专业、清晰、简洁

            【用户补充(重要)】
            {userPromptExtra}

            直接输出纯净的HTML内容,不要markdown代码块标记。
            """;

        // Step 4: stream the report to the caller.
        await foreach (var chunk in openAiService.ChatStreamAsync(dataPrompt))
        {
            var sseData = JsonSerializer.Serialize(new { content = chunk });
            chunkAction(sseData);
        }

        // Completion marker.
        chunkAction("[DONE]");
    }
    catch (Exception ex)
    {
        logger.LogError(ex, "智能分析账单失败");
        // The message is embedded in HTML, so encode it like the SQL preview above.
        var errorData = JsonSerializer.Serialize(new { content = $"<div class='error-message'>分析失败:{WebUtility.HtmlEncode(ex.Message)}</div>" });
        chunkAction(errorData);
    }
}

/// <summary>
/// Returns <c>true</c> when <paramref name="sql"/> is a single statement that starts with
/// <c>SELECT</c>, or with <c>WITH</c> and contains no INSERT/UPDATE/DELETE/REPLACE verb.
/// </summary>
/// <remarks>
/// Deliberately conservative: any <c>;</c> other than trailing ones is treated as a statement
/// separator, even inside a string literal, and a <c>WITH</c> query that mentions one of the
/// write verbs anywhere (including the <c>replace()</c> function) is rejected.
/// </remarks>
private static bool IsReadOnlyQuery(string sql)
{
    if (string.IsNullOrWhiteSpace(sql))
    {
        return false;
    }

    var statement = sql.Trim().TrimEnd(';').TrimEnd();
    if (statement.Length == 0 || statement.Contains(';'))
    {
        return false;
    }

    if (statement.StartsWith("SELECT", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    if (!statement.StartsWith("WITH", StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    // In SQLite a CTE can prefix a data-modifying statement, so scan for write verbs as whole words.
    var words = statement.Split(new[] { ' ', '\t', '\r', '\n', '(', ')', ',' }, StringSplitOptions.RemoveEmptyEntries);
    return !words.Any(w =>
        w.Equals("INSERT", StringComparison.OrdinalIgnoreCase) ||
        w.Equals("UPDATE", StringComparison.OrdinalIgnoreCase) ||
        w.Equals("DELETE", StringComparison.OrdinalIgnoreCase) ||
        w.Equals("REPLACE", StringComparison.OrdinalIgnoreCase));
}
|
||
|
||
/// <summary>
/// Asks the AI to extract one transaction (date, amount, summary, type, category) from free text.
/// </summary>
/// <param name="text">The text describing the transaction.</param>
/// <returns>
/// The parsed transaction, or <c>null</c> when <paramref name="text"/> is blank, the AI returns
/// nothing, or its answer cannot be deserialized.
/// </returns>
public async Task<TransactionParseResult?> ParseOneLineBillAsync(string text)
{
    // Nothing to parse: skip the AI round-trip.
    if (string.IsNullOrWhiteSpace(text))
    {
        return null;
    }

    // Category list offered to the model (same listing the other prompts use).
    var categoryInfo = await GetCategoryInfoAsync();

    var sysPrompt = $$"""
        你是一个智能账单解析助手。请从用户提供的文本中提取交易信息,包括日期、金额、摘要、类型和分类。

        可用的分类列表:
        {{categoryInfo}}

        请返回 JSON 格式,包含以下字段:
        - OccurredAt: 日期时间,格式 yyyy-MM-dd HH:mm:ss。当前系统时间为{{DateTime.Now:yyyy-MM-dd HH:mm:ss}}。
        - Amount: 金额,数字。
        - Reason: 备注/摘要,原文或其他补充信息。
        - Type: 交易类型,0=支出,1=收入,2=不计入收支。根据语义判断。
        - Classify: 分类,请从以下现有分类中选择最匹配的一个:如果无法匹配,请留空。

        返回示例
        {
        "OccurredAt": "2024-06-15 14:30:00",
        "Amount": 150.75,
        "Reason": "午餐消费",
        "Type": 0,
        "Classify": "餐饮"
        }

        只返回 JSON,不要包含 markdown 标记。
        """;

    var json = await openAiService.ChatAsync(sysPrompt, text);
    if (string.IsNullOrWhiteSpace(json))
    {
        return null;
    }

    try
    {
        // Remove markdown code fences the model may add despite the instruction.
        json = json.Replace("```json", "").Replace("```", "").Trim();
        var options = new JsonSerializerOptions { PropertyNameCaseInsensitive = true };
        return JsonSerializer.Deserialize<TransactionParseResult>(json, options);
    }
    catch (Exception ex)
    {
        logger.LogError(ex, "解析账单失败");
        return null;
    }
}
|
||
|
||
/// <summary>
/// Finds the index of the <c>}</c> that closes the <c>{</c> at <paramref name="startPos"/>.
/// </summary>
/// <param name="str">Text to scan; expected to hold JSON starting at <paramref name="startPos"/>.</param>
/// <param name="startPos">Index at which scanning starts, normally the position of an opening brace.</param>
/// <returns>
/// The index of the matching closing brace, or <c>-1</c> when the object is not closed within
/// <paramref name="str"/> (for example because more streamed data is still to come).
/// </returns>
/// <remarks>
/// Braces inside double-quoted JSON strings are ignored, and backslash escapes (such as <c>\"</c>)
/// are honoured, so a value like <c>"a}b"</c> does not end the object early.
/// </remarks>
private static int FindMatchingBrace(string str, int startPos)
{
    var depth = 0;
    var inString = false;
    var escaped = false;

    for (var i = startPos; i < str.Length; i++)
    {
        var c = str[i];

        if (inString)
        {
            if (escaped)
            {
                // The character after a backslash is part of the string, whatever it is.
                escaped = false;
            }
            else if (c == '\\')
            {
                escaped = true;
            }
            else if (c == '"')
            {
                inString = false;
            }

            continue;
        }

        switch (c)
        {
            case '"':
                inString = true;
                break;
            case '{':
                depth++;
                break;
            case '}':
                depth--;
                if (depth == 0)
                {
                    return i;
                }

                break;
        }
    }

    return -1;
}
|
||
|
||
/// <summary>
/// Maps a transaction type to the Chinese label used in prompts.
/// </summary>
/// <param name="type">The transaction type.</param>
/// <returns>The label for the type, or "未知" for a value without a dedicated label.</returns>
private static string GetTypeName(TransactionType type)
{
    switch (type)
    {
        case TransactionType.Expense:
            return "支出";
        case TransactionType.Income:
            return "收入";
        case TransactionType.None:
            return "不计入收支";
        default:
            return "未知";
    }
}
|
||
|
||
/// <summary>
/// Builds the category listing embedded in prompts: for each of the type values 0, 1 and 2,
/// a heading line with the type label followed by one "- name" line per category of that type.
/// </summary>
/// <returns>The listing as a single multi-line string.</returns>
private async Task<string> GetCategoryInfoAsync()
{
    var allCategories = await categoryRepository.GetAllAsync();
    var listing = new StringBuilder();

    for (var typeValue = 0; typeValue <= 2; typeValue++)
    {
        // Copy the loop variable so the lambda below sees this iteration's value.
        var current = typeValue;

        var heading = GetTypeName((TransactionType)current);
        listing.AppendLine($"{heading}: ");

        var matching = allCategories.Where(c => (int)c.Type == current).ToList();
        matching.ForEach(c => listing.AppendLine($"- {c.Name}"));
    }

    return listing.ToString();
}
|
||
}
|
||
|
||
/// <summary>
/// DTO for one classification result returned by the AI for a group of transactions.
/// </summary>
public record GroupClassifyResult
{
    /// <summary>Transaction summary identifying the group (JSON property <c>reason</c>).</summary>
    [JsonPropertyName("reason")]
    public string Reason { get; init; } = string.Empty;

    /// <summary>Category name chosen for the group (JSON property <c>classify</c>); may be absent.</summary>
    [JsonPropertyName("classify")]
    public string? Classify { get; init; }

    /// <summary>Transaction type chosen for the group (JSON property <c>type</c>).</summary>
    [JsonPropertyName("type")]
    public TransactionType Type { get; init; }
}
|
||
|
||
/// <summary>
/// A transaction extracted from free text by the AI.
/// </summary>
/// <param name="OccurredAt">Date and time of the transaction as text.</param>
/// <param name="Classify">Category name.</param>
/// <param name="Amount">Transaction amount.</param>
/// <param name="Reason">Summary or remark.</param>
/// <param name="Type">Transaction type.</param>
public record TransactionParseResult(
    string OccurredAt,
    string Classify,
    decimal Amount,
    string Reason,
    TransactionType Type);