first commit
All checks were successful
Docker Build & Deploy / Build Docker Image (push) Successful in 8s
Docker Build & Deploy / Deploy to Production (push) Successful in 7s

This commit is contained in:
孙诚
2025-12-25 11:20:56 +08:00
commit 4526cc6396
104 changed files with 11070 additions and 0 deletions

View File

@@ -0,0 +1,71 @@
namespace Service.EmailParseServices;
/// <summary>
/// Rule-based parser for plain-text transaction notices sent from
/// <c>95555@message.cmbchina.com</c>.
/// </summary>
public class EmailParseForm95555(
    ILogger<EmailParseForm95555> logger,
    IOpenAiService openAiService
) : EmailParseServicesBase(logger, openAiService)
{
    // Built once and reused instead of being re-resolved on every call.
    private static readonly Regex HtmlTagRegex = new("<.*?>");

    // NOTE(review): the optional "type" group sits behind a lazy ".*?", so it only captures when
    // the keyword directly follows "于"; otherwise it stays empty and the "reason" group absorbs
    // that text. The pattern is left unchanged here - confirm against real mail samples before
    // tightening it.
    private static readonly Regex TransactionRegex = new(
        "您账户(?<card>\\d+)于.*?(?<type>收入|支出|消费|转入|转出)?.*?在?(?<reason>.+?)(?<amount>\\d+\\.\\d{1,2})元,余额(?<balance>\\d+\\.\\d{1,2})");

    /// <summary>
    /// Accepts an email only when the sender contains the 95555 address and the body has no HTML tags.
    /// </summary>
    /// <param name="from">Sender of the email.</param>
    /// <param name="body">Body of the email.</param>
    /// <returns><c>true</c> when this parser should handle the email.</returns>
    public override bool CanParse(string from, string body)
    {
        // Mail addresses are compared without regard to letter case.
        if (!from.Contains("95555@message.cmbchina.com", StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        // Must not contain HTML tags.
        return !HtmlTagRegex.IsMatch(body);
    }

    /// <summary>
    /// Extracts every transaction matched by the notification pattern.
    /// The work is synchronous, so the result is wrapped in an already-completed task.
    /// </summary>
    /// <param name="emailContent">Plain-text email body.</param>
    /// <returns>The extracted records; an empty array when nothing matches.</returns>
    public override Task<(
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[]> ParseEmailContentAsync(string emailContent)
        => Task.FromResult(ParseTransactions(emailContent));

    /// <summary>Runs the regex over the body and converts each usable match into a record.</summary>
    private (
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[] ParseTransactions(string emailContent)
    {
        var matches = TransactionRegex.Matches(emailContent);
        if (matches.Count <= 0)
        {
            logger.LogWarning("未能从招商银行邮件内容中解析出交易信息");
            return [];
        }

        var results = new List<(
            string card,
            string reason,
            decimal amount,
            decimal balance,
            TransactionType type,
            DateTime? occurredAt
        )>();

        foreach (Match match in matches)
        {
            var card = match.Groups["card"].Value;
            var reason = match.Groups["reason"].Value;
            var typeStr = match.Groups["type"].Value;

            // The pattern fixes '.' as the decimal separator, so parse with the invariant culture
            // rather than whatever culture the host process happens to run under.
            var parsed =
                decimal.TryParse(match.Groups["amount"].Value, NumberStyles.Number, CultureInfo.InvariantCulture, out var amount)
                & decimal.TryParse(match.Groups["balance"].Value, NumberStyles.Number, CultureInfo.InvariantCulture, out var balance);

            if (!parsed || string.IsNullOrEmpty(card) || string.IsNullOrEmpty(reason))
            {
                continue;
            }

            var type = DetermineTransactionType(typeStr, reason, amount);

            // This mail format yields no usable timestamp, so occurredAt is left null.
            results.Add((card, reason, amount, balance, type, null));
        }

        return results.ToArray();
    }
}

View File

@@ -0,0 +1,153 @@
using HtmlAgilityPack;
namespace Service.EmailParseServices;
/// <summary>
/// Rule-based parser for HTML consumption-detail emails sent from
/// <c>ccsvc@message.cmbchina.com</c>.
/// </summary>
public class EmailParseFormCCSVC(
    ILogger<EmailParseFormCCSVC> logger,
    IOpenAiService openAiService
) : EmailParseServicesBase(logger, openAiService)
{
    /// <summary>
    /// Accepts an email only when the sender contains the ccsvc address and the body has HTML tags.
    /// </summary>
    /// <param name="from">Sender of the email.</param>
    /// <param name="body">Body of the email.</param>
    /// <returns><c>true</c> when this parser should handle the email.</returns>
    public override bool CanParse(string from, string body)
    {
        // Mail addresses are compared without regard to letter case.
        if (!from.Contains("ccsvc@message.cmbchina.com", StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        // Must contain HTML tags.
        return Regex.IsMatch(body, "<.*?>");
    }

    /// <summary>
    /// Extracts the transactions listed in the HTML body.
    /// The work is synchronous, so the result is wrapped in an already-completed task.
    /// </summary>
    /// <param name="emailContent">HTML email body.</param>
    /// <returns>
    /// The extracted records; an empty array when the date header is missing or unparsable,
    /// or when no transaction nodes are found.
    /// </returns>
    public override Task<(
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[]> ParseEmailContentAsync(string emailContent)
        => Task.FromResult(ParseTransactions(emailContent));

    /// <summary>Loads the HTML and walks date, available limit and transaction nodes in that order.</summary>
    private (
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[] ParseTransactions(string emailContent)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(emailContent);

        // 1. Date shared by every transaction in the mail.
        if (!TryParseStatementDate(doc, out var date))
        {
            return [];
        }

        // 2. Balance (available limit); stays 0 when it cannot be located or parsed.
        var balance = ParseAvailableLimit(doc);

        // 3. Transactions.
        var transactionNodes = doc.DocumentNode.SelectNodes("//span[@id='fixBand4']");
        if (transactionNodes is null)
        {
            return [];
        }

        var result = new List<(
            string card,
            string reason,
            decimal amount,
            decimal balance,
            TransactionType type,
            DateTime? occurredAt
        )>();

        foreach (var node in transactionNodes)
        {
            try
            {
                var record = ParseTransactionNode(node, date, balance);
                if (record is not null)
                {
                    result.Add(record.Value);
                }
            }
            catch (Exception ex)
            {
                // Best effort: one malformed node must not discard the remaining transactions.
                logger.LogError(ex, "Error parsing transaction node");
            }
        }

        return result.ToArray();
    }

    /// <summary>
    /// Reads the date from the header node, whose text looks like "2025/12/21&amp;nbsp;您的消费明细如下:".
    /// </summary>
    private bool TryParseStatementDate(HtmlDocument doc, out DateTime date)
    {
        date = default;

        var dateNode = doc.DocumentNode.SelectSingleNode("//font[contains(text(), '您的消费明细如下')]");
        if (dateNode is null)
        {
            logger.LogWarning("Date node not found");
            return false;
        }

        var dateText = dateNode.InnerText.Trim();
        var dateMatch = Regex.Match(dateText, @"\d{4}/\d{1,2}/\d{1,2}");

        // Parse with a fixed format and the invariant culture so the host culture's calendar and
        // date order cannot change the result.
        if (!dateMatch.Success ||
            !DateTime.TryParseExact(dateMatch.Value, "yyyy/M/d", CultureInfo.InvariantCulture, DateTimeStyles.None, out date))
        {
            logger.LogWarning("Failed to parse date from: {DateText}", dateText);
            return false;
        }

        return true;
    }

    /// <summary>
    /// Finds the "可用额度" label, steps to the closest preceding table row and reads the "¥" amount in it.
    /// Returns 0 when any step fails.
    /// </summary>
    private static decimal ParseAvailableLimit(HtmlDocument doc)
    {
        var limitLabelNode = doc.DocumentNode.SelectSingleNode("//font[contains(text(), '可用额度')]");

        // Go up to the enclosing TR.
        var row = limitLabelNode?.Ancestors("tr").FirstOrDefault();
        if (row is null)
        {
            return 0m;
        }

        // Skip sibling nodes that are not rows (e.g. text nodes between rows).
        var previousRow = row.PreviousSibling;
        while (previousRow is not null && previousRow.Name != "tr")
        {
            previousRow = previousRow.PreviousSibling;
        }

        var balanceNode = previousRow?.SelectSingleNode(".//font[contains(text(), '¥')]");
        if (balanceNode is null)
        {
            return 0m;
        }

        var balanceText = balanceNode.InnerText.Replace("¥", "").Replace(",", "").Trim();
        return decimal.TryParse(balanceText, NumberStyles.Number, CultureInfo.InvariantCulture, out var balance)
            ? balance
            : 0m;
    }

    /// <summary>
    /// Converts one transaction node into a record.
    /// Returns <c>null</c> when the info block is missing or the amount cannot be parsed.
    /// </summary>
    private (
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )? ParseTransactionNode(HtmlNode node, DateTime date, decimal balance)
    {
        // Time, e.g. "10:13:43"; falls back to the bare date when absent or unparsable.
        var timeText = node.SelectSingleNode(".//span[@id='fixBand5']//font")?.InnerText.Trim();
        DateTime? occurredAt = date;
        if (!string.IsNullOrEmpty(timeText))
        {
            var datePart = date.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
            if (DateTime.TryParse($"{datePart} {timeText}", CultureInfo.InvariantCulture, DateTimeStyles.None, out var dt))
            {
                occurredAt = dt;
            }
        }

        // Info block.
        var infoNode = node.SelectSingleNode(".//span[@id='fixBand12']");
        if (infoNode is null)
        {
            return null;
        }

        // Amount: '.' is the decimal separator and ',' a group separator regardless of host culture.
        var amountNode = infoNode.SelectSingleNode(".//font[contains(text(), 'CNY')]");
        var amountText = amountNode?.InnerText.Replace("CNY", "").Replace("&nbsp;", "").Trim();
        if (!decimal.TryParse(amountText, NumberStyles.Number, CultureInfo.InvariantCulture, out var amount))
        {
            return null;
        }

        // Description: replace &nbsp; and the non-breaking space (U+00A0) with a normal space.
        var descNode = infoNode.SelectSingleNode(".//tr[2]//font");
        var descText = (descNode?.InnerText ?? "").Replace("&nbsp;", " ");
        descText = HtmlEntity.DeEntitize(descText).Replace((char)160, ' ').Trim();

        // Expected shape: "尾号4390 消费 财付通-luckincoffee瑞幸咖啡"
        var parts = descText.Split(' ', StringSplitOptions.RemoveEmptyEntries);

        var card = "";
        if (parts.Length > 0 && parts[0].StartsWith("尾号", StringComparison.Ordinal))
        {
            card = parts[0].Replace("尾号", "");
        }

        var type = TransactionType.Expense;
        if (parts.Length > 1)
        {
            // The full description (card and type tokens included) is passed as the reason, so the
            // keyword matching in DetermineTransactionType also sees the type token.
            type = DetermineTransactionType(parts[1], descText, amount);
        }

        var reason = parts.Length > 2 ? string.Join(" ", parts.Skip(2)) : descText;

        return (card, reason, amount, balance, type, occurredAt);
    }
}

View File

@@ -0,0 +1,211 @@
namespace Service.EmailParseServices;
/// <summary>
/// Contract for a service that recognises one kind of email and extracts transaction records from it.
/// </summary>
public interface IEmailParseServices
{
    /// <summary>
    /// Decides, from the sender and the body, whether this service handles the email.
    /// </summary>
    /// <param name="from">Sender of the email.</param>
    /// <param name="body">Body of the email.</param>
    /// <returns><c>true</c> when this service can parse the email.</returns>
    bool CanParse(string from, string body);

    /// <summary>
    /// Parses the email content and extracts transaction information.
    /// </summary>
    /// <param name="emailContent">Content of the email to parse.</param>
    /// <returns>
    /// A task producing one tuple per transaction: card, reason, amount, balance,
    /// transaction type and an optional occurrence time.
    /// </returns>
    Task<(string card, string reason, decimal amount, decimal balance, TransactionType type, DateTime? occurredAt)[]> ParseAsync(
        string emailContent);
}
/// <summary>
/// Base class for email parsers: runs the subclass's rule-based parser first and falls back to an
/// AI-based extraction when the rules yield no records.
/// </summary>
public abstract class EmailParseServicesBase(
    ILogger<EmailParseServicesBase> logger,
    IOpenAiService openAiService
) : IEmailParseServices
{
    // Keywords looked up in the transaction reason; income keywords are checked before expense keywords.
    private static readonly string[] IncomeKeywords = ["工资", "奖金", "退款", "返现", "收入", "转入", "存入", "利息", "分红"];
    private static readonly string[] ExpenseKeywords = ["消费", "支付", "购买", "转出", "取款", "支出", "扣款", "缴费"];

    /// <summary>Decides whether this parser handles the email with the given sender and body.</summary>
    public abstract bool CanParse(string from, string body);

    /// <summary>
    /// Parses the email: rule-based parsing first, AI parsing as a fallback.
    /// </summary>
    /// <param name="emailContent">Content of the email to parse.</param>
    /// <returns>The extracted records; an empty array when neither approach finds any.</returns>
    public async Task<(
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[]> ParseAsync(string emailContent)
    {
        var result = await ParseEmailContentAsync(emailContent);
        if (result.Length > 0)
        {
            logger.LogInformation("使用规则成功解析邮件内容,提取到 {Count} 条交易记录", result.Length);
            return result;
        }

        logger.LogInformation("规则解析邮件内容失败尝试使用AI进行解析");

        // AI fallback.
        result = await ParseByAiAsync(emailContent) ?? [];
        if (result.Length == 0)
        {
            logger.LogWarning("AI解析邮件内容也未能提取到任何交易记录");
        }

        return result;
    }

    /// <summary>Rule-based extraction implemented by each concrete parser.</summary>
    /// <param name="emailContent">Content of the email to parse.</param>
    /// <returns>The extracted records; an empty array triggers the AI fallback in <see cref="ParseAsync"/>.</returns>
    public abstract Task<(
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[]> ParseEmailContentAsync(string emailContent);

    /// <summary>
    /// Asks the AI service for a JSON array of transactions and converts it into records.
    /// </summary>
    /// <param name="body">Email body embedded in the user prompt.</param>
    /// <returns>
    /// The records, or <c>null</c> when the AI returns nothing, returns invalid JSON,
    /// returns something other than an object/array, or no usable record is found.
    /// </returns>
    private async Task<(
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[]?> ParseByAiAsync(string body)
    {
        var systemPrompt = "你是一个信息抽取助手。仅输出严格的JSON数组不要包含任何多余文本。每个交易记录包含字段: card(字符串), reason(字符串), amount(数字), balance(数字), type(字符串,值为'收入'或'支出'), occurredAt(字符串yyyy-MM-dd HH:mm:ss格式日期时间)。如果缺失,请推断或置空。";
        var userPrompt = $"从下面这封银行账单相关邮件正文中提取所有交易记录返回JSON数组格式每个元素包含: card, reason, amount, balance, type(收入或支出), occurredAt(非必要)。正文如下:\n\n{body}";

        var contentText = await openAiService.ChatAsync(systemPrompt, userPrompt);
        if (string.IsNullOrWhiteSpace(contentText))
        {
            logger.LogWarning("AI未返回任何内容无法解析邮件");
            return null;
        }

        logger.LogDebug("AI返回的内容: {Content}", contentText);

        // Remove a possible markdown code fence around the JSON.
        contentText = StripMarkdownFence(contentText);

        try
        {
            // contentText is expected to be a JSON array.
            using var jsonDoc = JsonDocument.Parse(contentText);
            return ExtractRecords(jsonDoc.RootElement);
        }
        catch (JsonException ex)
        {
            // Model output is untrusted text; malformed JSON must not crash the fallback path.
            logger.LogWarning(ex, "AI返回的内容不是合法的JSON无法解析邮件");
            return null;
        }
    }

    /// <summary>
    /// Trims the text and, when it starts with a ``` fence, drops the opening fence line
    /// (``` or ```json) and a trailing ``` fence.
    /// </summary>
    private static string StripMarkdownFence(string text)
    {
        text = text.Trim();
        if (!text.StartsWith("```", StringComparison.Ordinal))
        {
            return text;
        }

        // Drop the whole opening fence line; when everything is on one line, drop just the backticks.
        var firstNewLine = text.IndexOf('\n');
        text = firstNewLine > 0 ? text[(firstNewLine + 1)..] : text[3..];

        if (text.EndsWith("```", StringComparison.Ordinal))
        {
            text = text[..^3];
        }

        return text.Trim();
    }

    /// <summary>Converts the JSON root (an array, or a single object for leniency) into records.</summary>
    private (
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[]? ExtractRecords(JsonElement root)
    {
        // Be lenient when a single object is returned instead of an array.
        if (root.ValueKind == JsonValueKind.Object)
        {
            logger.LogWarning("AI返回的内容是单个对象而非数组尝试兼容处理");
            var single = ParseSingleRecord(root);
            return single != null ? [single.Value] : null;
        }

        if (root.ValueKind != JsonValueKind.Array)
        {
            logger.LogWarning("AI返回的内容不是JSON数组无法解析邮件");
            return null;
        }

        var results = new List<(string card, string reason, decimal amount, decimal balance, TransactionType type, DateTime? occurredAt)>();
        foreach (var element in root.EnumerateArray())
        {
            var record = ParseSingleRecord(element);
            if (record != null)
            {
                logger.LogInformation("解析到一条交易记录: {@Record}", record.Value);
                results.Add(record.Value);
            }
        }

        if (results.Count == 0)
        {
            return null;
        }

        logger.LogInformation("使用AI成功解析邮件内容提取到 {Count} 条交易记录", results.Count);
        return results.ToArray();
    }

    /// <summary>
    /// Converts one JSON object into a record.
    /// Returns <c>null</c> when the element is not an object or when card or reason is blank.
    /// </summary>
    private (string card, string reason, decimal amount, decimal balance, TransactionType type, DateTime? occurredAt)? ParseSingleRecord(JsonElement obj)
    {
        // TryGetProperty throws on anything but an object (e.g. a string inside the array).
        if (obj.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        var card = ReadString(obj, "card");
        var reason = ReadString(obj, "reason");
        if (string.IsNullOrWhiteSpace(card) || string.IsNullOrWhiteSpace(reason))
        {
            return null;
        }

        var typeStr = ReadString(obj, "type");
        var occurredAtStr = ReadString(obj, "occurredAt");
        var amount = ReadDecimal(obj, "amount");
        var balance = ReadDecimal(obj, "balance");

        // The prompt asks for "yyyy-MM-dd HH:mm:ss"; parse independently of the host culture.
        DateTime? occurredAt =
            DateTime.TryParse(occurredAtStr, CultureInfo.InvariantCulture, DateTimeStyles.None, out var occurredAtValue)
                ? occurredAtValue
                : null;

        var type = DetermineTransactionType(typeStr, reason, amount);
        return (card, reason, amount, balance, type, occurredAt);
    }

    /// <summary>
    /// Reads a property as text: strings as-is, numbers as their raw JSON text (e.g. a numeric card),
    /// anything else or a missing property as an empty string.
    /// </summary>
    private static string ReadString(JsonElement obj, string propertyName)
    {
        if (!obj.TryGetProperty(propertyName, out var value))
        {
            return string.Empty;
        }

        return value.ValueKind switch
        {
            JsonValueKind.String => value.GetString() ?? string.Empty,
            JsonValueKind.Number => value.GetRawText(),
            _ => string.Empty,
        };
    }

    /// <summary>
    /// Reads a property as a decimal from either a JSON number or a numeric string; 0 when missing or unparsable.
    /// </summary>
    private static decimal ReadDecimal(JsonElement obj, string propertyName)
    {
        if (!obj.TryGetProperty(propertyName, out var value))
        {
            return 0m;
        }

        return value.ValueKind switch
        {
            JsonValueKind.Number when value.TryGetDecimal(out var number) => number,
            JsonValueKind.String when decimal.TryParse(value.GetString(), NumberStyles.Any, CultureInfo.InvariantCulture, out var parsed) => parsed,
            _ => 0m,
        };
    }

    /// <summary>
    /// Determines the transaction type.
    /// </summary>
    /// <param name="typeStr">Explicit type text, if any; takes priority when it names income or expense.</param>
    /// <param name="reason">Transaction reason searched for income keywords first, then expense keywords.</param>
    /// <param name="amount">Used last: negative means expense, positive means income.</param>
    /// <returns>The resolved type; expense when nothing else decides.</returns>
    protected TransactionType DetermineTransactionType(string typeStr, string reason, decimal amount)
    {
        // Prefer the explicit type string.
        if (!string.IsNullOrWhiteSpace(typeStr))
        {
            var explicitType = typeStr.Trim();

            if (explicitType.Contains("收入", StringComparison.Ordinal) ||
                explicitType.Contains("income", StringComparison.OrdinalIgnoreCase) ||
                explicitType.Equals("收", StringComparison.OrdinalIgnoreCase))
            {
                return TransactionType.Income;
            }

            if (explicitType.Contains("支出", StringComparison.Ordinal) ||
                explicitType.Contains("expense", StringComparison.OrdinalIgnoreCase) ||
                explicitType.Equals("支", StringComparison.OrdinalIgnoreCase))
            {
                return TransactionType.Expense;
            }
        }

        // Then look for keywords in the reason. The keywords have no letter case, so an ordinal
        // search is used instead of lower-casing the text with the current culture.
        var reasonText = reason ?? string.Empty;

        if (IncomeKeywords.Any(keyword => reasonText.Contains(keyword, StringComparison.Ordinal)))
        {
            return TransactionType.Income;
        }

        if (ExpenseKeywords.Any(keyword => reasonText.Contains(keyword, StringComparison.Ordinal)))
        {
            return TransactionType.Expense;
        }

        // Finally decide by the sign of the amount; zero falls through to the default, expense.
        return amount > 0 ? TransactionType.Income : TransactionType.Expense;
    }
}