结构调整
This commit is contained in:
107
Service/EmailServices/EmailParse/EmailParseForm95555.cs
Normal file
107
Service/EmailServices/EmailParse/EmailParseForm95555.cs
Normal file
@@ -0,0 +1,107 @@
|
||||
namespace Service.EmailParseServices;
|
||||
|
||||
/// <summary>
/// Rule-based parser for the plain-text "账户变动通知" emails sent from
/// <c>95555@message.cmbchina.com</c>.
/// </summary>
public class EmailParseForm95555(
    ILogger<EmailParseForm95555> logger,
    IOpenAiService openAiService
) : EmailParseServicesBase(logger, openAiService)
{
    // Sample 1: 您账户8826于12月31日09:34在财付通-微信支付-这有电快捷支付1.00元,余额30.21
    // Sample 2: 您账户8826于12月31日10:47入账款项,人民币1000.00,余额人民币1030.21。
    // Built once instead of on every call; the pattern text itself is unchanged.
    private static readonly Regex TransactionRegex = new(
        "您账户(?<card>\\d+)" + // card number
        "于(?<time>\\d{1,2}月\\d{1,2}日\\d{1,2}:\\d{2})" + // transaction time
        "(?:(?<type>收入|支出|消费|转入|转出|入账款项))?" + // transaction type (optional)
        "(?:在(?<reason>[^\\d,。]*?))?" + // transaction reason (optional)
        ",?(?:人民币)?(?<amount>\\d+\\.\\d{1,2})(?:元)?" + // amount, trailing "元" is optional
        ",余额(?:人民币)?(?<balance>\\d+\\.\\d{1,2})" + // balance
        "。?", // trailing full stop is optional
        RegexOptions.Compiled);

    /// <summary>
    /// Accepts a message only when the sender contains the 95555 address, the subject
    /// contains "账户变动通知" and the body contains nothing that looks like an HTML tag.
    /// </summary>
    public override bool CanParse(string from, string subject, string body)
    {
        if (!from.Contains("95555@message.cmbchina.com"))
        {
            return false;
        }

        if (!subject.Contains("账户变动通知"))
        {
            return false;
        }

        // The body must not contain HTML tags.
        return !Regex.IsMatch(body, "<.*?>");
    }

    /// <summary>
    /// Extracts every transaction sentence matched by <see cref="TransactionRegex"/>.
    /// </summary>
    /// <param name="emailContent">Plain-text email body.</param>
    /// <returns>
    /// One tuple per usable match; an empty array when nothing matches (a warning is logged).
    /// </returns>
    public override Task<(
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[]> ParseEmailContentAsync(string emailContent)
    {
        // The work is purely synchronous, so wrap the result instead of using an
        // async method that never awaits.
        return Task.FromResult(ParseTransactions(emailContent));
    }

    /// <summary>Synchronous implementation behind <see cref="ParseEmailContentAsync"/>.</summary>
    private (
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[] ParseTransactions(string emailContent)
    {
        var matches = TransactionRegex.Matches(emailContent);
        if (matches.Count == 0)
        {
            logger.LogWarning("未能从招商银行邮件内容中解析出交易信息");
            return [];
        }

        var results = new List<(
            string card,
            string reason,
            decimal amount,
            decimal balance,
            TransactionType type,
            DateTime? occurredAt
        )>(matches.Count);

        foreach (Match match in matches)
        {
            var card = match.Groups["card"].Value;
            var typeStr = match.Groups["type"].Value;

            // When the sentence has no "在<reason>" part, fall back to the type keyword.
            var reason = match.Groups["reason"].Value;
            if (string.IsNullOrEmpty(reason))
            {
                reason = typeStr;
            }

            if (string.IsNullOrEmpty(card) || string.IsNullOrEmpty(reason))
            {
                continue;
            }

            // The email always uses '.' as the decimal separator, so parse with the
            // invariant culture rather than whatever culture the host runs under.
            if (!decimal.TryParse(match.Groups["amount"].Value, NumberStyles.Number, CultureInfo.InvariantCulture, out var amount) ||
                !decimal.TryParse(match.Groups["balance"].Value, NumberStyles.Number, CultureInfo.InvariantCulture, out var balance))
            {
                continue;
            }

            var type = DetermineTransactionType(typeStr, reason, amount);
            var occurredAt = ParseOccurredAt(match.Groups["time"].Value);
            results.Add((card, reason, amount, balance, type, occurredAt));
        }

        return results.ToArray();
    }

    /// <summary>
    /// Turns a year-less timestamp such as "12月31日09:34" into a full date.
    /// </summary>
    /// <param name="value">Text captured by the <c>time</c> group of the transaction regex.</param>
    /// <returns>The resolved timestamp, or <c>null</c> when the text is not a valid date.</returns>
    private static DateTime? ParseOccurredAt(string value)
    {
        const string format = "yyyy年M月d日H:mm";
        var now = DateTime.Now;

        if (!DateTime.TryParseExact($"{now.Year}年{value}", format, CultureInfo.InvariantCulture, DateTimeStyles.None, out var occurredAt))
        {
            // "2月29日" is not a valid date in a non-leap current year; the transaction
            // can then only belong to the previous year, so try that before giving up.
            return DateTime.TryParseExact($"{now.Year - 1}年{value}", format, CultureInfo.InvariantCulture, DateTimeStyles.None, out var previousYear)
                ? previousYear
                : null;
        }

        // A date in the future means the transaction happened last year. One day of
        // tolerance keeps a clock/time-zone difference between this host and the bank
        // from pushing a same-day transaction back a whole year.
        // NOTE(review): the email text is presumably in China local time - confirm.
        if (occurredAt > now.AddDays(1))
        {
            occurredAt = occurredAt.AddYears(-1);
        }

        return occurredAt;
    }
}
|
||||
169
Service/EmailServices/EmailParse/EmailParseFormCCSVC.cs
Normal file
169
Service/EmailServices/EmailParse/EmailParseFormCCSVC.cs
Normal file
@@ -0,0 +1,169 @@
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Service.EmailParseServices;
|
||||
|
||||
/// <summary>
/// Rule-based parser for the HTML "每日信用管家" emails sent from
/// <c>ccsvc@message.cmbchina.com</c>.
/// </summary>
public class EmailParseFormCCSVC(
    ILogger<EmailParseFormCCSVC> logger,
    IOpenAiService openAiService
) : EmailParseServicesBase(logger, openAiService)
{
    /// <summary>
    /// Accepts a message only when the sender contains the ccsvc address, the subject
    /// contains "每日信用管家" and the body contains something that looks like an HTML tag.
    /// </summary>
    public override bool CanParse(string from, string subject, string body)
    {
        if (!from.Contains("ccsvc@message.cmbchina.com"))
        {
            return false;
        }

        if (!subject.Contains("每日信用管家"))
        {
            return false;
        }

        // The body must contain HTML tags.
        return Regex.IsMatch(body, "<.*?>");
    }

    /// <summary>
    /// Walks the HTML body and extracts the statement date, the figure shown above the
    /// "可用额度" label (used as the balance, 0 when not found) and one record per
    /// <c>fixBand4</c> span.
    /// </summary>
    /// <param name="emailContent">HTML email body.</param>
    /// <returns>
    /// The extracted records; an empty array when the date header is missing or unreadable.
    /// </returns>
    public override async Task<(
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[]> ParseEmailContentAsync(string emailContent)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(emailContent);

        var result = new List<(string, string, decimal, decimal, TransactionType, DateTime?)>();

        // 1. Statement date, e.g. "2025/12/21 您的消费明细如下:"
        var dateNode = doc.DocumentNode.SelectSingleNode("//font[contains(text(), '您的消费明细如下')]");
        if (dateNode == null)
        {
            logger.LogWarning("Date node not found");
            return [];
        }

        var dateText = dateNode.InnerText.Trim();
        var dateMatch = Regex.Match(dateText, @"\d{4}/\d{1,2}/\d{1,2}");
        // Exact format + invariant culture: the result must not depend on host culture.
        if (!dateMatch.Success ||
            !DateTime.TryParseExact(dateMatch.Value, "yyyy/M/d", CultureInfo.InvariantCulture, DateTimeStyles.None, out var date))
        {
            logger.LogWarning("Failed to parse date from: {DateText}", dateText);
            return [];
        }

        // Formatted once with the invariant culture so a non-Gregorian host calendar
        // cannot change the year that gets re-parsed below.
        var datePrefix = date.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);

        // 2. Balance (available limit): the amount sits in the row before the "可用额度" label row.
        decimal balance = 0;
        var limitLabelNode = doc.DocumentNode.SelectSingleNode("//font[contains(text(), '可用额度')]");
        var labelRow = limitLabelNode?.Ancestors("tr").FirstOrDefault();
        if (labelRow != null)
        {
            // Skip text/whitespace siblings until the previous <tr>.
            var prevTr = labelRow.PreviousSibling;
            while (prevTr != null && prevTr.Name != "tr")
            {
                prevTr = prevTr.PreviousSibling;
            }

            var balanceNode = prevTr?.SelectSingleNode(".//font[contains(text(), '¥')]");
            if (balanceNode != null)
            {
                var balanceStr = balanceNode.InnerText.Replace("¥", "").Replace(",", "").Trim();
                // On failure balance stays 0, as before.
                decimal.TryParse(balanceStr, NumberStyles.Number, CultureInfo.InvariantCulture, out balance);
            }
        }

        // 3. Transactions
        var transactionNodes = doc.DocumentNode.SelectNodes("//span[@id='fixBand4']");
        if (transactionNodes != null)
        {
            foreach (var node in transactionNodes)
            {
                try
                {
                    // Time of day, e.g. "10:13:43"; falls back to the bare statement date.
                    var timeNode = node.SelectSingleNode(".//span[@id='fixBand5']//font");
                    var timeText = timeNode?.InnerText.Trim();

                    DateTime? occurredAt = date;
                    if (!string.IsNullOrEmpty(timeText) &&
                        DateTime.TryParse($"{datePrefix} {timeText}", CultureInfo.InvariantCulture, DateTimeStyles.None, out var dt))
                    {
                        occurredAt = dt;
                    }

                    // Info block
                    var infoNode = node.SelectSingleNode(".//span[@id='fixBand12']");
                    if (infoNode == null)
                    {
                        continue;
                    }

                    // Amount: decode entities and drop both plain and non-breaking spaces,
                    // the same way the description is cleaned, so "CNY&nbsp;12.00" parses.
                    var amountNode = infoNode.SelectSingleNode(".//font[contains(text(), 'CNY')]");
                    var amountText = amountNode == null
                        ? null
                        : HtmlEntity.DeEntitize(amountNode.InnerText)
                            .Replace("CNY", "")
                            .Replace(" ", "")
                            .Replace("\u00A0", "")
                            .Trim();
                    if (!decimal.TryParse(amountText, NumberStyles.Number, CultureInfo.InvariantCulture, out var amount))
                    {
                        continue;
                    }

                    // Description, e.g. "尾号4390 消费 财付通-luckincoffee瑞幸咖啡".
                    // DeEntitize turns &nbsp; into U+00A0, which is then mapped to a normal space.
                    var descNode = infoNode.SelectSingleNode(".//tr[2]//font");
                    var descText = HtmlEntity.DeEntitize(descNode?.InnerText ?? "")
                        .Replace((char)160, ' ')
                        .Trim();

                    var parts = descText.Split(' ', StringSplitOptions.RemoveEmptyEntries);

                    var card = "";
                    if (parts.Length > 0 && parts[0].StartsWith("尾号", StringComparison.Ordinal))
                    {
                        card = parts[0].Replace("尾号", "");
                    }

                    // Everything after "<card> <type>" is the merchant/reason; otherwise keep the whole text.
                    var reason = parts.Length > 2 ? string.Join(" ", parts.Skip(2)) : descText;

                    // For this email the sign of the amount decides the type: positive is a
                    // purchase, anything else is treated as a refund. The keyword-based type
                    // that used to be computed from parts[1] was always overwritten here, so
                    // that dead call has been dropped.
                    TransactionType type;
                    if (amount > 0)
                    {
                        type = TransactionType.Expense;
                    }
                    else
                    {
                        type = TransactionType.Income;
                        amount = Math.Abs(amount);
                    }

                    result.Add((card, reason, amount, balance, type, occurredAt));
                }
                catch (Exception ex)
                {
                    // Best effort: one malformed row must not discard the others.
                    logger.LogError(ex, "Error parsing transaction node");
                }
            }
        }

        return await Task.FromResult(result.ToArray());
    }
}
|
||||
285
Service/EmailServices/EmailParse/IEmailParseServices.cs
Normal file
285
Service/EmailServices/EmailParse/IEmailParseServices.cs
Normal file
@@ -0,0 +1,285 @@
|
||||
namespace Service.EmailParseServices;
|
||||
|
||||
/// <summary>
/// Contract implemented by each email parser: one member to decide whether a message
/// is handled by the parser and one to extract its transactions.
/// </summary>
public interface IEmailParseServices
{
    /// <summary>
    /// Tells whether this parser handles the message identified by the given sender,
    /// subject and body.
    /// </summary>
    bool CanParse(string from, string subject, string body);

    /// <summary>
    /// Parses the email content and extracts the transaction information.
    /// </summary>
    /// <param name="emailContent">The email body to parse.</param>
    /// <returns>One tuple per extracted transaction.</returns>
    Task<(string card, string reason, decimal amount, decimal balance, TransactionType type, DateTime? occurredAt)[]> ParseAsync(string emailContent);
}
|
||||
|
||||
/// <summary>
/// Base class for email parsers: runs the subclass's rule-based parser first and falls
/// back to an AI extraction when the rules produce no records.
/// </summary>
public abstract class EmailParseServicesBase(
    ILogger<EmailParseServicesBase> logger,
    IOpenAiService openAiService
) : IEmailParseServices
{
    // Keywords in the transaction reason that indicate money coming in.
    private static readonly string[] IncomeKeywords =
    [
        "工资", "奖金", "退款",
        "返现", "收入", "转入",
        "存入", "利息", "分红",
        "入账", "收款",

        // common extensions
        "实发工资", "薪资", "薪水", "薪酬",
        "提成", "佣金", "劳务费",
        "报销入账", "报销款", "补贴", "补助",

        "退款成功", "退回", "退货退款",
        "返现入账", "返利", "返佣",

        "到账", "已到账", "入账成功",
        "收款成功", "收到款项", "到账金额",
        "资金转入", "资金收入",

        "转账收入", "转账入账", "他行来账",
        "工资代发", "代发工资", "单位打款",

        "利息收入", "收益", "收益发放", "理财收益",
        "分红收入", "股息", "红利",

        // wording commonly used by platforms
        "红包", "红包收入", "红包入账",
        "奖励金", "活动奖励", "补贴金",
        "现金奖励", "推广奖励", "返现奖励",

        // deposits
        "现金存入", "柜台存入", "ATM存入",
        "他人转入", "他人汇入"
    ];

    // Keywords in the transaction reason that indicate money going out.
    private static readonly string[] ExpenseKeywords =
    [
        "消费", "支付", "购买",
        "转出", "取款", "支出",
        "扣款", "缴费", "付款",
        "刷卡",

        // common extensions
        "支出金额", "支出人民币", "已支出",
        "已消费", "消费支出", "消费人民币",
        "已支付", "成功支付", "支付成功", "交易支付",
        "已扣款", "扣款成功", "扣费", "扣费成功",
        "转账", "转账支出", "向外转账", "已转出",
        "提现", "现金支出", "现金取款",
        "扣除", "扣除金额", "记账支出",

        // bill / notification wording
        "本期应还", "本期应还金额", "本期账单金额",
        "本期应还人民币", "最低还款额",
        "本期欠款", "欠款金额",

        // wording commonly used by online platforms
        "订单支付", "订单扣款", "订单消费",
        "交易支出", "交易扣款", "交易成功支出",
        "话费充值", "流量充值", "水费", "电费", "燃气费",
        "物业费", "服务费", "手续费", "年费", "会费",
        "利息支出", "还款支出", "代扣", "代缴",

        // credit card / consumer credit repayments
        "信用卡还款", "花呗还款", "白条还款",
        "分期还款", "账单还款", "自动还款"
    ];

    // Expense phrases that contain an income keyword (currently only "利息支出", which
    // contains "利息"). Income keywords are checked first, so without this list such a
    // phrase could never be reached and would be classified as income.
    private static readonly string[] ExpensePhrasesContainingIncomeKeyword =
        ExpenseKeywords
            .Where(e => IncomeKeywords.Any(i => e.Contains(i, StringComparison.OrdinalIgnoreCase)))
            .ToArray();

    /// <summary>Tells whether this parser handles the given message.</summary>
    public abstract bool CanParse(string from, string subject, string body);

    /// <summary>
    /// Parses the email with <see cref="ParseEmailContentAsync"/>; when that yields no
    /// records, asks the AI service instead.
    /// </summary>
    /// <param name="emailContent">The email body to parse.</param>
    /// <returns>The extracted records; empty when both approaches find nothing.</returns>
    public async Task<(
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[]> ParseAsync(string emailContent)
    {
        var result = await ParseEmailContentAsync(emailContent);

        if (result.Length > 0)
        {
            logger.LogInformation("使用规则成功解析邮件内容,提取到 {Count} 条交易记录", result.Length);
            return result;
        }

        logger.LogInformation("规则解析邮件内容失败,尝试使用AI进行解析");
        // AI fallback
        result = await ParseByAiAsync(emailContent) ?? [];

        if (result.Length == 0)
        {
            logger.LogWarning("AI解析邮件内容也未能提取到任何交易记录");
        }

        return result;
    }

    /// <summary>
    /// Rule-based extraction implemented by each concrete parser.
    /// </summary>
    /// <param name="emailContent">The email body to parse.</param>
    /// <returns>The extracted records; an empty array triggers the AI fallback in <see cref="ParseAsync"/>.</returns>
    public abstract Task<(
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[]> ParseEmailContentAsync(string emailContent);

    /// <summary>
    /// Sends the email body to the AI service and converts the JSON it returns into records.
    /// </summary>
    /// <param name="body">The email body.</param>
    /// <returns>
    /// The parsed records, or <c>null</c> when the AI returned nothing, returned something
    /// that is not valid JSON / not an array or object, or no record could be used.
    /// </returns>
    private async Task<(
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
    )[]?> ParseByAiAsync(string body)
    {
        var systemPrompt = $"""
            你是一个信息抽取助手。
            仅输出严格的JSON数组,不要包含任何多余文本。
            每个交易记录包含字段: card(字符串), reason(字符串), amount(数字), balance(数字), type(字符串,值为'收入'或'支出'), occurredAt(字符串,yyyy-MM-dd HH:mm:ss格式日期时间)。
            如果缺失,请推断或置空。
            [重要] 当前时间为{DateTime.Now:yyyy-MM-dd HH:mm:ss},请根据当前时间推断交易发生的时间。
            """;
        // Raw string literals do not process escape sequences, so the blank line before
        // the body is written as a real line break rather than "\n\n".
        var userPrompt = $"""
            从下面这封银行账单相关邮件正文中提取所有交易记录,返回JSON数组格式,
            每个元素包含: card, reason, amount, balance, type(收入或支出), occurredAt(非必要)。
            正文如下:

            {body}
            """;

        var contentText = await openAiService.ChatAsync(systemPrompt, userPrompt);
        if (string.IsNullOrWhiteSpace(contentText))
        {
            logger.LogWarning("AI未返回任何内容,无法解析邮件");
            return null;
        }

        logger.LogDebug("AI返回的内容: {Content}", contentText);

        // Strip a possible markdown code fence around the JSON.
        contentText = contentText.Trim();
        if (contentText.StartsWith("```"))
        {
            // Drop the opening ```json / ``` line.
            var firstNewLine = contentText.IndexOf('\n');
            if (firstNewLine > 0)
            {
                contentText = contentText[(firstNewLine + 1)..];
            }

            // Drop the closing ```.
            if (contentText.EndsWith("```"))
            {
                contentText = contentText[..^3];
            }

            contentText = contentText.Trim();
        }

        try
        {
            // contentText is expected to be a JSON array.
            using var jsonDoc = JsonDocument.Parse(contentText);
            var root = jsonDoc.RootElement;

            // Tolerate a single object instead of an array.
            if (root.ValueKind == JsonValueKind.Object)
            {
                logger.LogWarning("AI返回的内容是单个对象而非数组,尝试兼容处理");
                var single = ParseSingleRecord(root);
                if (single == null)
                {
                    return null;
                }

                return [single.Value];
            }

            if (root.ValueKind != JsonValueKind.Array)
            {
                logger.LogWarning("AI返回的内容不是JSON数组,无法解析邮件");
                return null;
            }

            var results = new List<(string card, string reason, decimal amount, decimal balance, TransactionType type, DateTime? occurredAt)>();

            foreach (var obj in root.EnumerateArray())
            {
                var record = ParseSingleRecord(obj);
                if (record != null)
                {
                    logger.LogInformation("解析到一条交易记录: {@Record}", record.Value);
                    results.Add(record.Value);
                }
            }

            logger.LogInformation("使用AI成功解析邮件内容,提取到 {Count} 条交易记录", results.Count);
            return results.Count > 0 ? results.ToArray() : null;
        }
        catch (JsonException ex)
        {
            // Model output is untrusted: malformed JSON means "nothing extracted",
            // not a failure of the whole parse.
            logger.LogWarning(ex, "AI返回的内容不是合法的JSON,无法解析邮件");
            return null;
        }
    }

    /// <summary>
    /// Converts one JSON object returned by the AI into a transaction record.
    /// </summary>
    /// <param name="obj">A JSON element expected to be an object with card/reason/amount/balance/type/occurredAt.</param>
    /// <returns>
    /// The record, or <c>null</c> when the element is not an object or has no usable
    /// <c>card</c> or <c>reason</c>.
    /// </returns>
    private (string card, string reason, decimal amount, decimal balance, TransactionType type, DateTime? occurredAt)? ParseSingleRecord(JsonElement obj)
    {
        // TryGetProperty throws on anything that is not an object (e.g. an array of strings).
        if (obj.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        var card = ReadText(obj, "card");
        var reason = ReadText(obj, "reason");
        if (string.IsNullOrWhiteSpace(card) || string.IsNullOrWhiteSpace(reason))
        {
            return null;
        }

        var typeStr = ReadText(obj, "type");
        var occurredAtStr = ReadText(obj, "occurredAt");
        var amount = ReadDecimal(obj, "amount");
        var balance = ReadDecimal(obj, "balance");

        // The prompt asks for yyyy-MM-dd HH:mm:ss, so parse independently of host culture.
        DateTime? occurredAt = DateTime.TryParse(occurredAtStr, CultureInfo.InvariantCulture, DateTimeStyles.None, out var parsed)
            ? parsed
            : null;

        var type = DetermineTransactionType(typeStr, reason, amount);
        return (card, reason, amount, balance, type, occurredAt);

        // Reads a property as text. JsonElement.GetString throws for non-string values,
        // and the model may emit e.g. the card tail as a JSON number, so numbers are
        // taken as their raw text; any other kind counts as missing.
        static string ReadText(JsonElement parent, string name)
        {
            if (!parent.TryGetProperty(name, out var property))
            {
                return string.Empty;
            }

            return property.ValueKind switch
            {
                JsonValueKind.String => property.GetString() ?? string.Empty,
                JsonValueKind.Number => property.GetRawText(),
                _ => string.Empty
            };
        }

        // Reads a property as a decimal from either a JSON number or a numeric string; 0 otherwise.
        static decimal ReadDecimal(JsonElement parent, string name)
        {
            if (!parent.TryGetProperty(name, out var property))
            {
                return 0m;
            }

            return property.ValueKind switch
            {
                JsonValueKind.Number when property.TryGetDecimal(out var number) => number,
                JsonValueKind.String when decimal.TryParse(property.GetString(), NumberStyles.Any, CultureInfo.InvariantCulture, out var text) => text,
                _ => 0m
            };
        }
    }

    /// <summary>
    /// Decides whether a transaction is income or expense.
    /// </summary>
    /// <remarks>
    /// Checked in order: an explicit type string, keywords in the reason, the sign of
    /// the amount; the default is <see cref="TransactionType.Expense"/>.
    /// </remarks>
    /// <param name="typeStr">Explicit type text, may be empty.</param>
    /// <param name="reason">Transaction description searched for keywords.</param>
    /// <param name="amount">Transaction amount; only its sign is used.</param>
    protected TransactionType DetermineTransactionType(string typeStr, string reason, decimal amount)
    {
        // An explicit type string wins.
        if (!string.IsNullOrWhiteSpace(typeStr))
        {
            if (typeStr.Contains("收入") ||
                typeStr.Contains("income", StringComparison.OrdinalIgnoreCase) ||
                typeStr.Equals("收", StringComparison.OrdinalIgnoreCase))
            {
                return TransactionType.Income;
            }

            if (typeStr.Contains("支出") ||
                typeStr.Contains("expense", StringComparison.OrdinalIgnoreCase) ||
                typeStr.Equals("支", StringComparison.OrdinalIgnoreCase))
            {
                return TransactionType.Expense;
            }
        }

        // Keyword lookup in the reason. Matching is case-insensitive on the original
        // text; lower-casing the reason first made "ATM存入" impossible to match.
        var text = reason ?? string.Empty;
        bool ContainsAny(string[] keywords) =>
            keywords.Any(k => text.Contains(k, StringComparison.OrdinalIgnoreCase));

        if (ContainsAny(ExpensePhrasesContainingIncomeKeyword))
        {
            return TransactionType.Expense;
        }

        if (ContainsAny(IncomeKeywords))
        {
            return TransactionType.Income;
        }

        if (ContainsAny(ExpenseKeywords))
        {
            return TransactionType.Expense;
        }

        // Fall back to the sign of the amount.
        if (amount < 0)
        {
            return TransactionType.Expense;
        }

        if (amount > 0)
        {
            return TransactionType.Income;
        }

        // Default to expense.
        return TransactionType.Expense;
    }
}
|
||||
Reference in New Issue
Block a user