using System.Globalization;
using HtmlAgilityPack;

namespace Service.EmailParseServices;

/// <summary>
/// Parses HTML transaction-notification emails sent from
/// <c>ccsvc@message.cmbchina.com</c> into card / reason / amount / balance /
/// type / timestamp tuples.
/// </summary>
/// <remarks>
/// All numeric and date parsing uses <see cref="CultureInfo.InvariantCulture"/>:
/// the email is machine-generated, so its format must not depend on the
/// culture of the host process.
/// </remarks>
public class EmailParseFormCCSVC(
    ILogger logger,
    IOpenAiService openAiService
) : EmailParseServicesBase(logger, openAiService)
{
    /// <summary>Sender address that identifies emails this parser handles.</summary>
    private const string SenderAddress = "ccsvc@message.cmbchina.com";

    /// <summary>
    /// Decides whether this parser should handle the email.
    /// </summary>
    /// <param name="from">The sender field of the email.</param>
    /// <param name="body">The raw email body.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="from"/> contains the expected sender
    /// address (compared case-insensitively) and <paramref name="body"/>
    /// contains at least one HTML tag; otherwise <c>false</c>.
    /// </returns>
    public override bool CanParse(string from, string body)
    {
        if (!from.Contains(SenderAddress, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }

        // The body must contain HTML tags; plain-text mails are not handled here.
        return Regex.IsMatch(body, "<.*?>");
    }

    /// <summary>
    /// Extracts every transaction listed in the HTML email.
    /// </summary>
    /// <param name="emailContent">The HTML body of the email.</param>
    /// <returns>
    /// One tuple per transaction block that could be parsed. An empty array is
    /// returned when the statement date cannot be located or parsed. The same
    /// available-limit value is reported as <c>balance</c> for every entry
    /// (0 when it cannot be found or parsed).
    /// </returns>
    public override async Task<(
        string card,
        string reason,
        decimal amount,
        decimal balance,
        TransactionType type,
        DateTime? occurredAt
        )[]> ParseEmailContentAsync(string emailContent)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(emailContent);

        // 1. Statement date: without it no transaction can be timestamped.
        if (!TryReadStatementDate(doc, out var date))
        {
            return [];
        }

        // 2. Balance (available limit), shared by all transactions in the mail.
        var balance = ReadAvailableLimit(doc);

        // 3. Transactions.
        var result = new List<(string card, string reason, decimal amount, decimal balance, TransactionType type, DateTime? occurredAt)>();
        var transactionNodes = doc.DocumentNode.SelectNodes("//span[@id='fixBand4']");
        if (transactionNodes != null)
        {
            foreach (var node in transactionNodes)
            {
                try
                {
                    if (TryReadTransaction(node, date, balance, out var transaction))
                    {
                        result.Add(transaction);
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: one malformed block must not discard the others.
                    logger.LogError(ex, "Error parsing transaction node");
                }
            }
        }

        // The work is synchronous; awaiting a completed task keeps the method
        // 'async' so any exception above surfaces through the returned Task.
        return await Task.FromResult(result.ToArray());
    }

    /// <summary>
    /// Reads the statement date from the header node, whose text looks like
    /// "2025/12/21 您的消费明细如下:". Logs a warning and returns <c>false</c>
    /// when the node is missing or the date cannot be parsed.
    /// </summary>
    private bool TryReadStatementDate(HtmlDocument doc, out DateTime date)
    {
        date = default;

        var dateNode = doc.DocumentNode.SelectSingleNode("//font[contains(text(), '您的消费明细如下')]");
        if (dateNode == null)
        {
            logger.LogWarning("Date node not found");
            return false;
        }

        var dateText = dateNode.InnerText.Trim();
        var dateMatch = Regex.Match(dateText, @"\d{4}/\d{1,2}/\d{1,2}");
        if (!dateMatch.Success
            || !DateTime.TryParse(dateMatch.Value, CultureInfo.InvariantCulture, DateTimeStyles.None, out date))
        {
            logger.LogWarning("Failed to parse date from: {DateText}", dateText);
            return false;
        }

        return true;
    }

    /// <summary>
    /// Reads the available limit: finds the "可用额度" label, walks to the
    /// nearest preceding sibling table row and parses the "¥" amount inside it.
    /// Returns 0 when any step fails.
    /// </summary>
    private static decimal ReadAvailableLimit(HtmlDocument doc)
    {
        var limitLabelNode = doc.DocumentNode.SelectSingleNode("//font[contains(text(), '可用额度')]");
        var labelRow = limitLabelNode?.Ancestors("tr").FirstOrDefault();
        if (labelRow == null)
        {
            return 0;
        }

        // Skip non-<tr> siblings (e.g. whitespace text nodes) between the rows.
        var amountRow = labelRow.PreviousSibling;
        while (amountRow != null && amountRow.Name != "tr")
        {
            amountRow = amountRow.PreviousSibling;
        }

        var balanceNode = amountRow?.SelectSingleNode(".//font[contains(text(), '¥')]");
        if (balanceNode == null)
        {
            return 0;
        }

        var balanceText = balanceNode.InnerText.Replace("¥", "").Replace(",", "").Trim();
        return decimal.TryParse(balanceText, NumberStyles.Number, CultureInfo.InvariantCulture, out var limit)
            ? limit
            : 0;
    }

    /// <summary>
    /// Parses a single transaction block.
    /// </summary>
    /// <param name="node">The transaction block node.</param>
    /// <param name="date">Statement date; used as the timestamp when no time of day can be parsed.</param>
    /// <param name="balance">Available limit copied into the resulting tuple.</param>
    /// <param name="transaction">The parsed transaction when the method returns <c>true</c>.</param>
    /// <returns>
    /// <c>false</c> when the block has no info section or no parsable amount.
    /// </returns>
    private bool TryReadTransaction(
        HtmlNode node,
        DateTime date,
        decimal balance,
        out (string card, string reason, decimal amount, decimal balance, TransactionType type, DateTime? occurredAt) transaction)
    {
        transaction = default;

        // Info block
        var infoNode = node.SelectSingleNode(".//span[@id='fixBand12']");
        if (infoNode == null)
        {
            return false;
        }

        // Amount, e.g. "CNY 12.00". Decode entities first so an HTML nbsp entity,
        // a literal U+00A0 and a plain space are all stripped the same way.
        var amountNode = infoNode.SelectSingleNode(".//font[contains(text(), 'CNY')]");
        if (amountNode == null)
        {
            return false;
        }

        var amountText = HtmlEntity.DeEntitize(amountNode.InnerText)
            .Replace("CNY", "")
            .Replace("\u00A0", "")
            .Replace(" ", "")
            .Trim();
        if (!decimal.TryParse(amountText, NumberStyles.Number, CultureInfo.InvariantCulture, out var amount))
        {
            return false;
        }

        // Time of day, e.g. "10:13:43"; combined with the statement date.
        var timeText = node.SelectSingleNode(".//span[@id='fixBand5']//font")?.InnerText.Trim();
        DateTime? occurredAt = date;
        if (!string.IsNullOrEmpty(timeText))
        {
            var datePart = date.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
            if (DateTime.TryParse($"{datePart} {timeText}", CultureInfo.InvariantCulture, DateTimeStyles.None, out var dt))
            {
                occurredAt = dt;
            }
        }

        // Description, e.g. "尾号4390 消费 财付通-luckincoffee瑞幸咖啡".
        // Entities are decoded and non-breaking spaces turned into normal
        // spaces so the text can be split on ' '.
        var descNode = infoNode.SelectSingleNode(".//tr[2]//font");
        var descText = HtmlEntity.DeEntitize(descNode?.InnerText ?? "")
            .Replace('\u00A0', ' ')
            .Trim();

        var parts = descText.Split(' ', StringSplitOptions.RemoveEmptyEntries);

        var card = "";
        if (parts.Length > 0 && parts[0].StartsWith("尾号", StringComparison.Ordinal))
        {
            card = parts[0].Replace("尾号", "");
        }

        // The full description (not just the trailing merchant part) is what
        // gets passed to the type classifier.
        var type = parts.Length > 1
            ? DetermineTransactionType(parts[1], descText, amount)
            : TransactionType.Expense;

        // Everything after "<card> <type>" is the reason; otherwise fall back
        // to the whole description.
        var reason = parts.Length > 2
            ? string.Join(" ", parts.Skip(2))
            : descText;

        transaction = (card, reason, amount, balance, type, occurredAt);
        return true;
    }
}