重构: 将 LogCleanupService 转为 Quartz Job 服务
All checks were successful
Docker Build & Deploy / Build Docker Image (push) Successful in 22s
Docker Build & Deploy / Deploy to Production (push) Successful in 7s
Docker Build & Deploy / Cleanup Dangling Images (push) Successful in 1s
Docker Build & Deploy / WeChat Notification (push) Successful in 1s
All checks were successful
Docker Build & Deploy / Build Docker Image (push) Successful in 22s
Docker Build & Deploy / Deploy to Production (push) Successful in 7s
Docker Build & Deploy / Cleanup Dangling Images (push) Successful in 1s
Docker Build & Deploy / WeChat Notification (push) Successful in 1s
- 创建 LogCleanupJob 替代 LogCleanupService (BackgroundService)
- 在 Expand.cs 中注册 LogCleanupJob (每天凌晨2点执行, 保留30天日志)
- 从 Program.cs 移除 LogCleanupService 的 HostedService 注册
- 删除 Service/LogCleanupService.cs
- 删除 Service/PeriodicBillBackgroundService.cs (已无用的重复服务)

所有后台任务现在统一通过 Quartz.NET 管理, 支持运行时控制
This commit is contained in:
151
Service/AI/TextSegmentService.cs
Normal file
151
Service/AI/TextSegmentService.cs
Normal file
@@ -0,0 +1,151 @@
|
||||
using JiebaNet.Analyser;
|
||||
using JiebaNet.Segmenter;
|
||||
|
||||
namespace Service.AI;
|
||||
|
||||
/// <summary>
/// Contract for a text segmentation service: word segmentation and keyword extraction.
/// </summary>
public interface ITextSegmentService
{
    /// <summary>
    /// Extracts keywords from the given text.
    /// </summary>
    /// <param name="text">The text to analyse.</param>
    /// <param name="topN">How many top-ranked keywords to return; defaults to 5.</param>
    /// <returns>The list of extracted keywords.</returns>
    List<string> ExtractKeywords(string text, int topN = 5);

    /// <summary>
    /// Splits the given text into word segments.
    /// </summary>
    /// <param name="text">The text to segment.</param>
    /// <returns>The list of resulting segments.</returns>
    List<string> Segment(string text);
}
|
||||
|
||||
/// <summary>
/// <see cref="ITextSegmentService"/> implementation backed by JiebaNet.
/// </summary>
public class TextSegmentService : ITextSegmentService
{
    /// <summary>
    /// Maximum number of UTF-16 code units of the raw text that is returned
    /// when no keyword could be extracted.
    /// </summary>
    private const int FallbackPrefixLength = 10;

    /// <summary>
    /// Minimum length a token must have to be kept as a keyword (drops single characters).
    /// </summary>
    private const int MinKeywordLength = 2;

    private readonly JiebaSegmenter _segmenter;
    private readonly TfidfExtractor _extractor;
    private readonly ILogger<TextSegmentService> _logger;

    /// <summary>
    /// Creates the segmenter and the TF-IDF extractor, then registers the custom vocabulary.
    /// </summary>
    /// <param name="logger">Logger used for diagnostics and failure reporting.</param>
    public TextSegmentService(ILogger<TextSegmentService> logger)
    {
        _logger = logger;
        _segmenter = new JiebaSegmenter();
        _extractor = new TfidfExtractor();

        // Only business-specific words that may be missing from the JiebaNet dictionary are added.
        AddCustomWords();
    }

    /// <summary>
    /// Registers custom words with the segmenter. Best effort: a failure is logged as a
    /// warning and otherwise ignored so that the service can still be constructed.
    /// </summary>
    private void AddCustomWords()
    {
        try
        {
            // Only words that may be missing are listed here; according to the original
            // author's note, most common words (e.g. "美团", "支付宝") already ship with JiebaNet.
            // TODO: move this list to a configuration file and let the AI periodically
            //       extract compound words to populate it.
            var customWords = new[]
            {
                "水电费", "物业费", "燃气费" // compound words that must be recognised as a whole
            };

            foreach (var word in customWords)
            {
                _segmenter.AddWord(word);
            }

            if (customWords.Length > 0)
            {
                _logger.LogDebug("已加载 {Count} 个自定义词汇", customWords.Length);
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "添加自定义词典失败");
        }
    }

    /// <summary>
    /// Extracts keywords from <paramref name="text"/>.
    /// </summary>
    /// <remarks>
    /// Strategy, in order:
    /// 1. TF-IDF extraction, keeping tokens of at least two characters;
    /// 2. if that yields nothing, plain segmentation, keeping the longest distinct tokens;
    /// 3. if that still yields nothing, a short prefix of the original text.
    /// Any exception is logged and the same short prefix is returned instead of throwing.
    /// </remarks>
    /// <param name="text">The text to analyse.</param>
    /// <param name="topN">How many top-ranked keywords to request; defaults to 5.</param>
    /// <returns>
    /// The extracted keywords; an empty list when <paramref name="text"/> is null or whitespace.
    /// </returns>
    public List<string> ExtractKeywords(string text, int topN = 5)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return [];
        }

        try
        {
            // TF-IDF extraction (stop-word filtering is built into the extractor).
            var keywords = _extractor.ExtractTags(text, topN, new List<string>());

            // Drop single-character tokens and keep the meaningful ones.
            var filteredKeywords = keywords
                .Where(k => k.Length >= MinKeywordLength)
                .Distinct()
                .ToList();

            // Fallback 1: plain segmentation, preferring the longest tokens.
            // De-duplicate BEFORE taking topN so that repeated tokens do not use up
            // the available slots and shrink the result below topN unique keywords.
            if (filteredKeywords.Count == 0)
            {
                filteredKeywords = Segment(text)
                    .Where(s => s.Length >= MinKeywordLength)
                    .Distinct()
                    .OrderByDescending(s => s.Length)
                    .Take(topN)
                    .ToList();
            }

            // Fallback 2: a short prefix of the original text. The text is known to be
            // non-empty here because of the guard at the top of the method.
            if (filteredKeywords.Count == 0)
            {
                filteredKeywords.Add(TruncateForFallback(text));
            }

            _logger.LogDebug("从文本 '{Text}' 中提取关键词: {Keywords}",
                text, string.Join(", ", filteredKeywords));

            return filteredKeywords;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "提取关键词失败,文本: {Text}", text);

            // Degraded mode: return a short prefix of the original text.
            return [TruncateForFallback(text)];
        }
    }

    /// <summary>
    /// Splits <paramref name="text"/> into segments, trimming each segment and dropping
    /// blank ones. No stop-word filtering is applied here.
    /// </summary>
    /// <param name="text">The text to segment.</param>
    /// <returns>
    /// The trimmed, non-blank segments; an empty list when <paramref name="text"/> is null or
    /// whitespace; a single-element list containing the original text if segmentation throws.
    /// </returns>
    public List<string> Segment(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return [];
        }

        try
        {
            return _segmenter.Cut(text)
                .Where(s => !string.IsNullOrWhiteSpace(s))
                .Select(s => s.Trim())
                .ToList();
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "分词失败,文本: {Text}", text);
            return [text];
        }
    }

    /// <summary>
    /// Returns at most <see cref="FallbackPrefixLength"/> UTF-16 code units from the start of
    /// <paramref name="text"/>, shortening by one more unit when the cut would otherwise
    /// split a surrogate pair and leave a dangling high surrogate at the end.
    /// </summary>
    private static string TruncateForFallback(string text)
    {
        if (text.Length <= FallbackPrefixLength)
        {
            return text;
        }

        var length = char.IsHighSurrogate(text[FallbackPrefixLength - 1])
            ? FallbackPrefixLength - 1
            : FallbackPrefixLength;

        return text[..length];
    }
}
|
||||
Reference in New Issue
Block a user