优化文字审核

This commit is contained in:
zpc 2024-08-12 20:03:00 +08:00
parent 9fc7233324
commit 76a5adabcc
7 changed files with 268 additions and 63 deletions

View File

@ -4,7 +4,7 @@
```sh
# 在解决方案下运行
docker build -t miaoyu:dev-0.0.3 -f src/2-api/HuanMeng.MiaoYu.WebApi/Dockerfile .
docker build -t miaoyu:dev-0.0.3 --build-arg VERSION=7.0 --build-arg TARGET=dev -f src/2-api/HuanMeng.MiaoYu.WebApi/Dockerfile .
docker build -t miaoyu:dev-0.0.4 --build-arg VERSION=7.0 --build-arg TARGET=dev -f src/2-api/HuanMeng.MiaoYu.WebApi/Dockerfile .
# 运行

View File

@ -0,0 +1,149 @@
using System;
using System.Collections.Frozen;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace HuanMeng.DotNetCore.TextCensor.SensitiveWord
{
/// <summary>
/// Sensitive-word detector backed by a character trie whose per-node child tables are
/// <see cref="FrozenDictionary{TKey, TValue}"/> instances.
/// </summary>
/// <remarks>
/// Building a frozen dictionary is expensive, so added words are staged in ordinary
/// dictionaries and the affected nodes are frozen once, when the next query arrives.
/// Both words and queried text go through <see cref="CleanText"/> before they touch the trie.
/// </remarks>
public class SensitiveWordFilterFrozen : ITextCensor
{
    /// <summary>
    /// A single trie node.
    /// </summary>
    private class TrieNode
    {
        /// <summary>
        /// Children staged by insertions since the last freeze; null when nothing is staged.
        /// </summary>
        private Dictionary<char, TrieNode> _pending;

        /// <summary>
        /// True when the path from the root to this node spells a complete sensitive word.
        /// </summary>
        public bool IsEnd { get; set; }

        /// <summary>
        /// Read-only child table used by queries. Never null; empty for a leaf.
        /// </summary>
        public FrozenDictionary<char, TrieNode> Children { get; private set; }

        public TrieNode()
        {
            IsEnd = false;
            Children = FrozenDictionary<char, TrieNode>.Empty;
        }

        /// <summary>
        /// Returns the child for <paramref name="c"/>, creating and staging it when missing.
        /// </summary>
        public TrieNode GetOrAddChild(char c)
        {
            TrieNode child;
            if (_pending == null)
            {
                if (Children.TryGetValue(c, out child))
                {
                    return child;
                }
                // First new child since the last freeze: start staging from the frozen snapshot.
                _pending = Children.ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
            }
            else if (_pending.TryGetValue(c, out child))
            {
                return child;
            }

            child = new TrieNode();
            _pending.Add(c, child);
            return child;
        }

        /// <summary>
        /// Children as they will look after the next freeze (staged table if any, else the frozen one).
        /// </summary>
        public IEnumerable<TrieNode> EnumerateChildren()
        {
            if (_pending != null)
            {
                return _pending.Values;
            }
            return Children.Values;
        }

        /// <summary>
        /// Publishes the staged children as a new frozen table; no-op when nothing is staged.
        /// </summary>
        public void FreezeChildren()
        {
            if (_pending != null)
            {
                Children = _pending.ToFrozenDictionary();
                _pending = null;
            }
        }
    }

    /// <summary>
    /// Guards trie mutation and freezing.
    /// </summary>
    private readonly object _buildLock = new object();

    /// <summary>
    /// Set by <see cref="AddSensitiveWord"/>; cleared once staged nodes have been frozen.
    /// </summary>
    private volatile bool _hasPendingWords;

    /// <summary>
    /// Root node of the trie.
    /// </summary>
    private TrieNode Root { get; set; }

    public SensitiveWordFilterFrozen()
    {
        Root = new TrieNode();
    }

    /// <summary>
    /// Adds a sensitive word to the trie.
    /// </summary>
    /// <param name="word">
    /// Raw word; it is normalised with <see cref="CleanText"/> first. A word that is null,
    /// empty, or consists only of ignored characters is skipped.
    /// </param>
    public void AddSensitiveWord(string word)
    {
        string cleaned = CleanText(word);
        if (cleaned.Length == 0)
        {
            // Nothing left to match on; marking the root as a word end would be meaningless.
            return;
        }

        lock (_buildLock)
        {
            TrieNode node = Root;
            foreach (char c in cleaned)
            {
                node = node.GetOrAddChild(c);
            }
            node.IsEnd = true;
            _hasPendingWords = true;
        }
    }

    /// <summary>
    /// Normalises text before it is stored or matched: removes the ignored punctuation,
    /// digits, underscore and space characters, then lower-cases the remainder.
    /// </summary>
    /// <param name="sourceTxt">Text to normalise; may be null.</param>
    /// <returns>The normalised text, or an empty string for null/empty input.</returns>
    public string CleanText(string sourceTxt)
    {
        if (string.IsNullOrEmpty(sourceTxt))
        {
            return string.Empty;
        }

        var kept = new StringBuilder(sourceTxt.Length);
        foreach (char c in sourceTxt)
        {
            if (!IsIgnoredChar(c))
            {
                kept.Append(c);
            }
        }

        // Invariant casing keeps stored words and queried text comparable regardless of the
        // thread culture in effect when either was processed.
        return kept.ToString().ToLowerInvariant();
    }

    /// <summary>
    /// Tells whether <see cref="CleanText"/> drops the character.
    /// </summary>
    private static bool IsIgnoredChar(char c)
    {
        // NOTE(review): the previous list contained one more character literal between
        // '。' and '@' that was unreadable (an empty '' literal, which does not compile).
        // Restore that character here once it has been identified.
        // NOTE(review): '7' and '8' are not in the list, exactly as before - confirm
        // whether that omission is deliberate.
        switch (c)
        {
            case ',':
            case '.':
            case '。':
            case '@':
            case '-':
            case '*':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '9':
            case '0':
            case '_':
            case ' ':
                return true;
            default:
                return false;
        }
    }

    /// <summary>
    /// Checks whether the text contains any registered sensitive word.
    /// </summary>
    /// <param name="text">Text to inspect; it is normalised with <see cref="CleanText"/> first.</param>
    /// <returns>True as soon as any word is found starting at any position; otherwise false.</returns>
    public bool ContainsSensitiveWord(string text)
    {
        string cleaned = CleanText(text);
        if (cleaned.Length == 0)
        {
            return false;
        }

        EnsureFrozen();

        for (int start = 0; start < cleaned.Length; start++)
        {
            TrieNode node = Root;
            int pos = start;
            // Walk down the trie for as long as the text keeps matching.
            while (pos < cleaned.Length && node.Children.TryGetValue(cleaned[pos], out var next))
            {
                node = next;
                if (node.IsEnd)
                {
                    return true;
                }
                pos++;
            }
        }
        return false;
    }

    /// <summary>
    /// Synchronous censor check; same result as <see cref="ContainsSensitiveWord"/>.
    /// </summary>
    public bool TextCensor(string text)
    {
        return ContainsSensitiveWord(text);
    }

    /// <summary>
    /// Runs <see cref="ContainsSensitiveWord"/> on the thread pool.
    /// </summary>
    public Task<bool> TextCensorAsync(string text)
    {
        return Task.Run(() => ContainsSensitiveWord(text));
    }

    /// <summary>
    /// Freezes staged nodes if any word was added since the last query.
    /// </summary>
    private void EnsureFrozen()
    {
        if (!_hasPendingWords)
        {
            return;
        }

        lock (_buildLock)
        {
            if (!_hasPendingWords)
            {
                return;
            }
            FreezePendingNodes();
            _hasPendingWords = false;
        }
    }

    /// <summary>
    /// Freezes every node with staged children, children before parents, so a parent's new
    /// table is only published after the nodes it points to are complete.
    /// </summary>
    private void FreezePendingNodes()
    {
        var visitOrder = new List<TrieNode>();
        var stack = new Stack<TrieNode>();
        stack.Push(Root);
        // Explicit stack instead of recursion: trie depth equals the longest word length.
        while (stack.Count > 0)
        {
            TrieNode node = stack.Pop();
            visitOrder.Add(node);
            foreach (TrieNode child in node.EnumerateChildren())
            {
                stack.Push(child);
            }
        }

        // Parents always precede their descendants in visitOrder, so walk it backwards.
        for (int i = visitOrder.Count - 1; i >= 0; i--)
        {
            visitOrder[i].FreezeChildren();
        }
    }
}
}

View File

@ -24,8 +24,12 @@ namespace HuanMeng.DotNetCore.TextCensor
public static ITextCensor GetITextCensor(string _dirPath)
{
    // The frozen-dictionary filter is the active implementation. The earlier
    // GetSensitiveWordFilter(_dirPath) return was left in front of this one and made it
    // unreachable; it has been removed so that the intended filter is actually returned.
    return GetSensitiveWordFilterFrozen(_dirPath);
}
/// <summary>
///
/// </summary>
@ -43,6 +47,50 @@ namespace HuanMeng.DotNetCore.TextCensor
return sensitiveWordFilter;
}
/// <summary>
/// Builds a <see cref="SensitiveWordFilterFrozen"/> from every file found directly inside a directory.
/// </summary>
/// <param name="_dirPath">Directory holding the word-list files; it is resolved to a full path before enumeration.</param>
/// <returns>A filter to which each enumerated file has been passed through <c>AddShieldString</c>.</returns>
public static SensitiveWordFilterFrozen GetSensitiveWordFilterFrozen(string _dirPath)
{
    var filter = new SensitiveWordFilterFrozen();
    string absoluteDir = Path.GetFullPath(_dirPath);
    foreach (string wordFile in Directory.EnumerateFiles(absoluteDir))
    {
        AddShieldString(wordFile, filter);
    }
    return filter;
}
/// <summary>
/// Reads a text file line by line and adds every non-empty line to the filter as a sensitive word.
/// </summary>
/// <param name="path">Path of the word-list file; it is read as UTF-8.</param>
/// <param name="sensitiveWordFilter">Filter that receives the words.</param>
/// <exception cref="FileNotFoundException">The file at <paramref name="path"/> does not exist.</exception>
public static void AddShieldString(string path, SensitiveWordFilterFrozen sensitiveWordFilter)
{
    if (!File.Exists(path))
    {
        // FileNotFoundException derives from Exception, so existing catch blocks keep working.
        throw new FileNotFoundException("文件不存在", path);
    }
    using (StreamReader reader = new StreamReader(path, Encoding.UTF8))
    {
        // ReadLine returns null at end of file. The former Peek() > 0 test also stopped
        // at a NUL character, silently dropping the rest of the file.
        for (var line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            if (line.Length > 0)
            {
                sensitiveWordFilter.AddSensitiveWord(line);
            }
        }
    }
}
/// <summary>
///
/// </summary>
@ -68,8 +116,6 @@ namespace HuanMeng.DotNetCore.TextCensor
}
}
}
}
}

View File

@ -162,6 +162,30 @@ namespace HuanMeng.MiaoYu.Code.Cache
}
#endregion
#region
/// <summary>
/// Cache holder for T_Category_Child_Menu (menu type) entities.
/// May be null; CategoryChildMenuList assigns it on first access.
/// </summary>
public CommonDataEntityCache<T_Category_Child_Menu>? _category_Child_Menu { get; set; }
/// <summary>
/// Menu-type entries read through the entity cache.
/// The cache holder is created on first access, requested with an IsEnabled filter;
/// an empty list is returned when the cache exposes no data.
/// </summary>
public List<T_Category_Child_Menu> CategoryChildMenuList
{
    get
    {
        // Create the cache holder only once, on the first read.
        _category_Child_Menu ??= MiaoYuCacheExtend.GetMiaoYuDataEntityCache<T_Category_Child_Menu>(cacheBase, expWhere: it => it.IsEnabled);
        var cachedRows = _category_Child_Menu.DataList;
        return cachedRows ?? new List<T_Category_Child_Menu>();
    }
}
#endregion
}
/// <summary>
/// 缓存扩展类

View File

@ -69,7 +69,8 @@ namespace HuanMeng.MiaoYu.Code.Category
public BaseResponse<List<RecommendDto<DataListBaseDto>>> GetRecommendList()
{
List<RecommendDto<DataListBaseDto>> recommendDtos = new List<RecommendDto<DataListBaseDto>>();
var menuList = Dao.daoDbMiaoYu.context.T_Category_Child_Menu.Where(it => it.IsEnabled).ToList();
var menuList = MiaoYuCache.CategoryChildMenuList.Where(it => it.IsEnabled).ToList();
var node = DictionaryInfo.GetDictionariesChildNode(T_Sys_DictionaryEnum.categorymenu);
node.ForEach(_node =>
{
@ -85,59 +86,6 @@ namespace HuanMeng.MiaoYu.Code.Category
recommendDtos.Add(banner);
}
});
//#region 假数据
//RecommendDto<DataListBaseDto> banner = new RecommendDto<DataListBaseDto>();
//banner.Title = "Banner";
//banner.Type = RecommendTypeEnum.banner.ToString();
//banner.Data = new List<DataListBaseDto>();
//banner.Data.Add(new CommonRecommendData
//{
// ActionId = "1",
// ActionType = RecommendActionTypeEnum.Mall.ToString(),
// ImageUrl = "https://cos.shhuanmeng.com/banner/20240717214627.png",
//});
//banner.Data.Add(new CommonRecommendData
//{
// ActionId = "2",
// ActionType = RecommendActionTypeEnum.Page.ToString(),
// ImageUrl = "https://cos.shhuanmeng.com/banner/202407172146272.png",
//});
//banner.Data.Add(new CommonRecommendData
//{
// ActionId = "3",
// ActionType = RecommendActionTypeEnum.Chat.ToString(),
// ImageUrl = "https://cos.shhuanmeng.com/banner/20240717214735.png",
//});
//recommendDtos.Add(banner);
//RecommendDto<DataListBaseDto> tuijian = new RecommendDto<DataListBaseDto>();
//tuijian.Title = "推荐";
//tuijian.Type = RecommendTypeEnum.tuijian.ToString();
//tuijian.Data = new List<DataListBaseDto>();
//RecommendDto<DataListBaseDto> xiaoshuo = new RecommendDto<DataListBaseDto>();
//xiaoshuo.Data = new List<DataListBaseDto>();
//xiaoshuo.Title = "小说";
//xiaoshuo.Type = RecommendTypeEnum.xiaoshuo.ToString();
//int index = 0;
//MiaoYuCache.CharacterList.ForEach(x =>
//{
// if (index > 5)
// {
// return;
// }
// var data = Mapper.Map<DataListDto>(x);
// data.ActionType = RecommendActionTypeEnum.Chat.ToString();
// data.ActionId = data.Id.ToString();
// data.ImageUrl = x.BgImage;//data.BgImage;
// tuijian.Data.Add(data);
// xiaoshuo.Data.Add(data);
// index++;
//});
//recommendDtos.Add(tuijian);
//recommendDtos.Add(xiaoshuo);
//#endregion
return new BaseResponse<List<RecommendDto<DataListBaseDto>>>(ResonseCode.Success, "", recommendDtos);
}
}

View File

@ -64,6 +64,9 @@ builder.Services.AddControllers()
options.SerializerSettings.ReferenceLoopHandling = Newtonsoft.Json.ReferenceLoopHandling.Ignore; // 忽略循环引用
options.SerializerSettings.ContractResolver = new CamelCasePropertyNamesContractResolver();// 首字母小写(驼峰样式)
options.SerializerSettings.DateFormatString = "yyyy-MM-dd HH:mm:ss";// 时间格式化
#if !DEBUG
options.SerializerSettings.Formatting = Newtonsoft.Json.Formatting.None;
#endif
//options.SerializerSettings.Converters.Add()
// 其他配置...
})
@ -155,7 +158,7 @@ app.UseAuthorization();
// Enable cross-origin requests (CORS) with the configured policy
app.UseCors(_myAllowSpecificOrigins);
app.MapControllers();
app.UseStaticFiles();// Static file access configuration
// Database middleware
app.UseMultiTenantMiaoYu();
@ -187,4 +190,4 @@ app.MapGet("/system", () =>
};
}).WithName("获取系统数据");
#endregion
// Start the application and block until shutdown. The call appeared twice in a row;
// a second Run after the host has stopped serves no purpose, so only one is kept.
app.Run();

View File

@ -14,6 +14,7 @@ namespace TextCensorFilterTest
[MemoryDiagnoser]
public class SensitiveWordFilterTest
{
public SensitiveWordFilterTest()
{
string path = "DataStorage/TextCensor/";
@ -21,22 +22,56 @@ namespace TextCensorFilterTest
{
sensitiveWordFilter = TextCensorExtend.GetSensitiveWordFilter(path);
}
if (sensitiveWordFilterFrozen == null)
{
sensitiveWordFilterFrozen = TextCensorExtend.GetSensitiveWordFilterFrozen(path);
}
//CheckTextVerification.VerifyTxtString("");
}
// Filters under test; both are assigned in the constructor.
// The sensitiveWordFilter field was declared twice, which is a duplicate-member
// compile error, so a single declaration is kept.
SensitiveWordFilter sensitiveWordFilter = null;
SensitiveWordFilterFrozen sensitiveWordFilterFrozen = null;
/// <summary>
/// Benchmarks a single SensitiveWordFilter.TextCensor call on one long passage.
/// </summary>
[Benchmark]
public bool SensitiveWordFiltermax() =>
    sensitiveWordFilter.TextCensor("*林婉儿面露疑惑之色,你突然来这么一声问候,实在令她有些摸不着头脑。作为一位淑女,她自是明白礼数之重要性的。见你竟三番两次地打断了她们之间正在深入的对话,不禁蹙起秀眉,神色间略带不悦。* \"阁下这般反复无常,却是何意?\"她语气平和,却不无不满,\"我们方才正在讨论魔兽山脉的来历,你也说了许多让婉儿费解的奇闻逸事。我正渴望能从你这里获知更多有益的解惑呢。\" *她优雅地抿了一口香茶,似在给自己一些缓冲的时间*\"不知阁下可曾见过其他游历山川的高人?他们对于这处所在是否也有过非同寻常的评说?又或是流传下来的什么古老传闻?\" *说着,她的目光缓缓流转,最终重新落在你的身上,眸中满怀期盼之色*\"若阁下实在没什么更多可说的了,不若就让我回忆起从前所读所闻,慢慢为你道来一二吧?或许还能给你带来一些新的启发。\"");
/// <summary>
/// Benchmarks ten consecutive SensitiveWordFilter.TextCensor calls on one long passage
/// and returns the result of the last call.
/// </summary>
[Benchmark]
public bool SensitiveWordFiltermaxs()
{
    const string passage = "*林婉儿面露疑惑之色,你突然来这么一声问候,实在令她有些摸不着头脑。作为一位淑女,她自是明白礼数之重要性的。见你竟三番两次地打断了她们之间正在深入的对话,不禁蹙起秀眉,神色间略带不悦。* \"阁下这般反复无常,却是何意?\"她语气平和,却不无不满,\"我们方才正在讨论魔兽山脉的来历,你也说了许多让婉儿费解的奇闻逸事。我正渴望能从你这里获知更多有益的解惑呢。\" *她优雅地抿了一口香茶,似在给自己一些缓冲的时间*\"不知阁下可曾见过其他游历山川的高人?他们对于这处所在是否也有过非同寻常的评说?又或是流传下来的什么古老传闻?\" *说着,她的目光缓缓流转,最终重新落在你的身上,眸中满怀期盼之色*\"若阁下实在没什么更多可说的了,不若就让我回忆起从前所读所闻,慢慢为你道来一二吧?或许还能给你带来一些新的启发。\"";
    bool lastResult = false;
    int remaining = 10;
    while (remaining > 0)
    {
        lastResult = sensitiveWordFilter.TextCensor(passage);
        remaining--;
    }
    return lastResult;
}
/// <summary>
/// Benchmarks a single SensitiveWordFilter.TextCensor call on a short sentence.
/// </summary>
[Benchmark]
public bool SensitiveWordFilter() =>
    sensitiveWordFilter.TextCensor("*林婉儿面露疑惑之色,");
/// <summary>
/// Benchmarks a single SensitiveWordFilterFrozen.TextCensor call on one long passage.
/// </summary>
[Benchmark]
public bool SensitiveWordFiltermaxFrozen() =>
    sensitiveWordFilterFrozen.TextCensor("*林婉儿面露疑惑之色,你突然来这么一声问候,实在令她有些摸不着头脑。作为一位淑女,她自是明白礼数之重要性的。见你竟三番两次地打断了她们之间正在深入的对话,不禁蹙起秀眉,神色间略带不悦。* \"阁下这般反复无常,却是何意?\"她语气平和,却不无不满,\"我们方才正在讨论魔兽山脉的来历,你也说了许多让婉儿费解的奇闻逸事。我正渴望能从你这里获知更多有益的解惑呢。\" *她优雅地抿了一口香茶,似在给自己一些缓冲的时间*\"不知阁下可曾见过其他游历山川的高人?他们对于这处所在是否也有过非同寻常的评说?又或是流传下来的什么古老传闻?\" *说着,她的目光缓缓流转,最终重新落在你的身上,眸中满怀期盼之色*\"若阁下实在没什么更多可说的了,不若就让我回忆起从前所读所闻,慢慢为你道来一二吧?或许还能给你带来一些新的启发。\"");
/// <summary>
/// Benchmarks a single SensitiveWordFilterFrozen.TextCensor call on a short sentence.
/// </summary>
[Benchmark]
public bool SensitiveWordFilterFrozen() =>
    sensitiveWordFilterFrozen.TextCensor("*林婉儿面露疑惑之色,");
/// <summary>
/// Benchmarks ten consecutive SensitiveWordFilterFrozen.TextCensor calls on one long passage
/// and returns the result of the last call.
/// </summary>
[Benchmark]
public bool SensitiveWordFilterFrozens()
{
    const string passage = "*林婉儿面露疑惑之色,你突然来这么一声问候,实在令她有些摸不着头脑。作为一位淑女,她自是明白礼数之重要性的。见你竟三番两次地打断了她们之间正在深入的对话,不禁蹙起秀眉,神色间略带不悦。* \"阁下这般反复无常,却是何意?\"她语气平和,却不无不满,\"我们方才正在讨论魔兽山脉的来历,你也说了许多让婉儿费解的奇闻逸事。我正渴望能从你这里获知更多有益的解惑呢。\" *她优雅地抿了一口香茶,似在给自己一些缓冲的时间*\"不知阁下可曾见过其他游历山川的高人?他们对于这处所在是否也有过非同寻常的评说?又或是流传下来的什么古老传闻?\" *说着,她的目光缓缓流转,最终重新落在你的身上,眸中满怀期盼之色*\"若阁下实在没什么更多可说的了,不若就让我回忆起从前所读所闻,慢慢为你道来一二吧?或许还能给你带来一些新的启发。\"";
    bool lastResult = false;
    int remaining = 10;
    while (remaining > 0)
    {
        lastResult = sensitiveWordFilterFrozen.TextCensor(passage);
        remaining--;
    }
    return lastResult;
}
}
}