添加文本审核

This commit is contained in:
zpc 2024-08-07 04:42:57 +08:00
parent ac2741eb37
commit b656a0b261
11 changed files with 531 additions and 9 deletions

View File

@ -46,6 +46,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "HuanMeng.MiaoYu.WebApi", "s
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "HuanMeng.Utility", "src\0-core\HuanMeng.Utility\HuanMeng.Utility.csproj", "{48E1532F-8B50-477C-BB78-8AEA89A167CE}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TextCensorFilterTest", "src\9-test\TextCensorFilterTest\TextCensorFilterTest.csproj", "{CF7FEDBA-FC1A-4D6D-92F1-6882B5143E0A}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@ -96,6 +98,10 @@ Global
{48E1532F-8B50-477C-BB78-8AEA89A167CE}.Debug|Any CPU.Build.0 = Debug|Any CPU
{48E1532F-8B50-477C-BB78-8AEA89A167CE}.Release|Any CPU.ActiveCfg = Release|Any CPU
{48E1532F-8B50-477C-BB78-8AEA89A167CE}.Release|Any CPU.Build.0 = Release|Any CPU
{CF7FEDBA-FC1A-4D6D-92F1-6882B5143E0A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{CF7FEDBA-FC1A-4D6D-92F1-6882B5143E0A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{CF7FEDBA-FC1A-4D6D-92F1-6882B5143E0A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{CF7FEDBA-FC1A-4D6D-92F1-6882B5143E0A}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@ -119,6 +125,7 @@ Global
{6E79742F-1E56-4B7D-94E8-B509D43561FA} = {DD14191F-22CE-48D8-A944-B8A41C97ACD4}
{729950F2-71EE-42C0-8B46-295740DE20BA} = {0C0B6EB5-E41D-46D9-9F60-90D320A2EEF3}
{48E1532F-8B50-477C-BB78-8AEA89A167CE} = {DD14191F-22CE-48D8-A944-B8A41C97ACD4}
{CF7FEDBA-FC1A-4D6D-92F1-6882B5143E0A} = {8D39E84B-2810-41D7-AFE6-0A58E09E34C3}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {4A1DC406-AFAA-4884-859C-51B9B26E37FC}

View File

@ -0,0 +1,157 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace HuanMeng.DotNetCore.TextCensor
{
/// <summary>
/// 验证字符串是否违规
/// </summary>
//[Obsolete]
//public static class CheckTextVerification
//{
// static HashSet<string> ShieldStr = new HashSet<string>();
// static List<string> ShieldListStr = new List<string>();
// static string[] ShieldString = new string[0];
// static CheckTextVerification()
// {
// var ckPath = Path.GetFullPath("I:\\test\\PrayForBlessings\\PrayForBlessings\\ciku/");
// var filePath = Directory.EnumerateFiles(ckPath);
// foreach (var item in filePath)
// {
// CheckTextVerification.AddShieldString(item);
// }
// //
// }
// /// <summary>
// ///
// /// </summary>
// /// <param name="path"></param>
// /// <exception cref="Exception"></exception>
// public static void AddShieldString(string path)
// {
// if (!File.Exists(path))
// {
// throw new Exception("文件不存在");
// }
// HashSet<string> strings = new HashSet<string>();
// using (StreamReader reader = new StreamReader(path, UnicodeEncoding.UTF8))
// {
// while (reader.Peek() > 0)
// {
// var tempStr = (reader.ReadLine() ?? "");
// //string result = Regex.Replace(tempStr, pattern, "");
// if (!string.IsNullOrEmpty(tempStr))
// {
// var tempStringToLower = tempStr.ToLower().Split(new char[] { '、', ',' });
// string[] filteredArrayToLower = tempStringToLower.Where(s => !string.IsNullOrEmpty(s)).ToArray();
// strings.UnionWith(filteredArrayToLower);
// }
// }
// ShieldStr.UnionWith(strings);
// ShieldListStr.AddRange(ShieldStr);
// ShieldString = ShieldStr.ToArray();
// Console.WriteLine(string.Format($"[{Path.GetFileName(path)}]加载完毕,总共加载屏蔽字行数:{ShieldStr.Count}"));
// }
// }
// /// <summary>
// ///
// /// </summary>
// /// <param name="sourceTxt"></param>
// /// <returns></returns>
// public static bool VerifyTxt(string sourceTxt)
// {
// if (!string.IsNullOrEmpty(sourceTxt))
// {
// //先去掉空格,特殊字符
// string cleanedText = CleanedTextString(sourceTxt);
// foreach (var item in ShieldStr)
// {
// if (cleanedText.Contains(item))
// {
// return false;
// }
// }
// }
// return true;
// }
// /// <summary>
// ///
// /// </summary>
// /// <param name="sourceTxt"></param>
// /// <returns></returns>
// public static bool VerifyTxtList(string sourceTxt)
// {
// if (!string.IsNullOrEmpty(sourceTxt))
// {
// //先去掉空格,特殊字符
// string cleanedText = CleanedTextString(sourceTxt);
// foreach (var item in ShieldListStr)
// {
// if (cleanedText.Contains(item))
// {
// return false;//2.56
// }
// }
// }
// return true;
// }
// private static string replaceString = " ,.。,@*12345690_-";
// /// <summary>
// ///
// /// </summary>
// /// <param name="sourceTxt"></param>
// /// <returns></returns>
// private static string CleanedTextString(string sourceTxt)
// {
// string cleanedText = sourceTxt.ToLower()
// .Replace(" ", "")
// .Replace(",", "")
// .Replace(".", "")
// .Replace("。", "")
// .Replace("", "")
// .Replace("@", "")
// .Replace("-", "")
// .Replace("*", "")
// .Replace("1", "")
// .Replace("2", "")
// .Replace("3", "")
// .Replace("4", "")
// .Replace("5", "")
// .Replace("6", "")
// .Replace("9", "")
// .Replace("0", "")
// .Replace("_", "");
// return cleanedText;
// }
// public static bool VerifyTxtString(string sourceTxt)
// {
// if (!string.IsNullOrEmpty(sourceTxt))
// {
// //先去掉空格,特殊字符
// string cleanedText = CleanedTextString(sourceTxt);
// foreach (var item in ShieldString)
// {
// if (cleanedText.Contains(item))
// {
// return false;//2.56
// }
// }
// }
// return true;
// }
//}
}

View File

@ -0,0 +1,28 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace HuanMeng.DotNetCore.TextCensor
{
/// <summary>
/// Contract for text review (censorship) services.
/// </summary>
public interface ITextCensor
{
/// <summary>
/// Reviews the given text synchronously.
/// </summary>
/// <param name="text">The text to review.</param>
/// <returns>
/// The review verdict. NOTE(review): the SensitiveWordFilter implementation returns
/// <c>true</c> when the text CONTAINS a sensitive word; confirm that every implementation
/// follows the same convention.
/// </returns>
bool TextCensor(string text);
/// <summary>
/// Reviews the given text asynchronously.
/// </summary>
/// <param name="text">The text to review.</param>
/// <returns>
/// A task producing the review verdict (presumably the same convention as
/// <see cref="TextCensor(string)"/> — verify against implementations).
/// </returns>
Task<bool> TextCensorAsync(string text);
}
}

View File

@ -0,0 +1,140 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace HuanMeng.DotNetCore.TextCensor.SensitiveWord
{
/// <summary>
/// Trie-based sensitive-word filter.
/// Words are registered with <see cref="AddSensitiveWord"/>; input text is normalised by
/// <see cref="CleanText"/> (separator characters and some digits removed, lower-cased) and then
/// scanned for any registered word.
/// </summary>
/// <remarks>
/// Registered words are only lower-cased, whereas input text is also stripped of the characters
/// in <see cref="StrippedChars"/>. A registered word that itself contains one of those characters
/// can therefore never match.
/// </remarks>
public class SensitiveWordFilter : ITextCensor
{
    /// <summary>
    /// A single node of the trie.
    /// </summary>
    private sealed class TrieNode
    {
        /// <summary>
        /// True when the path from the root to this node spells a complete sensitive word.
        /// </summary>
        public bool IsEnd { get; set; }

        /// <summary>
        /// Child nodes keyed by the next character.
        /// </summary>
        public Dictionary<char, TrieNode> Children { get; } = new Dictionary<char, TrieNode>();
    }

    /// <summary>
    /// Characters removed from the input before matching: space, ASCII comma, period,
    /// ideographic full stop, fullwidth comma, '@', '-', '*', the digits 1-6, 9, 0 and '_'.
    /// The digits 7 and 8 are not in the list and are kept.
    /// </summary>
    /// <remarks>
    /// NOTE(review): one entry of the previous Replace chain was an empty/unreadable character
    /// literal (which does not compile). The fullwidth comma U+FF0C is assumed here — TODO confirm
    /// against the intended character.
    /// </remarks>
    private const string StrippedChars = " ,.。\uFF0C@-*12345690_";

    /// <summary>
    /// Root of the trie; it represents the empty prefix and is never a word end that is checked.
    /// </summary>
    private readonly TrieNode _root = new TrieNode();

    /// <summary>
    /// Creates an empty filter with no registered words.
    /// </summary>
    public SensitiveWordFilter()
    {
    }

    /// <summary>
    /// Adds a sensitive word to the trie. The word is stored lower-cased (invariant culture).
    /// </summary>
    /// <param name="word">The word to register. Null or empty values are ignored.</param>
    public void AddSensitiveWord(string word)
    {
        // Previously a null word raised NullReferenceException and an empty word was a no-op
        // for matching purposes; both are now skipped explicitly.
        if (string.IsNullOrEmpty(word))
        {
            return;
        }

        TrieNode currentNode = _root;
        // Invariant lower-casing keeps insertion and lookup consistent regardless of the
        // culture of the calling thread.
        foreach (char c in word.ToLowerInvariant())
        {
            // Single lookup: create the child only when it does not exist yet.
            if (!currentNode.Children.TryGetValue(c, out var next))
            {
                next = new TrieNode();
                currentNode.Children[c] = next;
            }

            currentNode = next;
        }

        // Mark the last node as the end of a sensitive word.
        currentNode.IsEnd = true;
    }

    /// <summary>
    /// Normalises text for matching: removes every character listed in
    /// <see cref="StrippedChars"/> and lower-cases the remainder (invariant culture).
    /// </summary>
    /// <param name="sourceTxt">The raw text.</param>
    /// <returns>The normalised text, or an empty string when the input is null or empty.</returns>
    public string CleanText(string sourceTxt)
    {
        if (string.IsNullOrEmpty(sourceTxt))
        {
            return string.Empty;
        }

        // One pass over the input instead of a chain of Replace calls, each of which
        // allocated a new intermediate string.
        var builder = new StringBuilder(sourceTxt.Length);
        foreach (char c in sourceTxt)
        {
            if (StrippedChars.IndexOf(c) < 0)
            {
                builder.Append(c);
            }
        }

        // Lower-casing happens last, on the whole string, as before.
        return builder.ToString().ToLowerInvariant();
    }

    /// <summary>
    /// Determines whether the text contains any registered sensitive word.
    /// </summary>
    /// <param name="text">The text to inspect; it is normalised with <see cref="CleanText"/> first.</param>
    /// <returns><c>true</c> when at least one registered word occurs in the normalised text.</returns>
    public bool ContainsSensitiveWord(string text)
    {
        text = CleanText(text);

        // Try every position as the potential start of a sensitive word.
        for (int start = 0; start < text.Length; start++)
        {
            TrieNode node = _root;
            int index = start;

            // Walk the trie for as long as the text keeps following an existing branch.
            while (index < text.Length && node.Children.TryGetValue(text[index], out var child))
            {
                node = child;
                if (node.IsEnd)
                {
                    return true;
                }

                index++;
            }
        }

        return false;
    }

    /// <summary>
    /// Reviews the text synchronously.
    /// </summary>
    /// <param name="text">The text to review.</param>
    /// <returns><c>true</c> when the text contains a sensitive word.</returns>
    public bool TextCensor(string text)
    {
        return ContainsSensitiveWord(text);
    }

    /// <summary>
    /// Reviews the text and returns the verdict as a completed task.
    /// </summary>
    /// <param name="text">The text to review.</param>
    /// <returns>A task whose result is <c>true</c> when the text contains a sensitive word.</returns>
    public Task<bool> TextCensorAsync(string text)
    {
        // The scan is a short, purely in-memory operation, so it is run inline rather than
        // queued to the thread pool with Task.Run.
        return Task.FromResult(ContainsSensitiveWord(text));
    }
}
}

View File

@ -0,0 +1,64 @@
using HuanMeng.DotNetCore.TextCensor.SensitiveWord;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace HuanMeng.DotNetCore.TextCensor
{
/// <summary>
/// Helpers that build a <see cref="SensitiveWordFilter"/> from word-list files on disk.
/// </summary>
public static class TextCensorExtend
{
    /// <summary>
    /// Creates a filter and loads every file found directly inside the given directory
    /// as a word list (one word per line).
    /// </summary>
    /// <param name="_dirPath">Directory containing the word-list files.</param>
    /// <returns>A filter populated with the words from all files in the directory.</returns>
    public static SensitiveWordFilter GetSensitiveWordFilter(string _dirPath)
    {
        var filter = new SensitiveWordFilter();
        string directory = Path.GetFullPath(_dirPath);

        foreach (string file in Directory.EnumerateFiles(directory))
        {
            AddShieldString(file, filter);
        }

        return filter;
    }

    /// <summary>
    /// Reads a UTF-8 word-list file line by line and registers every non-blank line,
    /// trimmed of surrounding whitespace, as a sensitive word.
    /// </summary>
    /// <param name="path">Path of the word-list file.</param>
    /// <param name="sensitiveWordFilter">The filter that receives the words.</param>
    /// <exception cref="ArgumentNullException"><paramref name="sensitiveWordFilter"/> is null.</exception>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    public static void AddShieldString(string path, SensitiveWordFilter sensitiveWordFilter)
    {
        if (sensitiveWordFilter is null)
        {
            throw new ArgumentNullException(nameof(sensitiveWordFilter));
        }

        if (!File.Exists(path))
        {
            // More specific than the bare Exception thrown before; the message is unchanged
            // and callers catching Exception still catch it.
            throw new FileNotFoundException("文件不存在", path);
        }

        using (var reader = new StreamReader(path, Encoding.UTF8))
        {
            // Read until ReadLine reports end of stream instead of probing with Peek.
            for (var line = reader.ReadLine(); line != null; line = reader.ReadLine())
            {
                // Input text has its spaces stripped before matching, so a word with
                // leading/trailing whitespace could never match; trim it here.
                var word = line.Trim();
                if (word.Length > 0)
                {
                    sensitiveWordFilter.AddSensitiveWord(word);
                }
            }
        }
    }
}
}

View File

@ -15,12 +15,18 @@ using XLib.DotNetCore.CacheHelper;
namespace HuanMeng.MiaoYu.Code.Cache
{
/// <summary>
///
/// 缓存扩展-
/// </summary>
public abstract class CommonDataEntityCache<T> : ICacheClearData, ICacheReloadData where T : class
{
//(object lockObj, int cacheTime = 36000)
/// <summary>
///
/// </summary>
protected object lockObj;
/// <summary>
///
/// </summary>
protected int cacheTime;
protected CommonDataEntityCache(object lockObj, int cacheTime = 36000)

View File

@ -81,7 +81,7 @@ namespace HuanMeng.MiaoYu.Code.Cache
if (CharacterCache == null)
{
CharacterCache = MiaoYuCacheExtend.GetMiaoYuDataEntityCache<CharacterCache>(cacheBase);
//CharacterCache = new CharacterEntityCache(cacheBase);
//CharacterCache = new CharacterEntityCache(miaoYuBase);
}
return CharacterCache.DataList ?? new List<CharacterCache>();
}
@ -261,13 +261,13 @@ namespace HuanMeng.MiaoYu.Code.Cache
/// 清除全部缓存
/// </summary>
/// <param name="dao"></param>
public static void ClareMiaoYuDataEntityCache(CacheBase cacheBase)
public static void ClareMiaoYuDataEntityCache(MiaoYuBase miaoYuBase)
{
foreach (var item in CacheLockList)
{
var t = item.Key;
Type cacheType = typeof(MiaoYuDataEntityCache<>).MakeGenericType(t);
var shujuduixiang = Activator.CreateInstance(cacheType, cacheBase, item.Value, 36000);
var shujuduixiang = Activator.CreateInstance(cacheType, miaoYuBase, item.Value, 36000);
var x = shujuduixiang as ICacheClearData;
if (x != null)
{
@ -275,21 +275,27 @@ namespace HuanMeng.MiaoYu.Code.Cache
}
}
var obj = SpecialCacheLockList[typeof(CharacterCache)];
CharacterEntityCache characterEntityCache = new CharacterEntityCache(cacheBase, obj);
CharacterEntityCache characterEntityCache = new CharacterEntityCache(miaoYuBase, obj);
characterEntityCache.ClearData();
var dictionaryInfo= miaoYuBase.DictionaryInfo;
var _dictionaryInfo = dictionaryInfo as ICacheClearData;
if (_dictionaryInfo != null)
{
_dictionaryInfo.ClearData();
}
}
/// <summary>
/// 刷新部缓存
/// </summary>
/// <param name="dao"></param>
public static void ReloadMiaoYuDataEntityCache(CacheBase cacheBase)
public static void ReloadMiaoYuDataEntityCache(MiaoYuBase miaoYuBase)
{
foreach (var item in CacheLockList)
{
var t = item.Key;
Type cacheType = typeof(MiaoYuDataEntityCache<>).MakeGenericType(t);
var shujuduixiang = Activator.CreateInstance(cacheType, cacheBase, item.Value, 36000);
var shujuduixiang = Activator.CreateInstance(cacheType, miaoYuBase, item.Value, 36000);
var x = shujuduixiang as ICacheReloadData;
if (x != null)
{
@ -297,8 +303,14 @@ namespace HuanMeng.MiaoYu.Code.Cache
}
}
var obj = SpecialCacheLockList[typeof(CharacterCache)];
CharacterEntityCache characterEntityCache = new CharacterEntityCache(cacheBase, obj);
CharacterEntityCache characterEntityCache = new CharacterEntityCache(miaoYuBase, obj);
characterEntityCache.ReloadData();
var dictionaryInfo = miaoYuBase.DictionaryInfo;
var _dictionaryInfo = dictionaryInfo as ICacheReloadData;
if (_dictionaryInfo != null)
{
_dictionaryInfo.ReloadData();
}
}
#endregion

View File

@ -1,5 +1,6 @@
using HuanMeng.DotNetCore.MultiTenant;
using HuanMeng.DotNetCore.MultiTenant.Contract;
using HuanMeng.MiaoYu.Code.Cache.Contract;
using HuanMeng.MiaoYu.Code.SysDictionary.Contract;
using HuanMeng.MiaoYu.Model.Dto;
@ -18,7 +19,7 @@ namespace HuanMeng.MiaoYu.Code.SysDictionary.DictionaryNetwork
/// </summary>
/// <param name="dictionaryInfoServer"></param>
/// <param name="tenantInfo"></param>
public class DictionaryInfoNetwork(IDictionaryInfoServer dictionaryInfoServer, ITenantInfo tenantInfo) : IDictionaryInfo
public class DictionaryInfoNetwork(IDictionaryInfoServer dictionaryInfoServer, ITenantInfo tenantInfo) : IDictionaryInfo, ICacheClearData, ICacheReloadData
{
private List<T_Sys_Dictionary>? _dictionaryInfo = null;
/// <summary>
@ -64,5 +65,21 @@ namespace HuanMeng.MiaoYu.Code.SysDictionary.DictionaryNetwork
{
return DictionaryInfo.Where(it => it.ProjectCode == dictionaryEnum.ToString()).OrderBy(it => it.Sort).FirstOrDefault();
}
/// <summary>
/// Re-initialises the dictionary server and replaces the locally held dictionary list
/// with the entries the server returns for <c>tenantInfo</c>.
/// </summary>
/// <returns>Always <c>true</c>.</returns>
public bool ClearData()
{
    dictionaryInfoServer.Initialization();
    _dictionaryInfo = dictionaryInfoServer[tenantInfo];
    return true;
}
/// <summary>
/// Re-initialises the dictionary server and refreshes the locally held dictionary list
/// from the entries the server returns for <c>tenantInfo</c>.
/// </summary>
public void ReloadData()
{
    dictionaryInfoServer.Initialization();
    _dictionaryInfo = dictionaryInfoServer[tenantInfo];
}
}
}

View File

@ -0,0 +1,21 @@
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;
namespace TextCensorFilterTest;
/// <summary>
/// Entry point of the benchmark executable; it runs the benchmarks declared in
/// <see cref="SensitiveWordFilterTest"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the BenchmarkDotNet attributes below are applied to this entry-point class,
/// while the runner is pointed at <see cref="SensitiveWordFilterTest"/> — confirm they are
/// attached to the intended class.
/// </remarks>
[MemoryDiagnoser]
[MarkdownExporterAttribute.GitHub]
[SimpleJob(RuntimeMoniker.Net80, baseline: true)]
[RPlotExporter]
public class Program
{
    /// <summary>
    /// Launches the BenchmarkDotNet run.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args) => BenchmarkRunner.Run<SensitiveWordFilterTest>();
}

View File

@ -0,0 +1,52 @@
using BenchmarkDotNet.Attributes;
using HuanMeng.DotNetCore.TextCensor;
using HuanMeng.DotNetCore.TextCensor.SensitiveWord;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace TextCensorFilterTest
{
/// <summary>
/// BenchmarkDotNet benchmarks that measure <see cref="SensitiveWordFilter"/> on a long
/// and a short sample text.
/// </summary>
[MemoryDiagnoser]
public class SensitiveWordFilterTest
{
    // NOTE(review): machine-specific absolute path to the word-list directory.
    private const string WordListDirectory = "I:\\test\\PrayForBlessings\\PrayForBlessings\\ciku";

    // Long sample passed to the "max" benchmark.
    private const string LongSample = "*林婉儿面露疑惑之色,你突然来这么一声问候,实在令她有些摸不着头脑。作为一位淑女,她自是明白礼数之重要性的。见你竟三番两次地打断了她们之间正在深入的对话,不禁蹙起秀眉,神色间略带不悦。* \"阁下这般反复无常,却是何意?\"她语气平和,却不无不满,\"我们方才正在讨论魔兽山脉的来历,你也说了许多让婉儿费解的奇闻逸事。我正渴望能从你这里获知更多有益的解惑呢。\" *她优雅地抿了一口香茶,似在给自己一些缓冲的时间*\"不知阁下可曾见过其他游历山川的高人?他们对于这处所在是否也有过非同寻常的评说?又或是流传下来的什么古老传闻?\" *说着,她的目光缓缓流转,最终重新落在你的身上,眸中满怀期盼之色*\"若阁下实在没什么更多可说的了,不若就让我回忆起从前所读所闻,慢慢为你道来一二吧?或许还能给你带来一些新的启发。\"";

    // Short sample passed to the small benchmark.
    private const string ShortSample = "*林婉儿面露疑惑之色,";

    // Filter under test, built once per instance from the word-list directory.
    private readonly SensitiveWordFilter sensitiveWordFilter;

    /// <summary>
    /// Loads the word lists and builds the filter used by every benchmark.
    /// </summary>
    public SensitiveWordFilterTest()
    {
        sensitiveWordFilter = TextCensorExtend.GetSensitiveWordFilter(WordListDirectory);
    }

    /// <summary>
    /// Benchmarks the filter against the long sample text.
    /// </summary>
    /// <returns>The filter verdict for the long sample.</returns>
    [Benchmark]
    public bool SensitiveWordFiltermax() => sensitiveWordFilter.TextCensor(LongSample);

    /// <summary>
    /// Benchmarks the filter against the short sample text.
    /// </summary>
    /// <returns>The filter verdict for the short sample.</returns>
    [Benchmark]
    public bool SensitiveWordFilter() => sensitiveWordFilter.TextCensor(ShortSample);

    /// <summary>
    /// Placeholder benchmark with an empty body; the call it used to make is disabled.
    /// </summary>
    [Benchmark]
    public void VerifyTxtString()
    {
    }
}
}

View File

@ -0,0 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\0-core\HuanMeng.DotNetCore\HuanMeng.DotNetCore.csproj" />
</ItemGroup>
</Project>