<kbd id="afajh"><form id="afajh"></form></kbd>
<strong id="afajh"><dl id="afajh"></dl></strong>
    <del id="afajh"><form id="afajh"></form></del>
        1. <th id="afajh"><progress id="afajh"></progress></th>
          <b id="afajh"><abbr id="afajh"></abbr></b>
          <th id="afajh"><progress id="afajh"></progress></th>

          推薦基于.NetCore一款高性能敏感詞檢測開源庫

          共 7151字,需瀏覽 15分鐘

           ·

          2022-10-19 11:08

          今天給大家推薦一款高性能敏感詞檢測開源庫。

          項目簡介

          這是一款基于.Net開發的、高性能敏感詞工具箱,支持繁簡互換、全角半角互換、拼音模糊搜索等功能。功能強大、高性能,秒級檢測億級別的文章。

          技術架構

          1、跨平臺:采用.Net Core3.1開發,支持跨平臺。可以部署在Docker, Windows, Linux, Mac。

          項目結構


          使用方法

          敏感詞檢測

          過濾敏感詞,可以設置跳字長度,默認全角轉半角、忽略大小寫、跳詞、重復詞、黑名單。返回結果包含:關鍵字、關鍵字起始位置、結束位置、關鍵字序號等信息。

              string s = "中國|國人|zg人";    string test = "我是中國人";
          StringSearch iwords = new StringSearch(); iwords.SetKeywords(s.Split('|')); var b = iwords.ContainsAny(test);    Assert.AreEqual(true, b);
          var f = iwords.FindFirst(test); Assert.AreEqual("中國", f);
          var all = iwords.FindAll(test); Assert.AreEqual("中國", all[0]); Assert.AreEqual("國人", all[1]);    Assert.AreEqual(2, all.Count);
          var str = iwords.Replace(test, '*'); Assert.AreEqual("我是***", str);

          敏感詞通配符檢測

          支持正則表達式類型:.?[]|,通過正則表達式可以進行模糊匹配,提升檢測精準度。

              string s = ".[中美]國|國人|zg人";    string test = "我是中國人";
          WordsMatch wordsSearch = new WordsMatch();    wordsSearch.SetKeywords(s.Split('|'));
          var b = wordsSearch.ContainsAny(test);    Assert.AreEqual(true, b);
          var f = wordsSearch.FindFirst(test);    Assert.AreEqual("是中國", f.Keyword);
          var alls = wordsSearch.FindAll(test); Assert.AreEqual("是中國", alls[0].Keyword); Assert.AreEqual(".[中美]國", alls[0].MatchKeyword); Assert.AreEqual(1, alls[0].Start); Assert.AreEqual(3, alls[0].End); Assert.AreEqual(0, alls[0].Index);//返回索引Index,默認從0開始 Assert.AreEqual("國人", alls[1].Keyword);    Assert.AreEqual(2, alls.Count);
          var t = wordsSearch.Replace(test, '*'); Assert.AreEqual("我****", t);

          拼音轉換、繁簡轉換、數字轉大小寫操作

          此工具箱,集成了繁體簡體互轉、拼音轉換、首字母提取、數字轉大小寫,使用例子如下:

          // 轉成簡體    WordsHelper.ToSimplifiedChinese("我愛中國");    WordsHelper.ToSimplifiedChinese("我愛中國",1);// 港澳繁體 轉 簡體    WordsHelper.ToSimplifiedChinese("我愛中國",2);// 臺灣正體 轉 簡體    // 轉成繁體    WordsHelper.ToTraditionalChinese("我愛中國");    WordsHelper.ToTraditionalChinese("我愛中國",1);// 簡體 轉 港澳繁體    WordsHelper.ToTraditionalChinese("我愛中國",2);// 簡體 轉 臺灣正體    // 轉成全角    WordsHelper.ToSBC("abcABC123");    // 轉成半角    WordsHelper.ToDBC("abcABC123");    // 數字轉成中文大寫    WordsHelper.ToChineseRMB(12345678901.12);    // 中文轉成數字    WordsHelper.ToNumber("壹佰貳拾叁億肆仟伍佰陸拾柒萬捌仟玖佰零壹元壹角貳分");    // 獲取全拼    WordsHelper.GetPinyin("我愛中國");//WoAiZhongGuo       WordsHelper.GetPinyin("我愛中國",",");//Wo,Ai,Zhong,Guo       WordsHelper.GetPinyin("我愛中國",true);//WǒàiZhōngGuó
          // 獲取首字母 WordsHelper.GetFirstPinyin("我愛中國");//WAZG // 獲取全部拼音 WordsHelper.GetAllPinyin('傳');//Chuan,Zhuan // 獲取姓名 WordsHelper.GetPinyinForName("單一一")//ShanYiYi WordsHelper.GetPinyinForName("單一一",",")//Shan,Yi,Yi WordsHelper.GetPinyinForName("單一一",true)//ShànYīYī


          性能對比

          下面我們用1000字字符串,進行10萬次性能對比,看看對比結果,測試代碼如下:

                      ReadBadWord();            var text = File.ReadAllText("Talk.txt");
          Console.Write("-------------------- FindFirst OR ContainsAny 100000次 --------------------"); Run("TrieFilter", () => { tf1.HasBadWord(text); }); Run("FastFilter", () => { ff.HasBadWord(text); }); Run("StringSearch(ContainsAny)", () => { stringSearch.ContainsAny(text); }); Run("StringSearchEx(ContainsAny)--- WordsSearchEx(ContainsAny)代碼相同", () => { stringSearchEx.ContainsAny(text); }); Run("StringSearchEx2(ContainsAny)--- WordsSearchEx2(ContainsAny)代碼相同", () => { stringSearchEx2.ContainsAny(text); }); Run("StringSearchEx3(ContainsAny)--- WordsSearchEx3(ContainsAny)代碼相同", () => { stringSearchEx3.ContainsAny(text); }); Run("IllegalWordsSearch(ContainsAny)", () => { illegalWordsSearch.ContainsAny(text); });

          Run("StringSearch(FindFirst)", () => { stringSearch.FindFirst(text); }); Run("StringSearchEx(FindFirst)", () => { stringSearchEx.FindFirst(text); }); Run("StringSearchEx2(FindFirst)", () => { stringSearchEx2.FindFirst(text); }); Run("StringSearchEx3(FindFirst)", () => { stringSearchEx3.FindFirst(text); }); Run("WordsSearch(FindFirst)", () => { wordsSearch.FindFirst(text); }); Run("WordsSearchEx(FindFirst)", () => { wordsSearchEx.FindFirst(text); }); Run("WordsSearchEx2(FindFirst)", () => { wordsSearchEx2.FindFirst(text); }); Run("WordsSearchEx3(FindFirst)", () => { wordsSearchEx3.FindFirst(text); }); Run("IllegalWordsSearch(FindFirst)", () => { illegalWordsSearch.FindFirst(text); });



          Console.Write("-------------------- Find All 100000次 --------------------"); Run("TrieFilter(FindAll)", () => { tf1.FindAll(text); }); Run("FastFilter(FindAll)", () => { ff.FindAll(text); }); Run("StringSearch(FindAll)", () => { stringSearch.FindAll(text); }); Run("StringSearchEx(FindAll)", () => { stringSearchEx.FindAll(text); }); Run("StringSearchEx2(FindAll)", () => { stringSearchEx2.FindAll(text); }); Run("StringSearchEx3(FindAll)", () => { stringSearchEx3.FindAll(text); });

          Run("WordsSearch(FindAll)", () => { wordsSearch.FindAll(text); }); Run("WordsSearchEx(FindAll)", () => { wordsSearchEx.FindAll(text); }); Run("WordsSearchEx2(FindAll)", () => { wordsSearchEx2.FindAll(text); }); Run("WordsSearchEx3(FindAll)", () => { wordsSearchEx3.FindAll(text); }); Run("IllegalWordsSearch(FindAll)", () => { illegalWordsSearch.FindAll(text); });

          Console.Write("-------------------- Replace 100000次 --------------------"); Run("TrieFilter(Replace)", () => { tf1.Replace(text); }); Run("FastFilter(Replace)", () => { ff.Replace(text); }); Run("StringSearch(Replace)", () => { stringSearch.Replace(text); }); Run("WordsSearch(Replace)", () => { wordsSearch.Replace(text); }); Run("StringSearchEx(Replace)--- WordsSearchEx(Replace)代碼相同", () => { stringSearchEx.Replace(text); }); Run("StringSearchEx2(Replace)--- WordsSearchEx2(Replace)代碼相同", () => { stringSearchEx2.Replace(text); }); Run("StringSearchEx3(Replace)--- WordsSearchEx3(Replace)代碼相同", () => { stringSearchEx3.Replace(text); }); Run("IllegalWordsSearch(Replace)", () => { illegalWordsSearch.Replace(text); });

          Console.Write("-------------------- Regex 100次 --------------------"); Run(100, "Regex.IsMatch", () => { re.IsMatch(text); }); Run(100, "Regex.Match", () => { re.Match(text); }); Run(100, "Regex.Matches", () => { re.Matches(text); });

          Console.Write("-------------------- Regex used Trie tree 100次 --------------------"); Run(100, "Regex.IsMatch", () => { re2.IsMatch(text); }); Run(100, "Regex.Match", () => { re2.Match(text); }); Run(100, "Regex.Matches", () => { re2.Matches(text); });

          執行10萬次性能對比,結果如下:

          從測試結果看,此工具比C#自帶的正則效率高8.8倍,如果數據量越大性能優勢越明顯。

          瀏覽 99
          點贊
          評論
          收藏
          分享

          手機掃一掃分享

          分享
          舉報
          評論
          圖片
          表情
          推薦
          點贊
          評論
          收藏
          分享

          手機掃一掃分享

          分享
          舉報
          <kbd id="afajh"><form id="afajh"></form></kbd>
          <strong id="afajh"><dl id="afajh"></dl></strong>
            <del id="afajh"><form id="afajh"></form></del>
                1. <th id="afajh"><progress id="afajh"></progress></th>
                  <b id="afajh"><abbr id="afajh"></abbr></b>
                  <th id="afajh"><progress id="afajh"></progress></th>
                  8050网午夜一级 | 三级毛骗免费看电影 | 国产乱伦黄片 | 酒店操美女| 一本色道无码道 |