网络神采关键词过滤NET插件
生活随笔
收集整理的這篇文章主要介紹了
网络神采关键词过滤NET插件
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
此處代碼僅供參考,完整代碼請下載附件閱讀。
不說廢話,直接貼代碼:
插件接口實現:
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Data;
using System.IO;
using System.Text;
using System.Windows.Forms;
using Bget.Plugin;

namespace HX_Plug
{
    /// <summary>
    /// Plugin entry point implementing the <c>IBget</c> plugin interface.
    /// Every callback except <see cref="ProcessResultRow"/> is a stub that
    /// (at most) writes a log line and returns a fixed value; the actual work,
    /// keyword filtering of the second column of each result row, happens in
    /// <see cref="ProcessResultRow"/>.
    /// </summary>
    public class Plug : IBget
    {
        /// <summary>Raised for every message the plugin wants to log.</summary>
        public event LogEventHanlder Log;

        /// <summary>Plugin creation callback; only logs.</summary>
        /// <remarks>
        /// The <c>Action</c> parameter type is fully qualified (as <see cref="Dispose"/>
        /// already does) so that it cannot be confused with <c>System.Action</c>.
        /// </remarks>
        public void Create(string taskPath, string pluginPath, BgetInformation bgetInfo, Bget.Plugin.Action action, bool firstCall)
        {
            this.WriteLog("創建插件...");
        }

        /// <summary>Plugin destruction callback; only logs.</summary>
        public void Dispose(Bget.Plugin.Action action)
        {
            this.WriteLog("銷毀插件...");
        }

        /// <summary>"Downloading content file" callback; only logs.</summary>
        public void DownloadContentFile(string url, string path, bool skipIfFileExisted, string cookie, string referer)
        {
            this.WriteLog("正在下載內容文件...");
        }

        /// <summary>"Downloading single file" callback; logs and builds a file name.</summary>
        /// <returns><paramref name="fileNamePrefix"/> followed by the file-name part of <paramref name="path"/>.</returns>
        public string DownloadSingleFile(string url, string path, string fileNamePrefix, bool skipIfFileExisted, string cookie, string referer)
        {
            this.WriteLog("正在下載獨立文件...");
            return fileNamePrefix + Path.GetFileName(path);
        }

        /// <summary>Result extraction callback; logs and returns an empty string.</summary>
        public string ExtractResult(string extractionRule, string dataColumn, string htmlText, string url)
        {
            this.WriteLog("提取結果...");
            return "";
        }

        /// <summary>Result screening callback; logs and always returns <c>true</c>.</summary>
        public bool Filter(string result, string extractionRule, string dataColumn, System.Data.DataRow extractingResultRow)
        {
            this.WriteLog("正在進行采集結果篩選...");
            return true;
        }

        /// <summary>Reports the options this plugin requires: none.</summary>
        public RequiredOptions GetRequiredOptions()
        {
            this.WriteLog("所需選項...");
            return RequiredOptions.None;
        }

        /// <summary>Creates the plugin's settings form (a new <c>hx_Plug</c> instance).</summary>
        public Form GetSettingForm(string taskPath, string pluginPath, Bget.Plugin.BgetInformation bgetInfo)
        {
            return new hx_Plug();
        }

        /// <summary>Proxy lookup callback; logs and returns <c>null</c>.</summary>
        public BgetWebProxy GetWebProxy(string requestingUrl, int retryTimes)
        {
            this.WriteLog("獲取代理...");
            return null;
        }

        /// <summary>"Load starting URL from database" callback; logs and returns a fixed URL.</summary>
        public string LoadStartingUrl(string template, ref int position, string cookie)
        {
            this.WriteLog("從數據庫載入起始地址...");
            return "http://www.sensite.cn";
        }

        /// <summary>Login callback; logs and returns an empty string.</summary>
        public string Login(string url)
        {
            this.WriteLog("正在登錄...");
            return "";
        }

        /// <summary>Next-layer URL selection callback; returns <c>null</c> without logging.</summary>
        public StringCollection PickNextLayerUrls(string htmlText, string layer, string url, string cookie)
        {
            return null;
        }

        /// <summary>Next-page URL selection callback; returns an empty string without logging.</summary>
        public string PickNextPageUrl(string htmlText, string layer, string url, string cookie)
        {
            return "";
        }

        /// <summary>"Processing downloaded content file" callback; only logs.</summary>
        public void ProcessContentFile(string path, bool skipped)
        {
            this.WriteLog("正在處理下載后的內容文件...");
        }

        /// <summary>
        /// Runs the keyword filter over column 1 of the row and writes the
        /// filtered text back into the same column. Column 0 is only logged.
        /// </summary>
        /// <param name="extractedResultRow">The result row to process.</param>
        /// <returns>Always <c>true</c>.</returns>
        public bool ProcessResultRow(System.Data.DataRow extractedResultRow)
        {
            this.WriteLog("《紅星關鍵字過濾插件 V1.0》");

            // Columns 0 and 1 are indexed below; a missing row or a row with
            // fewer than two columns would otherwise throw.
            if (extractedResultRow == null || extractedResultRow.Table.Columns.Count < 2)
            {
                this.WriteLog("結果數據行少于兩列,跳過關鍵字過濾。");
                return true;
            }

            this.WriteLog(string.Format("過濾:{0}", extractedResultRow[0].ToString()));

            KeywordFilter keyFilter = new KeywordFilter();
            extractedResultRow[1] = keyFilter.On_Filter(extractedResultRow[1].ToString());
            return true;
        }

        /// <summary>"Processing downloaded single file" callback; logs and builds a file name.</summary>
        /// <returns><paramref name="fileNamePrefix"/> followed by the file-name part of <paramref name="path"/>.</returns>
        public string ProcessSingleFile(string path, string fileNamePrefix, bool skipped)
        {
            this.WriteLog("正在處理下載后的獨立文件...");
            return fileNamePrefix + Path.GetFileName(path);
        }

        /// <summary>URL request callback; logs the URL and returns a fixed test page.</summary>
        public string Visit(string url, byte[] postData, string layer, string cookie, string referer)
        {
            this.WriteLog("正在請求URL: " + url);
            return "<html>test</html>";
        }

        /// <summary>Raises <see cref="Log"/> with the given message, if anyone is subscribed.</summary>
        private void WriteLog(string message)
        {
            // Copy to a local so an unsubscribe between the null check and the
            // invocation cannot cause a NullReferenceException.
            LogEventHanlder handler = this.Log;
            if (handler != null)
            {
                handler(this, new LogEventArgs(message));
            }
        }

        /// <summary>Raises <see cref="Log"/> with the given message and indent, if anyone is subscribed.</summary>
        private void WriteLog(string message, int indent)
        {
            LogEventHanlder handler = this.Log;
            if (handler != null)
            {
                handler(this, new LogEventArgs(message, indent));
            }
        }
    }
}

// ---------------------------------------------------------------------------
// Implementation of the actual filtering functionality
// (presented as a separate listing in the original article).
// ---------------------------------------------------------------------------
namespace HX_Plug
{
    /// <summary>
    /// Article content filter: replaces configured keywords, limits the
    /// article length, shuffles some sentences and re-emits the text as
    /// simple HTML (<c>&lt;h3&gt;</c> headings plus <c>&lt;p&gt;</c> paragraphs).
    /// </summary>
    public class KeywordFilter
    {
        /// <summary>Articles longer than this are truncated.</summary>
        private const int MaxContentLength = 2000;

        /// <summary>Articles shorter than this are discarded (become an empty string).</summary>
        private const int MinContentLength = 300;

        /// <summary>Maximum number of characters taken from a sentence to form a heading.</summary>
        private const int HeadingLength = 8;

        /// <summary>Number of sentences grouped into one paragraph.</summary>
        private const int SentencesPerParagraph = 5;

        /// <summary>One random swap is attempted per this many sentences.</summary>
        private const int SentencesPerSwap = 20;

        /// <summary>Character on which the article is split into sentences.</summary>
        private const char SentenceSeparator = '。';

        /// <summary>
        /// Characters removed from a sentence before it is used as a heading.
        /// NOTE(review): the original code stripped the ASCII comma twice, which
        /// suggests one of them was meant to be the full-width comma (U+FF0C);
        /// both are stripped here. Confirm against the original project source.
        /// </summary>
        private static readonly char[] HeadingNoise = new char[] { ',', '\uFF0C', '"', '“', '”', ' ' };

        private List<FilterStruct> _filter = new List<FilterStruct>();

        /// <summary>
        /// Loads the keyword replacement rules (<c>oldValue</c>, <c>newValue</c>)
        /// from the <c>Filter</c> table. If the query yields no table the rule
        /// list stays empty.
        /// </summary>
        public KeywordFilter()
        {
            DataTable rules;
            DBase db = new DBase();
            try
            {
                rules = db.GetDataTable("select oldValue,newValue from Filter");
            }
            finally
            {
                // Invoke the public Dispose() explicitly so the connection is
                // closed regardless of how DBase maps IDisposable.Dispose.
                db.Dispose();
            }

            if (rules == null)
            {
                return;
            }

            foreach (DataRow row in rules.Rows)
            {
                FilterStruct rule = new FilterStruct();
                rule.OldValue = row[0].ToString();
                rule.NewValue = row[1].ToString();
                _filter.Add(rule);
            }
        }

        /// <summary>
        /// Applies the full filter pipeline: keyword replacement, length
        /// limiting, sentence shuffling/HTML formatting, and, when the result
        /// is not empty, a trailing signature.
        /// </summary>
        /// <param name="Content">The article text; <c>null</c> is treated as empty.</param>
        /// <returns>The filtered text, or an empty string when the article was discarded.</returns>
        public string On_Filter(string Content)
        {
            if (Content == null)
            {
                Content = string.Empty;
            }

            Content = ReplaceKeyword(Content);               // plain keyword replacement
            Content = SubContent(Content, MaxContentLength); // limit the article length
            Content = SpltParagraph(Content);                // shuffle sentences

            if (Content != string.Empty)
            {
                Content += "《紅星關鍵字過濾系統V1.0》";
            }

            return Content;
        }

        /// <summary>
        /// Replaces every configured keyword with its replacement, in rule order.
        /// </summary>
        /// <param name="Content">The text to process.</param>
        /// <returns>The text with all replacements applied.</returns>
        /// <remarks>
        /// The previous version also appended the number of rules to the text,
        /// which leaked a debugging value into the article; that is no longer done.
        /// </remarks>
        private string ReplaceKeyword(string Content)
        {
            foreach (FilterStruct rule in _filter)
            {
                // String.Replace throws ArgumentException for an empty search
                // string, so a blank rule must be skipped.
                if (string.IsNullOrEmpty(rule.OldValue))
                {
                    continue;
                }

                Content = Content.Replace(rule.OldValue, rule.NewValue);
            }

            return Content;
        }

        /// <summary>
        /// Shuffles some sentences and rebuilds the article as HTML: sentences
        /// are grouped five to a paragraph, and each paragraph is preceded by a
        /// heading derived from its first sentence.
        /// </summary>
        /// <param name="Content">The original text.</param>
        /// <returns>
        /// The rebuilt HTML, or <paramref name="Content"/> unchanged when
        /// splitting on '。' yields five parts or fewer.
        /// </returns>
        private string SpltParagraph(string Content)
        {
            string[] parts = Content.Split(SentenceSeparator);
            if (parts.Length <= SentencesPerParagraph)
            {
                return Content;
            }

            // Drop empty fragments (e.g. the one after a trailing '。') so they
            // do not produce empty headings or paragraphs.
            List<string> kept = new List<string>(parts.Length);
            foreach (string part in parts)
            {
                if (part.Length != 0)
                {
                    kept.Add(part);
                }
            }

            if (kept.Count == 0)
            {
                return Content;
            }

            string[] sentences = kept.ToArray();

            // Randomly swap a small share of the sentences.
            Random random = new Random();
            int swapCount = sentences.Length / SentencesPerSwap;
            for (int i = 0; i < swapCount; i++)
            {
                sentences = RandomParagraph(random.Next(sentences.Length), random.Next(sentences.Length), sentences);
            }

            // Reassemble. Split() removed the '。' terminators, so each sentence
            // gets its terminator back, and every opened <p> is closed.
            StringBuilder html = new StringBuilder(Content.Length + 64);
            for (int i = 0; i < sentences.Length; i++)
            {
                if (i % SentencesPerParagraph == 0)
                {
                    if (i != 0)
                    {
                        html.Append("</p>");
                    }

                    html.Append(BuildHeading(sentences[i]));
                    html.Append("<p>");
                }

                html.Append(sentences[i]);
                html.Append(SentenceSeparator);
            }

            html.Append("</p>");
            return html.ToString();
        }

        /// <summary>
        /// Builds the heading text emitted in front of a paragraph.
        /// </summary>
        /// <param name="sentence">The first sentence of the paragraph.</param>
        /// <returns>
        /// For a sentence longer than eight characters: an <c>&lt;h3&gt;</c> element
        /// holding up to the first eight characters that remain after commas,
        /// quotes and spaces are removed (an empty string if nothing remains).
        /// For a shorter sentence: the sentence itself, unwrapped.
        /// </returns>
        private static string BuildHeading(string sentence)
        {
            // NOTE(review): a short sentence is emitted verbatim in front of its
            // own paragraph, as the original code did; confirm this is intended.
            if (sentence.Length <= HeadingLength)
            {
                return sentence;
            }

            StringBuilder stripped = new StringBuilder(sentence.Length);
            foreach (char c in sentence)
            {
                if (Array.IndexOf(HeadingNoise, c) < 0)
                {
                    stripped.Append(c);
                }
            }

            if (stripped.Length == 0)
            {
                return string.Empty;
            }

            // Stripping can leave fewer than HeadingLength characters, so the
            // length must be clamped before taking the prefix.
            int take = Math.Min(HeadingLength, stripped.Length);
            return "<h3>" + stripped.ToString(0, take) + "</h3>";
        }

        /// <summary>
        /// Swaps two entries of the sentence array in place.
        /// </summary>
        /// <param name="start">Index of the first entry.</param>
        /// <param name="end">Index of the second entry.</param>
        /// <param name="Paragraph">The sentence array.</param>
        /// <returns>
        /// The same array instance; it is left untouched when the indices are
        /// equal or either one is outside the array.
        /// </returns>
        private string[] RandomParagraph(int start, int end, string[] Paragraph)
        {
            bool inRange = start >= 0 && end >= 0 && start < Paragraph.Length && end < Paragraph.Length;
            if (inRange && start != end)
            {
                string swap = Paragraph[start];
                Paragraph[start] = Paragraph[end];
                Paragraph[end] = swap;
            }

            return Paragraph;
        }

        /// <summary>
        /// Limits the article to a given length.
        /// </summary>
        /// <param name="Content">The article text.</param>
        /// <param name="length">Maximum length to keep.</param>
        /// <returns>
        /// The first <paramref name="length"/> characters when the text is longer
        /// than that; an empty string when the text is shorter than 300
        /// characters; otherwise the text unchanged.
        /// </returns>
        private string SubContent(string Content, int length)
        {
            if (Content.Length > length)
            {
                return Content.Substring(0, length);
            }

            if (Content.Length < MinContentLength)
            {
                return string.Empty;
            }

            return Content;
        }
    }

    /// <summary>
    /// One keyword replacement rule.
    /// </summary>
    public struct FilterStruct
    {
        /// <summary>The text to be replaced.</summary>
        public string OldValue;

        /// <summary>The text it is replaced with.</summary>
        public string NewValue;
    }
}
// Low-level database connection class. (The author packaged this class as a
// general-purpose DLL and has reused it unchanged for a long time.)
using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.OleDb;
using System.Windows.Forms;

namespace HX_Plug
{
    /// <summary>
    /// Basic Access database helper for the data layer.
    /// Usage: 1. construct the object; 2. check <see cref="Is_OpenState"/>;
    /// 3. run queries; 4. call <see cref="Dispose"/> (directly or via <c>using</c>).
    /// </summary>
    public class DBase : IDisposable
    {
        /// <summary>
        /// Whether the connection was opened successfully: <c>true</c> on
        /// success, <c>false</c> on failure or after <see cref="Dispose"/>.
        /// </summary>
        public bool Is_OpenState = false;

        /// <summary>Access database connection string.</summary>
        private string strOleConn;

        /// <summary>Access database connection object; <c>null</c> once disposed.</summary>
        private OleDbConnection oleConn;

        /// <summary>Set once <see cref="Dispose"/> has run, making further calls no-ops.</summary>
        private bool disposed;

        /// <summary>Backing field for <see cref="LastError"/>.</summary>
        private Exception lastError;

        /// <summary>
        /// The most recent exception swallowed by this class while opening,
        /// closing or querying, or <c>null</c> if none occurred. Lets callers
        /// find out why <see cref="Is_OpenState"/> is <c>false</c> or why
        /// <see cref="GetDataTable"/> returned <c>null</c>.
        /// </summary>
        public Exception LastError
        {
            get { return lastError; }
        }

        /// <summary>
        /// Creates the connection and tries to open it; the outcome is stored
        /// in <see cref="Is_OpenState"/> rather than thrown.
        /// </summary>
        public DBase()
        {
            // NOTE(review): "User.mdb" is a relative path; presumably it is
            // resolved against the process working directory - confirm.
            strOleConn = "Provider=Microsoft.Jet.OLEDB.4.0;Data source=User.mdb;";
            oleConn = new OleDbConnection(strOleConn);
            Is_OpenState = Open();
        }

        /// <summary>Opens the connection if it is currently closed.</summary>
        /// <returns><c>true</c> if no error occurred; otherwise <c>false</c>.</returns>
        private bool Open()
        {
            try
            {
                if (oleConn.State == ConnectionState.Closed)
                {
                    oleConn.Open();
                }

                return true;
            }
            catch (Exception ex)
            {
                // Deliberately best-effort: failure is reported via the return
                // value, and the cause is kept for diagnosis.
                lastError = ex;
                return false;
            }
        }

        /// <summary>Closes the connection if it is currently open.</summary>
        /// <returns><c>true</c> if no error occurred; otherwise <c>false</c>.</returns>
        private bool Close()
        {
            if (oleConn == null)
            {
                return true;
            }

            try
            {
                if (oleConn.State == ConnectionState.Open)
                {
                    oleConn.Close();
                }

                return true;
            }
            catch (Exception ex)
            {
                lastError = ex;
                return false;
            }
        }

        /// <summary>
        /// Closes and releases the connection. Safe to call more than once.
        /// </summary>
        /// <remarks>
        /// This single public method is also the <see cref="IDisposable"/>
        /// implementation, so <c>using (DBase db = ...)</c> really closes the
        /// connection. The class has no finalizer: it owns only a managed
        /// <see cref="OleDbConnection"/>, which must not be touched from a
        /// finalizer.
        /// </remarks>
        public void Dispose()
        {
            if (disposed)
            {
                return;
            }

            disposed = true;

            Close();
            if (oleConn != null)
            {
                oleConn.Dispose();
                oleConn = null;
            }

            Is_OpenState = false;
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Executes a SQL statement and returns the first result table.
        /// </summary>
        /// <param name="sqlCommand">The SQL statement to execute.</param>
        /// <returns>
        /// The first table of the result, or <c>null</c> when the object has
        /// been disposed, the statement fails, or it produces no table.
        /// </returns>
        public DataTable GetDataTable(string sqlCommand)
        {
            if (oleConn == null)
            {
                return null;
            }

            try
            {
                DataSet ds = new DataSet();
                using (OleDbDataAdapter da = new OleDbDataAdapter(sqlCommand, oleConn))
                {
                    da.Fill(ds);
                }

                return ds.Tables.Count > 0 ? ds.Tables[0] : null;
            }
            catch (Exception ex)
            {
                lastError = ex;
                return null;
            }
        }
    }
}
悲哀,沒有找到上傳附件的功能。需要的話給我留個消息吧,我給你發過去。
附修改:
由于上網時間比較少,急需源代碼的童鞋可以直接發送郵件To:549015917@qq.com;注明標題和內容,這樣可以得到最快的處理!
總結
以上是生活随笔為你收集整理的网络神采关键词过滤NET插件的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Ubuntu18安装Realsense
- 下一篇: 软件测试有效性指标,评价软件测试的有效性