爬虫(转载的)
Code
你需要先得到網頁編碼。下面這段代碼可以解決大部分的網頁:
/// <summary>
/// Click handler for button3. Walks a fixed list of sample URLs, fetches each one
/// through GetWebPage using the "GET" method, puts the returned text into textBox1,
/// and then shows the URL in a message box before moving on to the next one.
/// </summary>
/// <param name="sender">Event source; not used by this handler.</param>
/// <param name="e">Event data; not used by this handler.</param>
private void button3_Click(object sender, EventArgs e)
{
    // Sample pages to fetch, in the order they are processed.
    string[] targets = new string[]
    {
        "http://www.kbs.co.kr/",
        "http://rosemary.kbs.co.kr/",
        "http://sbcx.saic.gov.cn/trade/index.jsp",
        "http://www.csdn.net",
        "http://www.google.cn/",
        "http://www.baidu.com",
        "http://www.javaeye.com/",
        "http://blog.163.com/kel_scott66/blog/static/1150539632009614115635700/",
        "http://www.sina.com.hk/",
        "http://www.rthk.org.hk/"
    };

    for (int i = 0; i < targets.Length; i++)
    {
        string address = targets[i];

        // Show the fetched page first, then announce which URL it came from.
        textBox1.Text = GetWebPage(address, "GET");
        MessageBox.Show(address);
    }
}
/// <summary>
/// Downloads the resource at <paramref name="uri"/> once and returns its body as text,
/// decoded with the character set the page declares.
/// </summary>
/// <remarks>
/// The charset is chosen in this order: the charset named in an HTML meta tag, then the
/// charset reported by the response's Content-Type header, then Encoding.Default.
/// A charset name that the runtime does not recognise is skipped instead of failing the call.
/// </remarks>
/// <param name="uri">Absolute HTTP/HTTPS address of the page to fetch.</param>
/// <param name="method">HTTP method to send, e.g. "GET".</param>
/// <returns>
/// The decoded page content, or the literal "獲取失敗!" when the request or decoding fails
/// for any reason. This method does not throw.
/// </returns>
public string GetWebPage(string uri, string method)
{
    try
    {
        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(uri);
        req.Method = method;
        req.Timeout = 10000;
        req.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.2; zh-CN; rv:1.9.1.4) Gecko/20091016 Firefox/3.5.4 (.NET CLR 3.5.30729)";
        // Content-Encoding describes compression (gzip/deflate), not a charset; let the
        // framework undo it so the bytes buffered below are the real document bytes.
        req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

        byte[] body;
        string headerCharset;

        // Dispose the response and its stream so the underlying connection is released.
        using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
        using (Stream receiveStream = res.GetResponseStream())
        using (MemoryStream buffer = new MemoryStream())
        {
            // Buffer the raw bytes so the page is downloaded once and can be decoded
            // after the charset is known.
            byte[] chunk = new byte[8192];
            int read;
            while ((read = receiveStream.Read(chunk, 0, chunk.Length)) > 0)
            {
                buffer.Write(chunk, 0, read);
            }

            body = buffer.ToArray();
            headerCharset = res.CharacterSet; // may be null when there is no Content-Type
        }

        // Charset declarations are plain ASCII, so an ASCII view of the bytes is enough
        // to locate <meta charset=...> or <meta http-equiv=... content="...; charset=...">.
        string asciiView = Encoding.ASCII.GetString(body);
        Match meta = Regex.Match(
            asciiView,
            @"<meta\b[^>]*?\bcharset\s*=\s*[""']?\s*(?<charset>[\w.:\-]+)",
            RegexOptions.IgnoreCase);
        string metaCharset = meta.Success ? meta.Groups["charset"].Value : "";

        // The in-document declaration is tried first (as before); the header value is the fallback.
        Encoding htmlEncoding = Encoding.Default;
        string[] candidates = new string[] { metaCharset, headerCharset };
        foreach (string candidate in candidates)
        {
            if (string.IsNullOrEmpty(candidate))
            {
                continue;
            }

            string charsetName = candidate.Trim().Trim('"', '\'');
            if (charsetName.Length == 0)
            {
                continue;
            }

            try
            {
                htmlEncoding = Encoding.GetEncoding(charsetName);
                break;
            }
            catch (ArgumentException)
            {
                // Unknown or unsupported charset name: try the next candidate.
            }
        }

        // StreamReader still honours a byte-order mark if the document has one.
        using (StreamReader sr = new StreamReader(new MemoryStream(body), htmlEncoding))
        {
            return sr.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Callers rely on a failure string rather than an exception; keep that contract
        // but leave a trace of what went wrong for debugging.
        System.Diagnostics.Debug.WriteLine(ex);
        return "獲取失敗!";
    }
}
你需要先得到網頁編碼。下面這段代碼可以解決大部分的網頁:
/// <summary>
/// Click handler for button3. Iterates over a hard-coded set of sample URLs; for each
/// one it calls GetWebPage with the "GET" method, writes the result to textBox1, and
/// pops up a message box showing the URL that was just fetched.
/// </summary>
/// <param name="sender">Event source; not used by this handler.</param>
/// <param name="e">Event data; not used by this handler.</param>
private void button3_Click(object sender, EventArgs e)
{
    // Sample pages to fetch, processed top to bottom.
    string[] pages = new string[]
    {
        "http://www.kbs.co.kr/",
        "http://rosemary.kbs.co.kr/",
        "http://sbcx.saic.gov.cn/trade/index.jsp",
        "http://www.csdn.net",
        "http://www.google.cn/",
        "http://www.baidu.com",
        "http://www.javaeye.com/",
        "http://blog.163.com/kel_scott66/blog/static/1150539632009614115635700/",
        "http://www.sina.com.hk/",
        "http://www.rthk.org.hk/"
    };

    int index = 0;
    while (index < pages.Length)
    {
        string current = pages[index];

        // Display the downloaded text, then report the URL it belongs to.
        textBox1.Text = GetWebPage(current, "GET");
        MessageBox.Show(current);

        index++;
    }
}
/// <summary>
/// Fetches <paramref name="uri"/> with a single HTTP request and returns the body as text,
/// decoded using the character set declared by the page.
/// </summary>
/// <remarks>
/// Charset resolution order: the charset named in an HTML meta tag, then the charset from
/// the response's Content-Type header, then Encoding.Default. Charset names the runtime
/// does not support are skipped rather than treated as a failure.
/// </remarks>
/// <param name="uri">Absolute HTTP/HTTPS address of the page to fetch.</param>
/// <param name="method">HTTP method to send, e.g. "GET".</param>
/// <returns>
/// The decoded page content, or the literal "獲取失敗!" when anything goes wrong while
/// requesting or decoding. This method does not throw.
/// </returns>
public string GetWebPage(string uri, string method)
{
    try
    {
        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(uri);
        req.Method = method;
        req.Timeout = 10000;
        req.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.2; zh-CN; rv:1.9.1.4) Gecko/20091016 Firefox/3.5.4 (.NET CLR 3.5.30729)";
        // Content-Encoding is a compression scheme (gzip/deflate), never a charset;
        // have the framework decompress so the buffered bytes are the document itself.
        req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

        byte[] body;
        string headerCharset;

        // The using blocks release the connection even when reading fails part-way.
        using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
        using (Stream receiveStream = res.GetResponseStream())
        using (MemoryStream buffer = new MemoryStream())
        {
            // Keep the raw bytes: they are inspected for a charset and then decoded,
            // which avoids requesting the page a second time.
            byte[] chunk = new byte[8192];
            int read;
            while ((read = receiveStream.Read(chunk, 0, chunk.Length)) > 0)
            {
                buffer.Write(chunk, 0, read);
            }

            body = buffer.ToArray();
            headerCharset = res.CharacterSet; // may be null when there is no Content-Type
        }

        // Charset declarations consist of ASCII characters, so an ASCII decode is enough
        // to find <meta charset=...> or <meta http-equiv=... content="...; charset=...">.
        string asciiView = Encoding.ASCII.GetString(body);
        Match meta = Regex.Match(
            asciiView,
            @"<meta\b[^>]*?\bcharset\s*=\s*[""']?\s*(?<charset>[\w.:\-]+)",
            RegexOptions.IgnoreCase);
        string metaCharset = meta.Success ? meta.Groups["charset"].Value : "";

        // Prefer what the document says about itself (as before); fall back to the header.
        Encoding htmlEncoding = Encoding.Default;
        string[] candidates = new string[] { metaCharset, headerCharset };
        foreach (string candidate in candidates)
        {
            if (string.IsNullOrEmpty(candidate))
            {
                continue;
            }

            string charsetName = candidate.Trim().Trim('"', '\'');
            if (charsetName.Length == 0)
            {
                continue;
            }

            try
            {
                htmlEncoding = Encoding.GetEncoding(charsetName);
                break;
            }
            catch (ArgumentException)
            {
                // Unknown or unsupported charset name: try the next candidate.
            }
        }

        // StreamReader still honours a byte-order mark if the document has one.
        using (StreamReader sr = new StreamReader(new MemoryStream(body), htmlEncoding))
        {
            return sr.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // The failure string is the established contract for callers; record the
        // cause for debugging instead of dropping it silently.
        System.Diagnostics.Debug.WriteLine(ex);
        return "獲取失敗!";
    }
}
轉載于:https://www.cnblogs.com/z2002m/archive/2009/11/09/1599176.html
總結
- 上一篇: 即将发布的文章
- 下一篇: silverlight中如何将strin