猫眼api html,爬取猫眼电影数据(示例代码)
/// <summary>
/// Crawls Maoyan film detail pages for a fixed range of film ids and appends one
/// SQL INSERT statement per successfully parsed film to a text file.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
/// <remarks>
/// Each page is first loaded with <c>HtmlWeb</c>; if the title node is missing (the site
/// served its verification page), the page is re-fetched through <c>URLRequest</c>.
/// A film whose required nodes cannot be found with either method is counted as a failure.
/// </remarks>
static void Main(string[] args)
{
    const int firstId = 401; // first film id to crawl
    const int lastId = 450;  // last film id to crawl (inclusive)

    // NOTE(review): the original literal was @"d:Sql.txt" (drive-relative, so it depends on the
    // current directory of drive D:). A backslash was most likely lost; confirm the intended path.
    const string outputPath = @"d:\Sql.txt";

    const string titleXPath = "//div[@class='movie-brief-container']/h1[@class='name']";
    const string posterXPath = "//div[@class='celeInfo-left']/div[@class='avatar-shadow']/img[@class='avatar']";
    const string infoXPath = "//div[@class='movie-brief-container']/ul/li[@class='ellipsis']";
    const string introXPath = "//div[@class='mod-content']/span[@class='dra']";

    int errorCount = 0;   // number of films that failed to crawl
    int successCount = 0; // number of films written to the output file

    // One HtmlWeb instance is enough for the whole run.
    HtmlWeb web = new HtmlWeb();
    web.OverrideEncoding = Encoding.UTF8;

    for (int i = firstId; i <= lastId; i++)
    {
        Thread.Sleep(2000); // crawl once every two seconds to avoid stressing the server
        try
        {
            string url = $"https://maoyan.com/films/{i}";
            HtmlDocument doc = web.Load(url);

            // Film title. A missing title means we landed on the verification page,
            // so retry with the second fetch method and parse that response instead.
            HtmlNode movieTitle = doc.DocumentNode.SelectSingleNode(titleXPath);
            if (movieTitle == null)
            {
                HtmlDocument fallbackDoc = new HtmlDocument();
                fallbackDoc.LoadHtml(URLRequest(url));
                doc = fallbackDoc;
                // If this also fails, the verification must be completed manually in a
                // browser (or automated separately); the film is reported as a failure.
                movieTitle = SelectRequiredNode(doc, titleXPath, "title");
            }
            string title = movieTitle.InnerText;

            // Film poster URL.
            HtmlNode moviePoster = SelectRequiredNode(doc, posterXPath, "poster");
            string imgurl = moviePoster.GetAttributeValue("src", "");

            // Info list: [0] = genres, [1] = second entry, [2] = third entry
            // (stored as artime / releasetime, as in the original code).
            HtmlNodeCollection movieInfo = doc.DocumentNode.SelectNodes(infoXPath);
            if (movieInfo == null || movieInfo.Count < 3)
            {
                throw new InvalidOperationException(
                    $"Expected at least 3 info entries, found {(movieInfo == null ? 0 : movieInfo.Count)}.");
            }

            // Genres are joined as "a-b-" (trailing dash kept for output compatibility).
            StringBuilder typesBuilder = new StringBuilder();
            foreach (HtmlNode item in movieInfo[0].ChildNodes)
            {
                string text = item.InnerText.Trim();
                if (text != "")
                {
                    typesBuilder.Append(text).Append('-');
                }
            }
            string types = typesBuilder.ToString();
            string artime = movieInfo[1].InnerText;
            string releasetime = movieInfo[2].InnerText;

            // Plot introduction.
            string intro = SelectRequiredNode(doc, introXPath, "introduction").InnerText.Trim();

            // Scraped text is untrusted: escape single quotes so the generated script stays valid.
            string sql =
                $"INSERT INTO Movies VALUES('{EscapeSqlLiteral(title)}','{EscapeSqlLiteral(imgurl)}'," +
                $"'{EscapeSqlLiteral(types)}','{EscapeSqlLiteral(artime)}'," +
                $"'{EscapeSqlLiteral(releasetime)}','{EscapeSqlLiteral(intro)}');";

            // The writer is disposed (and therefore flushed) by the using block. The explicit
            // region lock of the original is dropped: this program is the only writer.
            using (StreamWriter writer = new StreamWriter(outputPath, true))
            {
                writer.WriteLine(sql);
            }

            successCount++;
        }
        catch (Exception ex)
        {
            // Best effort: log the failure and continue with the next film id.
            errorCount++;
            Console.WriteLine(ex);
        }
    }

    Console.WriteLine($"結束 成功:{successCount}條,失敗:{errorCount}條");
    Console.ReadLine();
}

/// <summary>Selects a single node by XPath, throwing a descriptive exception when it is absent.</summary>
private static HtmlNode SelectRequiredNode(HtmlDocument doc, string xpath, string description)
{
    HtmlNode node = doc.DocumentNode.SelectSingleNode(xpath);
    if (node == null)
    {
        throw new InvalidOperationException($"Could not find the {description} node ({xpath}).");
    }
    return node;
}

/// <summary>Escapes a value for use inside a single-quoted SQL string literal.</summary>
private static string EscapeSqlLiteral(string value)
{
    return value == null ? "" : value.Replace("'", "''");
}
總結
以上是生活随笔為你收集整理的猫眼api html,爬取猫眼电影数据(示例代码)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 操作系统与操作系统内核
- 下一篇: UML系列图2