puppeteer爬取豆瓣top250电影及详细信息
生活隨筆收集整理的這篇文章主要介紹了 puppeteer 爬取豆瓣 top250 電影及詳細信息,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
const puppeteer = require('puppeteer');
const fs = require('fs');// 豆瓣top250電影
/**
 * Scrapes the Douban Top 250 movie list and writes the collected records to
 * `./output/doubantop250.json`.
 *
 * Opens the first list page, collects its entries with `getdata`, then follows
 * the "next page" link up to a fixed number of times, collecting each page.
 *
 * @param {*} [i] Not read by the function; kept so the signature is unchanged.
 * @returns {Promise<void>} Resolves once the file write has been attempted and
 *   the browser has been closed.
 */
async function top250(i) {
  // Number of "next page" clicks performed after the first page.
  const pageTurns = 5;
  const outputDir = './output';
  const outputFile = `./output/doubantop250.json`;
  const nextLinkSelector = ' div > div.article > div.paginator > span.next > a';

  const browser = await puppeteer.launch({
    headless: false, // disable headless mode
    defaultViewport: { width: 1200, height: 800 },
    timeout: 60000,
  });

  try {
    const page = await browser.newPage();
    await page.goto(`https://movie.douban.com/top250?start=0&filter=`);

    let alldata = await getdata(page);

    for (let turn = 0; turn < pageTurns; turn++) {
      // Stop early instead of letting page.click() throw when there is no
      // further page to go to.
      const nextLink = await page.$(nextLinkSelector);
      if (!nextLink) {
        break;
      }

      // Start waiting for the navigation before clicking so it is not missed.
      await Promise.all([
        page.waitForNavigation(),
        page.click(nextLinkSelector, { delay: 1000 }),
      ]);

      alldata = alldata.concat(await getdata(page));
      console.log(`第${turn + 2}頁已記錄`);
    }

    // Await the write so it finishes before the process winds down, and make
    // sure the target directory exists first.
    try {
      await fs.promises.mkdir(outputDir, { recursive: true });
      await fs.promises.writeFile(outputFile, JSON.stringify(alldata));
      console.log('寫入成功');
    } catch (err) {
      console.log(`寫入文件失敗${err.message}`);
    }

    await page.close();
  } finally {
    // Always release the browser, including when scraping fails midway.
    await browser.close();
  }

  console.log("窗口關閉,任務完成");
}

/**
 * Reads the synopsis text from a movie detail page that is currently loaded.
 *
 * @param {object} page Puppeteer page showing a movie detail page.
 * @returns {Promise<string|undefined>} The synopsis text, or `undefined` when
 *   no matching element is found.
 */
async function getDetailText(page) {
  const expandSelector = '#link-report > span.short > a';

  // An "expand" link is present when the synopsis is shown truncated.
  const expandLink = await page.$(expandSelector);

  let textSelector = '#link-report > span:nth-child(1)';
  if (expandLink) {
    await page.click(expandSelector);
    // Match the full-text span whether or not the `hidden` class is still
    // present after the click.
    textSelector = '#link-report > span.all';
  }

  const texts = await page.$$eval(textSelector, nodes => nodes.map(node => node.innerText));
  return texts[0];
}

/**
 * Collects every movie entry on the list page currently loaded in `page`.
 *
 * Fields are read per list item, so an entry that lacks one (for example the
 * quote) does not shift the values of the entries after it. Each entry's
 * detail page is then visited in turn to fetch its synopsis, returning to the
 * list page afterwards.
 *
 * @param {object} page Puppeteer page showing a Top 250 list page.
 * @returns {Promise<Array<object>>} One record per list item with the keys
 *   `title`, `id`, `score`, `img`, `comment` and `detail`; a key is omitted
 *   when its element is not found.
 */
async function getdata(page) {
  // Passed into the browser context as an argument, because the callback given
  // to $$eval cannot close over variables defined here.
  const fields = [
    { key: 'title', selector: 'div > div.info > div.hd > a > span:nth-child(1)', prop: 'innerText' },
    { key: 'id', selector: 'div > div.pic > em', prop: 'innerText' },
    { key: 'score', selector: 'div > div.info > div.bd > div > span.rating_num', prop: 'innerText' },
    { key: 'img', selector: 'div > div.pic > a > img', prop: 'src' },
    { key: 'comment', selector: 'p.quote > span', prop: 'innerText' },
  ];

  const records = await page.$$eval(
    'div > div.article > ol > li',
    (items, fieldList) =>
      items.map(item => {
        const record = {};
        for (const field of fieldList) {
          const node = item.querySelector(field.selector);
          if (node) {
            record[field.key] = node[field.prop];
          }
        }
        return record;
      }),
    fields
  );

  // Visit the detail page of each entry; nth-child is 1-based.
  for (let position = 1; position <= records.length; position++) {
    await Promise.all([
      page.waitForNavigation(),
      page.click(`ol > li:nth-child(${position}) div > div.info > div.hd > a`),
    ]);

    const detail = await getDetailText(page);
    if (detail !== undefined) {
      records[position - 1].detail = detail;
    }
    console.log(`第${position}個詳情已采集`);

    await page.goBack();
  }

  return records;
}

// Run the scraper; report a failure instead of leaving the promise unhandled.
top250().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
總結
以上是生活随笔為你收集整理的puppeteer爬取豆瓣top250电影及详细信息的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 管理类联考——英语——趣味篇——背诵单词
- 下一篇: 虹科为您介绍精确的多相机同步技术