

Python apply_async: the process pool finishes without ever executing the functions added via apply_async


The program runs straight to the end without ever executing the function added to the pool with apply_async.

from bs4 import BeautifulSoup
import random
import requests
import pymongo
import datetime
import time
from multiprocessing import Pool

user_agents = [
    'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 '
    'Mobile/13B143 Safari/601.1',
    'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/48.0.2564.23 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/48.0.2564.23 Mobile Safari/537.36']

heads = {
    # the header name requests expects is 'User-Agent', not 'User_Agent'
    'User-Agent': random.choice(user_agents)
}

ipHeads = {
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': random.choice(user_agents),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Referer': 'http://www.xicidaili.com/nn/',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
}


class douban():
    def __init__(self):
        self.client = pymongo.MongoClient('localhost', 27017)
        self.db = self.client['books']
        self.tool = self.client['tool']
        self.collectIp = self.tool['ip']

    def getFromSQL(self):
        # pick a stored proxy and turn it into a requests-style proxies dict
        item = self.collectIp.find_one({'http': 'http'})
        proxies = {}
        proxies[item['http']] = 'http://' + item['ip'] + ':' + item['port']
        return proxies

    def getAllTag(self):
        url = 'https://book.douban.com/tag/?view=type&icn=index-sorttags-all'
        proxies = self.getFromSQL()
        s = requests.get(url, headers=heads, proxies=proxies)
        if s.status_code == 403:
            # the proxy is blocked: drop it from the ip collection and retry once
            values = list(proxies.values())[0]
            ip = values.split('//')[1].split(':')[0]
            self.collectIp.remove({'ip': ip})
            proxies = self.getFromSQL()
            s = requests.get(url, headers=heads, proxies=proxies)
        soup = BeautifulSoup(s.text, 'lxml')
        titleTags = soup.find_all('a', class_='tag-title-wrapper')
        tagList = soup.find_all('table', class_='tagCol')
        href = {}
        titleList = []
        i = 0
        for titleTag in titleTags:
            title = titleTag['name']
            titleList.append(title)
            trs = tagList[i].find_all('tr')
            hreflist = []
            for tr in trs:
                hreflist.append(tr.td.a['href'])
            href[title] = hreflist
            i = i + 1
        return titleList, href

    def getAllBookUrl(self, title, hrefDic):
        print('a')  # debug marker: this never prints, which is the reported problem
        collect = self.db[title]
        for href in hrefDic[title]:
            index = 0
            while 1:
                url = 'https://book.douban.com' + href + '?start=' + str(index) + '&type=T'
                proxies = self.getFromSQL()
                s = requests.get(url, headers=heads, proxies=proxies)
                if s.status_code == 403:
                    values = list(proxies.values())[0]
                    ip = values.split('//')[1].split(':')[0]
                    self.collectIp.remove({'ip': ip})
                    proxies = self.getFromSQL()
                    s = requests.get(url, headers=heads, proxies=proxies)
                html = s.text
                soup = BeautifulSoup(html, 'lxml')
                liList = soup.find_all('li', class_='subject-item')
                if len(liList):
                    for li in liList:
                        id = li.find('a')['href'][32:-1]
                        collect.insert({'bookId': id})
                    index += 20
                    time.sleep(3)
                else:
                    break


if __name__ == '__main__':
    p = Pool(4)
    a = douban()
    titleList, hrefDic = a.getAllTag()
    for i in range(len(titleList)):
        print('Start crawling %s' % titleList[i])
        p.apply_async(a.getAllBookUrl, args=(titleList[i], hrefDic))
    p.close()
    p.join()
    # a = douban()
    # titleList, hrefDic = a.getAllTag()
    # a.getAllBookUrl(titleList[0], hrefDic)
    # print('done')
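
apply_async gives no hint when a task never runs: exceptions raised while a task is pickled or executed are stored on the AsyncResult and stay invisible unless you call .get() on it or pass an error_callback. A likely culprit here is that the submitted task is a bound method of a douban instance whose attributes include a pymongo.MongoClient; bound methods cannot be pickled at all on Python 2, and on Python 3 the client object itself often refuses to pickle, so every task dies before it starts and close()/join() return immediately. Below is a minimal diagnostic sketch (assuming Python 3, where error_callback is available, and reusing the douban class from the listing above) that makes such hidden failures visible:

from multiprocessing import Pool

def report_error(exc):
    # error_callback runs in the parent process whenever a submitted task fails
    print('task failed:', repr(exc))

if __name__ == '__main__':
    p = Pool(4)
    a = douban()
    titleList, hrefDic = a.getAllTag()
    results = []
    for title in titleList:
        results.append(p.apply_async(a.getAllBookUrl, args=(title, hrefDic),
                                     error_callback=report_error))
    p.close()
    for r in results:
        try:
            # get() re-raises whatever exception the task hit, including any
            # "cannot pickle ..." error from shipping the bound method (and the
            # MongoClient it drags along) to the worker process
            r.get()
        except Exception as e:
            print('apply_async task raised:', e)
    p.join()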

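If the error surfaced that way does turn out to be a pickling failure, a common restructuring (again only a sketch under that assumption, with crawl_tag as a hypothetical module-level replacement for douban.getAllBookUrl) is to submit a plain function and let every task open its own MongoClient, since pymongo documents that a client should not be shared across forked processes; only picklable data, the tag title and its href list, then crosses the process boundary:

import pymongo
from multiprocessing import Pool

def crawl_tag(title, hrefList):
    # each task builds its own MongoDB connection instead of inheriting one
    client = pymongo.MongoClient('localhost', 27017)
    collect = client['books'][title]
    for href in hrefList:
        pass  # same request/parse/insert loop as getAllBookUrl, writing to `collect`
    client.close()

if __name__ == '__main__':
    a = douban()                       # the parent still scrapes the tag index itself
    titleList, hrefDic = a.getAllTag()
    with Pool(4) as p:
        results = [p.apply_async(crawl_tag, args=(t, hrefDic[t])) for t in titleList]
        for r in results:
            r.get()                    # propagate worker errors instead of hiding them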