當前位置：首頁 > 编程语言 > python >内容正文

python

python3多线程实例_python3多线程糗事百科案例

發布時間：2023/12/18 python 32 豆豆

生活随笔收集整理的這篇文章主要介紹了「python3多线程实例_python3多线程糗事百科案例」，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

# Standard library: JSON serialization, thread-safe FIFO queues,
# thread primitives and sleeping.
import json
import queue
import threading
import time

# Third-party: HTTP client.
import requests
# Third-party: HTML parsing and XPath queries.
from lxml import etree

class ThreadCrawl(threading.Thread):
    """Worker thread that downloads listing pages.

    Each iteration takes one page number from ``pageQueue``, requests the
    corresponding page and puts the response text on ``dataQueue``.  The
    loop runs until the module-level ``CRAWL_EXIT`` flag becomes true.

    Args:
        threadName: Label printed when the thread starts and stops.
        pageQueue: Queue of page numbers still to be fetched.
        dataQueue: Queue that receives the text of each fetched page.
    """

    # Seconds to block on an empty page queue before re-checking CRAWL_EXIT.
    QUEUE_POLL_SECONDS = 0.1
    # Upper bound for a single HTTP request so that a stalled connection
    # cannot keep the thread (and whoever joins it) waiting forever.
    REQUEST_TIMEOUT_SECONDS = 10

    def __init__(self, threadName, pageQueue, dataQueue):
        # Run the base-class initializer.
        super(ThreadCrawl, self).__init__()
        # Thread label
        self.threadName = threadName
        # Queue of page numbers
        self.pageQueue = pageQueue
        # Queue of downloaded page text
        self.dataQueue = dataQueue
        # Request headers
        self.headers = {"User-Agent" : "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;"}

    def run(self):
        """Fetch pages until ``CRAWL_EXIT`` is set."""
        print("啟動" + self.threadName)
        while not CRAWL_EXIT:
            # Block briefly instead of spinning on a non-blocking get():
            # queue.Empty after the timeout just means "nothing to do yet",
            # so go back and re-check the exit flag.
            try:
                page = self.pageQueue.get(timeout=self.QUEUE_POLL_SECONDS)
            except queue.Empty:
                continue

            url = "http://www.qiushibaike.com/8hr/page/" + str(page) + "/"
            try:
                content = requests.get(
                    url,
                    headers=self.headers,
                    timeout=self.REQUEST_TIMEOUT_SECONDS,
                ).text
            except requests.RequestException as err:
                # Best effort: report the failed page and move on to the next
                # one rather than killing the worker.
                print(self.threadName + " 抓取失敗 " + url + ": " + str(err))
                continue

            # Pause between requests.
            time.sleep(1)
            self.dataQueue.put(content)
        print("結束" + self.threadName)

class ThreadParse(threading.Thread):
    """Worker thread that parses downloaded pages and stores the results.

    Each iteration takes one HTML document from ``dataQueue``, extracts the
    posts it contains and appends one JSON line per post to ``filename``.
    The loop runs until the module-level ``PARSE_EXIT`` flag becomes true.

    Args:
        threadName: Label printed when the thread starts and stops.
        dataQueue: Queue of HTML documents to parse.
        filename: Open, writable text file object (despite the name, not a
            path) that receives the JSON lines.
        lock: Lock held around every write to ``filename``.
    """

    # Seconds to block on an empty data queue before re-checking PARSE_EXIT.
    QUEUE_POLL_SECONDS = 0.1

    def __init__(self, threadName, dataQueue, filename, lock):
        super(ThreadParse, self).__init__()
        # Thread label
        self.threadName = threadName
        # Queue of downloaded page text
        self.dataQueue = dataQueue
        # File object the parsed records are written to
        self.filename = filename
        # Lock guarding writes to the file
        self.lock = lock

    def run(self):
        """Parse queued pages until ``PARSE_EXIT`` is set."""
        print("啟動" + self.threadName)
        while not PARSE_EXIT:
            # Block briefly instead of spinning on a non-blocking get().
            try:
                html = self.dataQueue.get(timeout=self.QUEUE_POLL_SECONDS)
            except queue.Empty:
                continue

            try:
                self.parse(html)
            except Exception as err:
                # Thread boundary: one bad page must not stop the worker,
                # but the failure is reported instead of being swallowed.
                print(self.threadName + " 解析失敗: " + str(err))
        print("退出" + self.threadName)

    def parse(self, html):
        """Extract every post from one page and append it to the output file.

        Args:
            html: Text of a downloaded listing page.

        A post that lacks one of the expected elements is skipped; the other
        posts on the page are still written.
        """
        # Build an HTML DOM from the page text.
        html = etree.HTML(html)
        if html is None:
            # NOTE(review): some lxml versions appear to return None for
            # empty input instead of raising - confirm; harmless either way.
            return

        node_list = html.xpath('//div[contains(@id, "qiushi_tag")]')

        for node in node_list:
            try:
                # xpath() returns a list; the first match is the user name.
                username = node.xpath('.//img/@alt')[0]
                # Text of the post
                content = node.xpath('.//div[@class="content"]/span')[0].text
                # The first <i> holds the like count, the second the number
                # of comments.
                counters = node.xpath('.//i')
                zan = counters[0].text
                comments = counters[1].text
            except IndexError:
                # Incomplete node: skip it rather than abandoning the page.
                continue

            # Image links; kept as a list because a post may have no image.
            image = node.xpath('.//div[@class="thumb"]//@src')

            items = {
                "username": username,
                "image": image,
                "content": content,
                "zan": zan,
                "comments": comments
            }

            # The with-statement acquires the lock on entry and releases it
            # on exit whether or not the write succeeds.
            with self.lock:
                # Append the record as one JSON line.
                self.filename.write(json.dumps(items, ensure_ascii = False) + "\n")

# Shutdown flags polled by the worker loops: the download threads stop when
# CRAWL_EXIT becomes true, the parser threads when PARSE_EXIT becomes true.
CRAWL_EXIT = False
PARSE_EXIT = False

def main():
    """Crawl 20 listing pages with three download and three parser threads.

    Page numbers 1..20 are queued for the download threads, whose output is
    consumed by the parser threads and appended to ``duanzi.json`` as JSON
    lines.  Each group of workers is stopped through its module-level flag
    (``CRAWL_EXIT`` / ``PARSE_EXIT``) once the queue it reads from is empty.
    """
    global CRAWL_EXIT, PARSE_EXIT
    # Reset the flags so that main() can be called more than once per process.
    CRAWL_EXIT = False
    PARSE_EXIT = False

    # Seconds to sleep between queue checks; sleeping instead of spinning
    # leaves the CPU (and the GIL) to the worker threads.
    poll_interval = 0.1

    # Queue of page numbers, sized for 20 pages.
    pageQueue = queue.Queue(20)
    # Enqueue the numbers 1..20, first in, first out.
    for i in range(1, 21):
        pageQueue.put(i)

    # Queue of downloaded pages (HTML source); no argument means unbounded.
    dataQueue = queue.Queue()

    # Lock shared by the parser threads.
    lock = threading.Lock()

    # Names of the three download threads.
    crawlList = ["采集線程1號", "采集線程2號", "采集線程3號"]
    # Names of the three parser threads.
    parseList = ["解析線程1號","解析線程2號","解析線程3號"]

    # Started worker threads.
    threadcrawl = []
    threadparse = []

    # Records are written with ensure_ascii=False, so the encoding is fixed
    # to UTF-8 instead of relying on the platform default.
    with open("duanzi.json", "a", encoding="utf-8") as filename:
        try:
            for threadName in crawlList:
                thread = ThreadCrawl(threadName, pageQueue, dataQueue)
                thread.start()
                threadcrawl.append(thread)

            for threadName in parseList:
                thread = ThreadParse(threadName, dataQueue, filename, lock)
                thread.start()
                threadparse.append(thread)

            # Wait until every page number has been taken.
            while not pageQueue.empty():
                time.sleep(poll_interval)

            # The page queue is empty: let the download threads leave
            # their loop.
            CRAWL_EXIT = True

            print("pageQueue為空")

            for thread in threadcrawl:
                thread.join()
                print("1")

            # Wait until every downloaded page has been taken.
            while not dataQueue.empty():
                time.sleep(poll_interval)

            PARSE_EXIT = True

            for thread in threadparse:
                thread.join()
                print("2")
        finally:
            # Safety net for early exits (exception, Ctrl-C): stop and wait
            # for every started worker before the file is closed.  Joining a
            # thread that has already finished returns immediately.
            CRAWL_EXIT = True
            PARSE_EXIT = True
            for thread in threadcrawl + threadparse:
                thread.join()

    print("謝謝使用！")

# Run the crawler only when the file is executed as a script.
if __name__ == "__main__":
    main()

總結

以上是生活随笔為你收集整理的python3多线程实例_python3多线程糗事百科案例的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：驱动人生2008_驱动人生致敬深圳经济特
下一篇： python bind_Python b