python的变量如何理解_Python程序中变量作用范围应该如何理解?
初學Python,自己嘗試著寫了一個爬蟲,主要代碼如下import json
import scrapy
from bs4 import BeautifulSoup
from docx import Document
from docx.shared import Pt
class BidsSpider(scrapy.Spider):
name = 'bids_spider'
allowed_domains = [
'deal.ggzy.gov.cn',
'www.ggzy.gov.cn',
]
start_urls = [
'http://deal.ggzy.gov.cn',
'http://www.ggzy.gov.cn',
]
custom_settings = {
"DOWNLOAD_DELAY": 1,
"RETRY_ENABLED": True,
}
page = 1
url = 'http://deal.ggzy.gov.cn/ds/deal/dealList_find.jsp?TIMEBEGIN_SHOW=2020-09-01&TIMEEND_SHOW=2020-10-20&TIMEBEGIN=2020-09-01&TIMEEND=2020-10-20&SOURCE_TYPE=1&DEAL_TIME=06&DEAL_CLASSIFY=01&DEAL_STAGE=0101&DEAL_PROVINCE=0&DEAL_CITY=0&DEAL_PLATFORM=0&BID_PLATFORM=0&DEAL_TRADE=0&isShowAll=1&FINDTXT=風電&PAGENUMBER='
doc = Document()
def start_requests(self):
yield scrapy.Request(
url=self.url + str(self.page),
callback=self.parse,
method='GET',
)
def parse(self, response):
json_data = json.loads(response.text)
bid_list = json_data['data']
if not len(bid_list):
self.doc.save('D:/projects/test.docx')
return
for index, value in enumerate(bid_list):
item = dict()
item['title'] = value['title']
item['platformName'] = value['platformName']
item['districtShow'] = value['districtShow']
item['tradeShow'] = value['tradeShow']
item['timeShow'] = value['timeShow']
yield scrapy.Request(
url=value['url'],
callback=self.parse_detail,
meta={'item': item},
method='GET',
)
self.page += 1
yield scrapy.Request(
url=self.url + str(self.page),
callback=self.parse,
method='GET',
)
def parse_detail(self, response):
item = response.meta['item']
url = response.css('li.li_hover a::attr(onclick)').extract_first()
yield scrapy.Request(
url='http://www.ggzy.gov.cn/information' + url[25: -2],
callback=self.parse_text,
meta={'item': item},
method='GET',
)
def parse_text(self, response):
item = response.meta['item']
html = response.xpath('//div[@class="detail_content"]').extract_first()
soup = BeautifulSoup(str(html), 'html.parser')
item['detail'] = soup.get_text()
p = self.doc.add_paragraph()
r = p.add_run(item['title'])
r.font.name = '黑體'
r.bold = True
p = self.doc.add_paragraph()
r = p.add_run('來源平臺:')
r.bold = True
p.add_run(item['platformName'])
p = self.doc.add_paragraph()
r = p.add_run('省份:')
r.bold = True
p.add_run(item['districtShow'])
p = self.doc.add_paragraph()
r = p.add_run('行業:')
r.bold = True
p.add_run(item['tradeShow'])
p = self.doc.add_paragraph()
r = p.add_run('發布日期:')
r.bold = True
p.add_run(item['timeShow'])
p = self.doc.add_paragraph()
r = p.add_run(item['detail'])
r.font.size = Pt(8)
self.doc.add_page_break()
self.logger.info('#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#')
self.logger.info(item['title'])
self.logger.info('#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#@#')
出現的問題是self.doc.save行生成的文件是空的,但是最后的日志輸出語句是有內容的,的確爬到了數據。我覺得是我對Python的變量作用范圍不理解造成代碼有問題,但是嘗試了各種方法,還是沒法解決。希望哪位大佬可以指導我一下,萬分感謝。
總結
以上是生活随笔為你收集整理的python的变量如何理解_Python程序中变量作用范围应该如何理解?的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 清华大学博士,就12年前抄袭一事公开道歉
- 下一篇: websocket python爬虫_p