當前位置：首頁 > 编程语言 > python >内容正文

python

python输入三个商品_用python3采集shopify站点商品

發布時間：2024/9/27 python 29 豆豆

生活随笔收集整理的這篇文章主要介紹了「python输入三个商品_用python3采集shopify站点商品」，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

import requests,bs4,csv,os,re,time

'''采集商品url'''

def shopifylist(url):
    """Download one collection listing page and scrape every product on it.

    The request is retried every 3 seconds, without limit, until it
    succeeds with a non-error HTTP status. Every element matching
    ``.product-card.sc-pb-element`` is then turned into an absolute product
    URL and handed to ``shopify(product_url, site)``.

    Args:
        url: URL of the listing page to download.

    Returns:
        None. The work is done through the ``shopify`` calls.

    Note:
        ``site`` (the CSV output path) is read from module scope, so it must
        be defined before this function is called.
    """
    retry_delay = 3
    while True:
        try:
            res = requests.get(url, timeout=30)
            res.encoding = res.apparent_encoding
            print('請求', url, '狀態', res.status_code)
            # Raise on 4xx/5xx so error pages go through the retry path too.
            res.raise_for_status()
            break
        except requests.RequestException:
            # Only network/HTTP failures are retried; Ctrl-C and programming
            # errors are no longer swallowed by a bare ``except``.
            print('鏈接失敗,等待', retry_delay, '秒重試')
            time.sleep(retry_delay)
            print('')
            print('重新鏈接中')

    print('鏈接順暢，開始獲取商品鏈接')
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    for card in soup.select('.product-card.sc-pb-element'):
        href = card.get('href')
        if not href:
            # A card without a link cannot be followed; skip it instead of
            # failing on ``str + None``.
            continue
        print('獲取產品url')
        shopify('https://tribalhollywood.com' + href, site)
        print('\n')

'''采集商品url結束'''

'''采集商品內容開始'''

def _first_text(soup, selector):
    """Return the text of the first element matching *selector*, or ''."""
    matches = soup.select(selector)
    return matches[0].getText() if matches else ''


def shopify(url, site):
    """Scrape one product page and append it as a row to a CSV file.

    The page is requested until it succeeds, waiting 3 seconds between
    attempts. Title, price, description and image URLs are extracted with
    CSS selectors, and one row is appended to ``site``. A header row is
    written first when the file is still empty.

    Args:
        url: Absolute URL of the product page.
        site: Path of the CSV file to append to. The containing directory
            must already exist; opening the file is retried every 5 seconds
            for as long as it fails.

    Returns:
        None.
    """
    print('開始請求產品頁面', url)
    while True:
        try:
            res = requests.get(url, timeout=30)
            res.encoding = res.apparent_encoding
            print('成功請求商品頁面:', res.status_code)
            # Raise on 4xx/5xx so error pages are retried as well.
            res.raise_for_status()
            break
        except requests.RequestException:
            print('請求商品頁面', url, '失敗，重新鏈接')
            # Back off before retrying instead of spinning in a tight loop.
            time.sleep(3)

    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # A selector that matches nothing yields an empty field rather than an
    # IndexError that would abort the whole crawl.
    name = _first_text(soup, '.product-single__title')
    price = _first_text(soup, '.product-single__price')
    price = price.replace(' ', '').replace('\n', '')

    # The element's class is "rte product-single__description"; selecting on
    # "product-single__description" alone is sufficient.
    des = _first_text(soup, '.product-single__description')
    des = des.replace('Hollywood', 'customadd.com')  # replace the copyright/brand text

    thumbs = soup.select('#ProductThumbs-product-template img')
    if thumbs:
        # Strip the thumbnail size suffix and join all image URLs with commas.
        images = ','.join(
            'https:' + tag.get('src').replace('_160x160', '')
            for tag in thumbs
            if tag.get('src')
        )
    else:
        # No thumbnail strip: fall back to the first ".sc-pb-element1" element.
        fallback = soup.select('.sc-pb-element1')
        src = fallback[0].get('src') if fallback else None
        # NOTE(review): this branch prefixes "http:" while the thumbnail
        # branch uses "https:" - kept as in the original; confirm intent.
        images = ('http:' + src).replace('_960x', '') if src else ''

    header = ['標題', '產品url', '價格', '描述', '圖片']
    row = [name, url, price, des, images]

    while True:
        try:
            # newline='' is required by the csv module; without it every row
            # is followed by a blank line on Windows.
            csv_file = open(site, 'a', encoding='utf-8', newline='')
            break
        except OSError:
            print('')
            print(site + '文件寫入失敗，重試中。。。。。')
            time.sleep(5)

    # ``with`` guarantees the handle is closed even if a write raises.
    with csv_file:
        writer = csv.writer(csv_file)
        if os.path.getsize(site) == 0:
            # Empty file: emit the header row once.
            writer.writerow(header)
        writer.writerow(row)
    print('采集成功！')

'''采集內容結束'''

# Driver: walk the collection's paginated listing, scraping every page until
# a page no longer contains a ".next" pagination element.
# To ask for the collection URL interactively, replace the assignment below
# with a call to input().
urlpro = 'https://www.tribalhollywood.com/collections/mens-necklaces'

# Raw string: the Windows path contains backslash sequences (\B, \p, \m) that
# are invalid escapes in a normal literal. The resulting value is unchanged.
site = r'D:\Backup\桌面\python3\mens-necklaces1.csv'

n = 1
while True:
    url = urlpro + '?page=' + str(n)
    shopifylist(url)  # scrapes every product on this listing page
    print('成功采集', n, '頁')
    n = n + 1

    # Fetch the same page again to look for a "next page" element. A timeout
    # keeps a stalled connection from hanging the crawl forever.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    noStarchSoup = bs4.BeautifulSoup(res.text, 'html.parser')
    nt = noStarchSoup.select('.next')
    if not nt:
        break

print('全部采集完畢！！')

總結

以上是生活随笔為你收集整理的python输入三个商品_用python3采集shopify站点商品的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：数据结构python吕云翔_《数据结构》
下一篇： mysql实例怎么复制_Mysql实例M