生活随笔
收集整理的這篇文章主要介紹了拉勾网的起薪,小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
import requests
import re
import pyecharts

# Impersonate a desktop browser; lagou.com rejects requests without a User-Agent.
user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
res = requests.get("https://www.lagou.com/", headers={"User-Agent": user_agent})
# print(res.status_code)
# print(res.text)
# Grab the backend-development section of the homepage navigation.
# NOTE(review): the original "[sS]" lost its backslashes during HTML
# extraction; restored to "[\s\S]" so the class matches any char incl. newlines.
data = re.search(r"<span>后端開發</span>([\s\S]*?)后端開發其它</a>", res.text).group(1)
# Strip all whitespace so the link regex below can match without spaces.
data = data.replace(" ", "").replace("\n", "")
# Each match is (category_url, category_name), e.g. ("https://.../java?px=...&city=0#filterBox", "Java").
urls = re.findall('<ahref="(.*?)"data-lg-tj-id="4O00"data-lg-tj-no="01dd"data-lg-tj-cid="idnull"class=".*?">(.*?)</a>', data)
print(urls)
def parser_job(i):
    """Parse one job-listing HTML fragment into a flat dict.

    Expects *i* to contain an href attribute (job detail link), an <h3>
    job title, an <em> location, a '"money">...</span>' salary range, and
    a trailing '-->experience / education' segment ending at a newline.

    Returns a dict with keys: name, add, job_url, money, jy (experience),
    xl (education).
    """
    # Link to the job detail page.
    job_url = re.search('href="(.*?)"', i).group(1)
    name = re.search("<h3>(.*?)</h3>", i).group(1)
    add = re.search("<em>(.*?)</em>", i).group(1)
    money = re.search('"money">(.*?)</span>', i).group(1)
    # Experience / education follow an HTML-comment close marker, up to end
    # of line. NOTE(review): the original "[sS]" plus a literal newline inside
    # the string was extraction damage; restored to an explicit raw pattern.
    jy, xl = re.search(r'-->([\s\S]*?)\n', i).group(1).split(" / ")
    return {"name": name, "add": add, "job_url": job_url, "money": money, "jy": jy, "xl": xl}
# Technology tracks to chart.
jobs = ["Java", "Python"]
# A pyecharts Page collects all pie charts into one HTML file.
page = pyecharts.Page()
for url in urls:
    # url is (category_url, category_name); skip categories we don't track.
    if url[1] not in jobs:
        continue
    # Parsed job dicts for this category.
    datas = []
    # Listing pages 1..10 of the category.
    for j in range(1, 11):
        # Reuse the browser User-Agent — lagou rejects UA-less requests.
        res = requests.get(url[0] + str(j), headers={"User-Agent": user_agent})
        # One fragment per posting, from the position link up to the company div.
        # NOTE(review): restored "[\s\S]" (backslashes lost in extraction).
        data = re.findall(r'<a class="position_link"([\s\S]*?)<div class="company">', res.text)
        for i in data:
            dic = parser_job(i)
            # A "{" in the education field means an unrendered template
            # placeholder slipped through; drop that row.
            if "{" in dic["xl"]:
                continue
            datas.append(dic)
    print(len(datas))
    # Count how many postings share each starting salary (the part before "-").
    count_dic = {}
    for d in datas:
        key = d["money"].split("-")[0]
        if key in count_dic:
            count_dic[key] += 1
        else:
            count_dic[key] = 1
    print(url[1], count_dic)
    # Pie chart: title, keys (starting salaries), values (posting counts).
    pie = pyecharts.Pie()
    pie.add(url[1], list(count_dic.keys()), list(count_dic.values()))
    # Add the chart to the page.
    page.add(pie)
# Render every chart into a single HTML page.
page.render("test.html")
import requests
import re
import pyecharts

# Impersonate a desktop browser; lagou.com rejects requests without a User-Agent.
user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
res = requests.get("https://www.lagou.com/", headers={"User-Agent": user_agent})
# print(res.status_code)
# print(res.text)
# Grab the backend-development section of the homepage navigation.
# NOTE(review): the original "[sS]" lost its backslashes during HTML
# extraction; restored to "[\s\S]" so the class matches any char incl. newlines.
data = re.search(r"<span>后端開發</span>([\s\S]*?)后端開發其它</a>", res.text).group(1)
# Strip all whitespace so the link regex below can match without spaces.
data = data.replace(" ", "").replace("\n", "")
# Each match is (category_url, category_name), e.g. ("https://.../java?px=...&city=0#filterBox", "Java").
urls = re.findall('<ahref="(.*?)"data-lg-tj-id="4O00"data-lg-tj-no="01dd"data-lg-tj-cid="idnull"class=".*?">(.*?)</a>', data)
print(urls)
def parser_job(i):
    """Parse one job-listing HTML fragment into a flat dict.

    Expects *i* to contain an href attribute (job detail link), an <h3>
    job title, an <em> location, a '"money">...</span>' salary range, and
    a trailing '-->experience / education' segment ending at a newline.

    Returns a dict with keys: name, add, job_url, money, jy (experience),
    xl (education).
    """
    # Link to the job detail page.
    job_url = re.search('href="(.*?)"', i).group(1)
    name = re.search("<h3>(.*?)</h3>", i).group(1)
    add = re.search("<em>(.*?)</em>", i).group(1)
    money = re.search('"money">(.*?)</span>', i).group(1)
    # Experience / education follow an HTML-comment close marker, up to end
    # of line. NOTE(review): the original "[sS]" plus a literal newline inside
    # the string was extraction damage; restored to an explicit raw pattern.
    jy, xl = re.search(r'-->([\s\S]*?)\n', i).group(1).split(" / ")
    return {"name": name, "add": add, "job_url": job_url, "money": money, "jy": jy, "xl": xl}
# Technology tracks to chart.
jobs = ["Java", "Python"]
# A pyecharts Page collects all pie charts into one HTML file.
page = pyecharts.Page()
for url in urls:
    # url is (category_url, category_name); skip categories we don't track.
    if url[1] not in jobs:
        continue
    # Parsed job dicts for this category.
    datas = []
    # Listing pages 1..10 of the category.
    for j in range(1, 11):
        # Reuse the browser User-Agent — lagou rejects UA-less requests.
        res = requests.get(url[0] + str(j), headers={"User-Agent": user_agent})
        # One fragment per posting, from the position link up to the company div.
        # NOTE(review): restored "[\s\S]" (backslashes lost in extraction).
        data = re.findall(r'<a class="position_link"([\s\S]*?)<div class="company">', res.text)
        for i in data:
            dic = parser_job(i)
            # A "{" in the education field means an unrendered template
            # placeholder slipped through; drop that row.
            if "{" in dic["xl"]:
                continue
            datas.append(dic)
    print(len(datas))
    # Count how many postings share each starting salary (the part before "-").
    count_dic = {}
    for d in datas:
        key = d["money"].split("-")[0]
        if key in count_dic:
            count_dic[key] += 1
        else:
            count_dic[key] = 1
    print(url[1], count_dic)
    # Pie chart: title, keys (starting salaries), values (posting counts).
    pie = pyecharts.Pie()
    pie.add(url[1], list(count_dic.keys()), list(count_dic.values()))
    # Add the chart to the page.
    page.add(pie)
# Render every chart into a single HTML page.
page.render("test.html")
總結
以上是生活随笔為你收集整理的拉勾网的起薪的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。