基于python可视化的拉勾网招聘岗位分析
基于python可视化的拉勾网招聘岗位分析
论文第一作者,原创,侵权必究
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xlwt
def base(jos, page):
    """Set up the request parameters and start the scrape.

    Builds the listing-page URL, the JSON endpoint URL and the HTTP headers,
    then hands them to ``url_info`` together with the caller's arguments.

    Args:
        jos: Job keyword to search for; forwarded unchanged to ``url_info``.
        page: Number of result pages to fetch; forwarded unchanged.

    Returns:
        None.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
        'Referer': 'https://www.lagou.com/jobs/list_python?labelWords=sug&fromSearch=true&suginput=p',
        'Accept': 'application/json, text/javascript, */*; q=0.01'
    }
    # Listing page: url_info requests it first, before posting to the endpoint.
    listing_url = 'https://www.lagou.com/jobs/list_python?isSchoolJob=1'
    # JSON endpoint that url_info posts the search form to.
    ajax_url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false&isSchoolJob=1'
    url_info(ajax_url, listing_url, request_headers, jos, page)
def url_info(url, urls, headers, jos, page):
    """Fetch ``page`` pages of search results and pass them to ``Download``.

    For every page number from 1 to ``page`` a fresh session first GETs
    ``urls`` (so the session picks up cookies) and then POSTs the search form
    to ``url``. The ``content.positionResult.result`` entry of each JSON
    response is collected, and the collected pages are handed to ``Download``
    once all pages have been fetched.

    Args:
        url: JSON endpoint that receives the POSTed search form.
        urls: Page requested first to obtain session cookies.
        headers: HTTP headers sent with both requests.
        jos: Job keyword, sent as the ``kd`` form field.
        page: Number of pages to fetch (page numbers 1..page).

    Raises:
        KeyError: If a response does not contain
            ``content -> positionResult -> result``.
    """
    info_list = []
    for page_no in range(1, page + 1):
        data = {
            'first': 'false',
            'kd': jos,
            'pn': page_no
        }
        # A new session per page, closed deterministically by the context
        # manager so its connections are released instead of leaking.
        with requests.session() as s:
            s.get(urls, headers=headers, timeout=2)
            result = s.post(url, data=data, headers=headers,
                            cookies=s.cookies, timeout=4)
            info = result.json()
        try:
            positions = info['content']['positionResult']['result']
        except KeyError as err:
            # Same exception type as before, but say which page failed and
            # what the server actually answered.
            raise KeyError(
                'unexpected response for page %d: %r' % (page_no, info)
            ) from err
        info_list.append(positions)
    Download(info_list, jos)
def _parse_salary(salary):
    """Return ``(low, high, average)`` parsed from a salary string.

    The first two numbers found in the text are taken as the lower and upper
    bound, so ``'10k-20k'`` gives ``(10, 20, 15.0)``. A string with a single
    number uses it for both bounds; a string without any number (or ``None``)
    gives ``(None, None, None)``.
    """
    import re  # local import keeps this block independent of the file header

    numbers = re.findall(r'\d+(?:\.\d+)?', str(salary or ''))
    if not numbers:
        return None, None, None
    bounds = [float(text) if '.' in text else int(text) for text in numbers[:2]]
    low, high = bounds[0], bounds[-1]
    return low, high, (low + high) / 2


def Download(infolist, jos):
    """Write the scraped positions to '拉勾网.xls' and trigger the plots.

    Args:
        infolist: Iterable of result pages; each page is a sequence of dicts
            providing the keys ``companyFullName``, ``city``,
            ``positionName``, ``salary``, ``education``, ``workYear``,
            ``positionAdvantage`` and ``companyLabelList``.
        jos: Job keyword, used as the worksheet name.

    Returns:
        The next free row index, i.e. the number of positions written plus
        one for the header row.
    """
    w = xlwt.Workbook(encoding='utf-8')
    ws = w.add_sheet(jos, cell_overwrite_ok=True)

    column_titles = (
        '序号', '公司', '城市', '职位',
        '最低薪资(k)', '最高薪资(k)', '平均薪资(k)',
        '学历要求', '工作经验', '职位优点', '公司待遇',
    )
    for col, title in enumerate(column_titles):
        ws.write(0, col, title)

    row = 1
    for info in infolist:
        for job in info:
            # Salary strings that are not exactly '<n>k-<m>k' no longer abort
            # the export; unparseable ones leave the three salary cells
            # blank (the values are None).
            money_min, money_max, money_ave = _parse_salary(job['salary'])
            ws.write(row, 0, row)
            ws.write(row, 1, job['companyFullName'])
            ws.write(row, 2, job['city'])
            ws.write(row, 3, job['positionName'])
            ws.write(row, 4, money_min)
            ws.write(row, 5, money_max)
            ws.write(row, 6, money_ave)
            ws.write(row, 7, job['education'])
            ws.write(row, 8, job['workYear'])
            ws.write(row, 9, job['positionAdvantage'])
            # NOTE(review): this value is passed to xlwt unchanged, exactly
            # as before; confirm how xlwt renders it if it is a list.
            ws.write(row, 10, job['companyLabelList'])
            row += 1

    w.save('拉勾网.xls')
    print('爬取已完成')
    Readdata()
    return row
def autolabel(rects):
    """Print each bar's height, rounded to one decimal, just above the bar.

    Args:
        rects: Iterable of bar objects exposing ``get_height``, ``get_x`` and
            ``get_width`` (e.g. the container returned by ``plt.bar``).
    """
    for bar in rects:
        bar_height = bar.get_height()
        # Horizontal centre of the bar, nudged left by 0.2 data units.
        label_x = bar.get_x() + bar.get_width() / 2. - 0.2
        # Place the text 3% above the top of the bar.
        label_y = 1.03 * bar_height
        label = str(float(round(bar_height, 1)))
        plt.text(label_x, label_y, label, fontsize=15)
def Readdata():
    """Plot per-city salary and demand charts from '拉勾网.xls'.

    Reads the workbook, groups the rows by the '城市' column and produces:

    * a grouped bar chart of minimum, maximum and mean salary per city,
      saved as '薪资.png';
    * a pie chart of the number of rows per city, saved as '需求.png'.

    Each figure is saved *before* it is shown and closed; saving afterwards
    (as the previous version did) writes out a figure that no longer holds
    the chart.
    """
    file = '拉勾网.xls'
    df = pd.read_excel(file)

    by_city = df.groupby(['城市'])
    cs_ave_money = by_city['平均薪资(k)'].mean()
    cs_min_money = by_city['最低薪资(k)'].min()
    cs_max_money = by_city['最高薪资(k)'].max()
    cs_count = by_city.size()

    data = list(cs_count.values)
    labels = list(cs_count.keys())

    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams.update({"font.size": 20})
    plt.rcParams['figure.figsize'] = (15.0, 10.0)

    # One slot of width 2 per city; the three bars sit at offsets 0, 0.5, 1.
    positions = np.arange(0, len(labels) * 2, 2)
    a = plt.bar(positions, cs_min_money, alpha=0.5, width=0.5, color='yellow', edgecolor='red', label='最低薪资', lw=3)
    b = plt.bar(positions + 0.5, cs_max_money, alpha=0.2, width=0.5, color='green', edgecolor='blue', label='最高薪资', lw=3)
    c = plt.bar(positions + 1, cs_ave_money, alpha=0.2, width=0.5, color='red', edgecolor='blue', label='平均薪资', lw=3)
    autolabel(a)
    autolabel(b)
    autolabel(c)
    plt.title('各地区薪资对照表', color='r')
    plt.ylabel('薪资(K)')
    plt.xlabel('城市')
    plt.legend()
    plt.xticks(positions + 0.5 / 2, labels)
    plt.savefig('薪资.png')  # save while the bar chart is still the current figure
    plt.show()
    plt.close()

    plt.title('地区需求量分析', color='r')
    plt.pie(data, labels=labels, autopct='%1.1f%%', shadow=False, startangle=150)
    plt.savefig('需求.png')  # save before show() for the same reason
    plt.show()
    plt.close()
    print('展示已完成')
if __name__ == '__main__':
    # Script entry point: ask for a job keyword and a page count, then run
    # the scrape -> export -> plot pipeline via base().
    print("说明:此程序完成指定职位的各城市薪资对照表及需求量分析与展示 \n 注:页数越多数据分析越准确完善")
    position = input('请输入要查询的职位:')
    pg = input('请输入要爬取的页数<=30:')
    # SECURITY: this used to be eval(pg), which executes whatever the user
    # types. Only a whole number is meaningful here, so parse it with int();
    # anything else now fails with a clear ValueError.
    page = int(pg)
    base(position, page)
相关链接:[论文发表](http://d.wanfangdata.com.cn/periodical/ChlQZXJpb2RpY2FsQ0hJTmV3UzIwMjEwMzAyEhpRS0JKQkQyMDIwMjAyMTAzMTIwMDAwODAwNBoIaHY1d3E4MnY=)
#