基于 Python 可视化的拉勾网招聘岗位分析

论文第一作者，原创，侵权必究

import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xlwt
def base(jos, page):
    """Assemble the Lagou request settings and kick off the crawl.

    Args:
        jos: Search keyword (job title) forwarded to ``url_info``.
        page: Number of result pages forwarded to ``url_info``.

    Returns:
        None. All work happens in ``url_info``.
    """
    # Ajax endpoint that url_info POSTs the search form to.
    url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false&isSchoolJob=1'
    # Listing page that url_info GETs before each POST.
    urls = 'https://www.lagou.com/jobs/list_python?isSchoolJob=1'

    # Browser-like headers sent with both requests.
    headers = {}
    headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
    headers['Referer'] = 'https://www.lagou.com/jobs/list_python?labelWords=sug&fromSearch=true&suginput=p'
    headers['Accept'] = 'application/json, text/javascript, */*; q=0.01'

    url_info(url, urls, headers, jos, page)
    
def url_info(url,urls,headers,jos,page):
    """Fetch ``page`` pages of search results for ``jos`` and pass them to Download.

    Args:
        url: Ajax endpoint that receives the POSTed search form.
        urls: Listing page requested first so the session picks up cookies.
        headers: HTTP headers sent with both requests.
        jos: Search keyword, sent as the ``kd`` form field.
        page: Number of pages to fetch; pages 1..page are requested in order.

    Pages whose JSON response carries no position list are reported and
    skipped, so one bad response does not discard the pages already fetched.
    """
    info_list = []

    for i in range(1, page + 1):
        data = {
            'first': 'false',
            'kd': jos,
            'pn': i,
        }
        # A fresh session per page (as before), but closed deterministically
        # so its pooled connections are released after each page.
        with requests.session() as s:
            s.get(urls, headers=headers, timeout=2)
            result = s.post(url, data=data, headers=headers,
                            cookies=s.cookies, timeout=4)
            info = result.json()

        # Walk the nested payload defensively: a response without the
        # expected keys must not raise KeyError.
        content = info.get('content') or {}
        position_result = content.get('positionResult') or {}
        positions = position_result.get('result')
        if positions is None:
            print('第%d页未返回职位数据，已跳过' % i)
            continue
        info_list.append(positions)

    Download(info_list, jos)
    
       
   
      
def _parse_salary(salary):
    """Return ``(low, high, average)`` parsed from a salary label.

    Accepts the usual ``'15k-25k'`` range, a single bound such as
    ``'15k以上'`` (low and high are then equal) and ranges followed by extra
    text such as ``'15k-25k·13薪'``.  Returns ``(None, None, None)`` when a
    bound is not an integer, so one odd posting cannot abort the export.
    """
    try:
        bounds = [
            # Keep only the digits in front of the 'k'/'K' unit marker.
            int(part.lower().partition('k')[0])
            for part in str(salary).split('-')[:2]
        ]
    except ValueError:
        return None, None, None
    low, high = bounds[0], bounds[-1]
    return low, high, (low + high) / 2


def Download(infolist,jos):
    """Write the crawled postings to '拉勾网.xls' and then run Readdata.

    Args:
        infolist: List of pages, each page being a list of posting dicts
            with the keys read below (``companyFullName``, ``city``,
            ``positionName``, ``salary``, ``education``, ``workYear``,
            ``positionAdvantage``, ``companyLabelList``).
        jos: Search keyword, used as the worksheet name.

    Returns:
        The next unused row index, i.e. the number of postings written
        plus one (row 0 holds the header).
    """
    w = xlwt.Workbook(encoding='utf-8')
    ws = w.add_sheet(jos, cell_overwrite_ok=True)

    # Header row; Readdata groups on these exact column titles.
    column_titles = (
        '序号', '公司', '城市', '职位',
        '最低薪资(k)', '最高薪资(k)', '平均薪资(k)',
        '学历要求', '工作经验', '职位优点', '公司待遇',
    )
    for col, title in enumerate(column_titles):
        ws.write(0, col, title)

    row = 1
    for info in infolist:
        for job in info:
            money_min, money_max, money_ave = _parse_salary(job['salary'])
            # NOTE(review): companyLabelList looks like a list of labels;
            # confirm how xlwt renders a list value in a cell.
            values = (
                row,
                job['companyFullName'],
                job['city'],
                job['positionName'],
                money_min,   # None (unparsable salary) is expected to leave the cell blank
                money_max,
                money_ave,
                job['education'],
                job['workYear'],
                job['positionAdvantage'],
                job['companyLabelList'],
            )
            for col, value in enumerate(values):
                ws.write(row, col, value)
            row += 1

    w.save('拉勾网.xls')
    print('爬取已完成')
    Readdata()
    return row

def autolabel(rects):
    """Annotate each bar in ``rects`` with its height rounded to one decimal.

    Args:
        rects: Iterable of bar patches exposing ``get_height``, ``get_x``
            and ``get_width`` (e.g. the container returned by ``plt.bar``).
    """
    for bar in rects:
        bar_height = bar.get_height()
        # Start the text 0.2 data units left of the bar's horizontal centre.
        label_x = bar.get_x() + bar.get_width() / 2. - 0.2
        # Place the text slightly above the top of the bar.
        label_y = 1.03 * bar_height
        caption = '%s' % float(round(bar_height, 1))
        plt.text(label_x, label_y, caption, fontsize=15)

def Readdata():
    """Plot per-city salary and demand charts from '拉勾网.xls'.

    Reads the workbook written by Download, then draws:

    1. a grouped bar chart of minimum / maximum / average salary per city,
       saved as '薪资.png';
    2. a pie chart of the number of postings per city, saved as '需求.png'.

    Each figure is saved *before* ``plt.show()`` is called, because the
    figure is no longer available for saving once the show call returns.
    """
    file = '拉勾网.xls'
    df = pd.read_excel(file)

    # Per-city aggregates; all four share the same city index.
    by_city = df.groupby(['城市'])
    cs_ave_money = by_city['平均薪资(k)'].mean()
    cs_min_money = by_city['最低薪资(k)'].min()
    cs_max_money = by_city['最高薪资(k)'].max()
    cs_count = by_city.size()

    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
    plt.rcParams.update({"font.size": 20})
    data = list(cs_count.values)
    labels = list(cs_count.keys())
    plt.rcParams['figure.figsize'] = (15.0, 10.0)

    # One slot of width 2 per city; the three bars sit at offsets 0, 0.5 and 1.
    positions = np.arange(0, len(labels) * 2, 2)
    a = plt.bar(positions, cs_min_money, alpha=0.5, width=0.5, color='yellow', edgecolor='red', label='最低薪资', lw=3)
    b = plt.bar(positions + 0.5, cs_max_money, alpha=0.2, width=0.5, color='green', edgecolor='blue', label='最高薪资', lw=3)
    c = plt.bar(positions + 1, cs_ave_money, alpha=0.2, width=0.5, color='red', edgecolor='blue', label='平均薪资', lw=3)
    autolabel(a)
    autolabel(b)
    autolabel(c)
    plt.title('各地区薪资对照表', color='r')
    plt.ylabel('薪资(K)')
    plt.xlabel('城市')
    plt.legend()
    plt.xticks(positions + 0.5 / 2, labels)
    plt.savefig('薪资.png')
    plt.show()
    plt.close()

    plt.title('地区需求量分析', color='r')
    plt.pie(data, labels=labels, autopct='%1.1f%%', shadow=False, startangle=150)
    plt.savefig('需求.png')
    plt.show()
    plt.close()
    print('展示已完成')
if __name__ == '__main__':
    # Script entry point: ask for a job keyword and a page count, then crawl.
    print("说明：此程序完成指定职位的各城市薪资对照表及需求量分析与展示 \n  注：页数越多数据分析越准确完善")
    position = input('请输入要查询的职位:')
    pg = input('请输入要爬取的页数<=30:')
    # int() replaces the previous eval(): eval would execute whatever
    # expression the user typed at the prompt.
    try:
        page = int(pg)
    except ValueError:
        raise SystemExit('页数必须是整数') from None
    # Enforce the limit the prompt advertises.
    if not 1 <= page <= 30:
        raise SystemExit('页数必须在1到30之间')
    base(position, page)


相关链接：[论文发表](http://d.wanfangdata.com.cn/periodical/ChlQZXJpb2RpY2FsQ0hJTmV3UzIwMjEwMzAyEhpRS0JKQkQyMDIwMjAyMTAzMTIwMDAwODAwNBoIaHY1d3E4MnY=)
#