Contents
I. Approach Overview
II. Analyzing the Data Interface
III. Full Code

I. Approach Overview
1. Lagou (拉勾网) loads its listings with Ajax: opening a search page fires an asynchronous request to the backend, so the first step is to locate that data interface.
2. The backend responds with JSON; parse it, pick out the positionId of each posting, and splice it into the detail-page URL.
3. Request each detail URL and pull the fields out with XPath (a small sketch follows this list).
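A minimal illustration of step 3, not part of the original script: it runs the same kind of XPath expressions the full code in section III uses, but against a hypothetical HTML fragment shaped like a Lagou detail page, so it can be tried offline.

from lxml import etree
import re

# Hypothetical fragment mimicking the markup the detail-page XPath expects.
fragment = '''
<div>
  <span class="name">Python开发工程师</span>
  <dl><dd class="job_request">
    <span>15k-30k </span><span>北京 /</span><span>3-5年 /</span><span>本科</span>
  </dd></dl>
</div>
'''

html = etree.HTML(fragment)
name = html.xpath("//span[@class='name']/text()")[0]
spans = html.xpath("//dd[@class='job_request']//span")
salary = spans[0].xpath('.//text()')[0].strip()
city = re.sub(r'[\s/]', '', spans[1].xpath('.//text()')[0])
print(name, salary, city)   # Python开发工程师 15k-30k 北京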
II. Analyzing the Data Interface

Open the listing page with the browser developer tools and watch the XHR requests: the job data is not in the page HTML itself but comes back from a POST to https://www.lagou.com/jobs/positionAjax.json?city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false with the form fields first, pn (page number), and kd (search keyword). The returned JSON keeps the postings under content -> positionResult -> result, and each entry carries the positionId used to build the detail URL https://www.lagou.com/jobs/<positionId>.html.
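The snippet below is a quick probe of that interface rather than part of the original script. It reuses the endpoint and form fields from the full code in section III and prints the positionId of every posting on one page; note that Lagou normally rejects requests that lack the browser headers and cookies shown in section III, so a bare run like this may only get an "operation too frequent" style refusal.

import requests

# Endpoint and form fields copied from the full script in section III.
url = ('https://www.lagou.com/jobs/positionAjax.json'
       '?city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false')
data = {'first': 'false', 'pn': 1, 'kd': 'python'}

# Headers and cookies are omitted here for brevity, so treat this purely as an
# illustration of the JSON path, not a working crawler.
response = requests.post(url, data=data)
result = response.json()

for position in result['content']['positionResult']['result']:
    print(position['positionId'])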
III. Full Code

# encoding: utf-8
import requests
from lxml import etree
import time
import re

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
    'Referer': 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=',
    'Cookie': 'user_trace_token=20200524155122-083f0df9-307f-44ee-8266-fceb6af1028b; _ga=GA1.2.728245592.1590306686; LGUID=20200524155125-3d15f82a-9111-4024-98da-9c1422b856dd; JSESSIONID=ABAAAECABFAACEA42866F7C54F46B971C0B595641303F73; WEBTJ-ID=20200524155129-17245aa50d74ec-0946ac3961652a-f7d1d38-1327104-17245aa50d980; RECOMMEND_TIP=true; _gid=GA1.2.369120051.1590306690; index_location_city=%E5%8C%97%E4%BA%AC; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2217245aab6d623b-09a44787242115-f7d1d38-1327104-17245aab6d7237%22%2C%22%24device_id%22%3A%2217245aab6d623b-09a44787242115-f7d1d38-1327104-17245aab6d7237%22%7D; sajssdk_2015_cross_new_user=1; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1590306685,1590324851; LGSID=20200524205410-e3cbacc7-ae0f-4d56-a775-67d45085e31d; PRE_UTM=m_cf_cpt_baidu_pcbt; PRE_HOST=sp0.baidu.com; PRE_SITE=https%3A%2F%2Fsp0.baidu.com%2F9q9JcDHa2gU2pMbgoY3K%2Fadrc.php%3Ft%3D06KL00c00fA1VH%5F0w3dM0FNkUsKEU%5FGX00000rUBx7C00000xs000y.THL0oUhY1x60UWY3rj04n1cdnNtzndqCmyqxTAT0T1dhmymYmhm1P10snH6dmvR10ZRqfbwDfHuDPWnsnRP7f1uDPbfLrDc1rHPDPjfknY7ArjD0mHdL5iuVmv-b5Hn1PWRzP1DYnHThTZFEuA-b5HDv0ARqpZwYTZnlQzqLILT8UA7MULR8mvqVQvk9UhwGUhTVTA7Muiqsmzq1uy7zmv68pZwVUjqdIAdxTvqdThP-5ydxmvuxmLKYgvF9pywdgLKWmMf0mLFW5HR4PWRs%26tpl%3Dtpl%5F11534%5F22672%5F17382%26l%3D1518133990%26attach%3Dlocation%253D%2526linkName%253D%2525E6%2525A0%252587%2525E5%252587%252586%2525E5%2525A4%2525B4%2525E9%252583%2525A8-%2525E6%2525A0%252587%2525E9%2525A2%252598-%2525E4%2525B8%2525BB%2525E6%2525A0%252587%2525E9%2525A2%252598%2526linkText%253D%2525E3%252580%252590%2525E6%25258B%252589%2525E5%25258B%2525BE%2525E6%25258B%25259B%2525E8%252581%252598%2525E3%252580%252591%2525E5%2525AE%252598%2525E6%252596%2525B9%2525E7%2525BD%252591%2525E7%2525AB%252599%252520-%252520%2525E4%2525BA%252592%2525E8%252581%252594%2525E7%2525BD%252591%2525E9%2525AB%252598%2525E8%252596%2525AA%2525E5%2525A5%2525BD%2525E5%2525B7%2525A5%2525E4%2525BD%25259C%2525EF%2525BC%25258C%2525E4%2525B8%25258A%2525E6%25258B%252589%2525E5%25258B%2525BE%21%2526xp%253Did%28%252522m3365271417%5Fcanvas%252522%29%25252FDIV%25255B1%25255D%25252FDIV%25255B1%25255D%25252FDIV%25255B1%25255D%25252FDIV%25255B1%25255D%25252FDIV%25255B1%25255D%25252FH2%25255B1%25255D%25252FA%25255B1%25255D%2526linkType%253D%2526checksum%253D154%26ie%3Dutf-8%26f%3D8%26tn%3D88093251%5F23%5Fhao%5Fpg%26wd%3D%25E6%258B%2589%25E5%258B%25BE%25E7%25BD%2591%26oq%3D%25E6%258B%2589%25E5%258B%25BE%25E7%25BD%2591%26rqlang%3Dcn; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2Flanding-page%2Fpc%2Fsearch.html%3Futm%5Fsource%3Dm%5Fcf%5Fcpt%5Fbaidu%5Fpcbt; TG-TRACK-CODE=index_search; X_HTTP_TOKEN=50fc797f56a45533494523095182b5415766d31319; _gat=1; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1590325496; LGRID=20200524210515-c04502d0-30cb-4056-bbd3-4d96e0564e1e; SEARCH_ID=00630409e0a54ce59315f7af9b9fcce1',
    'Origin': 'https://www.lagou.com',
    'X-Anit-Forge-Code': '0',
    'X-Anit-Forge-Token': 'None',
    'X-Requested-With': 'XMLHttpRequest'
}


def request_list_page():
    url = 'https://www.lagou.com/jobs/positionAjax.json?city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false'
    data = {
        'first': 'false',
        'pn': 1,        # page number
        'kd': 'python'  # search keyword
    }
    for x in range(1, 21):
        data['pn'] = x
        response = requests.post(url, headers=headers, data=data)
        # print(type(response))
        # If the response body is JSON, .json() loads it straight into a dict.
        result = response.json()
        # print(result)
        # The postings sit under content -> positionResult -> result.
        positions = result['content']['positionResult']['result']
        for position in positions:
            positionId = position['positionId']
            position_url = 'https://www.lagou.com/jobs/%s.html' % positionId
            print(position_url)
            parse_postion_detail(position_url)
            break   # only handle the first posting for now
        break       # only handle the first page for now


def parse_postion_detail(url):
    positions = []
    response = requests.get(url, headers=headers)
    text = response.text
    html = etree.HTML(text)
    position_name = html.xpath("//span[@class='name']/text()")[0]
    job_request_spans = html.xpath("//dd[@class='job_request']//span")
    salary = job_request_spans[0].xpath('.//text()')[0].strip()
    city = job_request_spans[1].xpath('.//text()')[0].strip()
    city = re.sub(r'[\s/]', '', city)
    work_years = job_request_spans[2].xpath('.//text()')[0].strip()
    work_years = re.sub(r'[\s/]', '', work_years)
    education = job_request_spans[3].xpath('.//text()')[0].strip()
    education = re.sub(r'[\s/]', '', education)
    desc = ''.join(html.xpath("//dd[@class='job_bt']//text()")).strip()
    position = {
        'name': position_name,
        'salary': salary,
        'city': city,
        'work_years': work_years,
        'education': education,
        'desc': desc
    }
    positions.append(position)
    print(position)
    print('=' * 40)


def main():
    request_list_page()


if __name__ == '__main__':
    main()
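The script above only prints each position dict and then discards it. If the results should be kept, a small helper along these lines could be added; save_positions and the output filename are illustrative, not part of the original code, and parse_postion_detail would need to return position (or append to a shared list) for the helper to receive any rows.

import csv

def save_positions(positions, filename='lagou_positions.csv'):
    # positions: a list of dicts shaped like the ones parse_postion_detail builds.
    fieldnames = ['name', 'salary', 'city', 'work_years', 'education', 'desc']
    with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(positions)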