# Scrape product-manager job listings from zhipin.com and export them to an Excel file.
import logging
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
logger = logging.getLogger(__name__)

# Search-results URL; the 1-based page number is appended to it.
# The filter parameter is spelled out as `&degree=`: the pasted source had
# `°ree=`, i.e. `&deg` swallowed by an HTML-entity decoder, which dropped
# both the `&` separator and the parameter name.
BASE_URL = (
    'https://www.zhipin.com/web/geek/job'
    '?query=%E4%BA%A7%E5%93%81%E7%BB%8F%E7%90%86'
    '&city=101280100&degree=203&jobType=1901&salary=406&page='
)

FIRST_PAGE = 1
LAST_PAGE = 10

# NOTE(review): the fixed 60 s pause after each navigation is kept from the
# original script; presumably it leaves time for a manual login or anti-bot
# check -- confirm before shortening it.
PAGE_SETTLE_SECONDS = 60
CARD_TIMEOUT_SECONDS = 10

CARD_CLASS = 'job-card-wrapper'
LINK_SELECTOR = 'a.job-card-left'
OUTPUT_PATH = 'boss直聘产品岗位列表.xlsx'

# (column header, CSS class) pairs in output order.  Keeping each header next
# to the class it is read from prevents the two from drifting apart: the
# original listed '公司名' first although the first value in every row was the
# job name, so most columns were mislabelled.  The data order is unchanged.
TEXT_FIELDS = (
    ('职位', 'job-name'),
    ('区域', 'job-area'),
    ('薪资', 'salary'),
    ('经验要求', 'tag-list'),
    ('信息公开', 'info-public'),
    ('公司名', 'company-name'),
    ('企业信息', 'company-tag-list'),
    ('详细', 'info-desc'),
)
LINK_COLUMN = '链接'
COLUMNS = [header for header, _ in TEXT_FIELDS] + [LINK_COLUMN]


def build_page_url(page: int) -> str:
    """Return the search-results URL for the 1-based listing page *page*."""
    return f'{BASE_URL}{page}'


def first_match(card, by, value):
    """Return the first element under *card* matching the locator, or None."""
    matches = card.find_elements(by, value)
    return matches[0] if matches else None


def extract_job(card):
    """Read one job card into a tuple ordered like ``COLUMNS``.

    A missing text field becomes ``''`` and a missing link becomes ``None``,
    so one incomplete card cannot shift the values of the cards after it.

    NOTE(review): assumes every field is a descendant of its
    ``.job-card-wrapper`` element and that the first match is the wanted
    one -- confirm against the live page markup.
    """
    texts = []
    for _, class_name in TEXT_FIELDS:
        node = first_match(card, By.CLASS_NAME, class_name)
        texts.append(node.text if node is not None else '')
    link = first_match(card, By.CSS_SELECTOR, LINK_SELECTOR)
    href = link.get_attribute('href') if link is not None else None
    return (*texts, href)


def scrape_page(driver):
    """Return one row tuple per job card on the page loaded in *driver*.

    Waits up to ``CARD_TIMEOUT_SECONDS`` for the first card.  If none shows
    up, a warning is logged and an empty list is returned, so a single slow
    or blocked page does not throw away the pages collected so far.
    """
    try:
        WebDriverWait(driver, CARD_TIMEOUT_SECONDS).until(
            EC.presence_of_element_located((By.CLASS_NAME, CARD_CLASS))
        )
    except TimeoutException:
        logger.warning(
            'No job cards appeared within %s s; skipping this page',
            CARD_TIMEOUT_SECONDS,
        )
        return []
    cards = driver.find_elements(By.CLASS_NAME, CARD_CLASS)
    return [extract_job(card) for card in cards]


def jobs_to_frame(jobs) -> pd.DataFrame:
    """Build the export table from row tuples ordered like ``COLUMNS``."""
    return pd.DataFrame(jobs, columns=COLUMNS)


def main() -> None:
    """Scrape every listing page and write the rows to ``OUTPUT_PATH``."""
    driver = webdriver.Chrome()
    all_jobs = []
    try:
        for page in range(FIRST_PAGE, LAST_PAGE + 1):
            driver.get(build_page_url(page))
            time.sleep(PAGE_SETTLE_SECONDS)
            all_jobs.extend(scrape_page(driver))
    finally:
        # Always close the browser, even when a page fails unexpectedly.
        driver.quit()
    jobs_to_frame(all_jobs).to_excel(OUTPUT_PATH, index=False)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main()