# Automated tool for downloading post-exam verification photos.
import os
import time
import urllib.request

import openpyxl
import psutil
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# Reuse a Chrome instance that is already listening on the remote-debugging
# port; launch a new one only when no such process is found.
for proc in psutil.process_iter():
    try:
        if 'chrome' in proc.name() and '--remote-debugging-port=9224' in proc.cmdline():
            break
    except (psutil.NoSuchProcess, psutil.AccessDenied):
        # A process can exit or be inaccessible while it is being inspected;
        # such a process cannot be the browser we want, so move on.
        continue
else:
    # The loop finished without a break: no matching Chrome is running.
    # The profile directory is wrapped in double quotes for the Windows shell.
    os.system(r'start chrome --remote-debugging-port=9224 --user-data-dir="D:\评阅用"')

# Whether Chrome was found or freshly started, attach through the same
# debugger address.
options = Options()
options.add_experimental_option('debuggerAddress', '127.0.0.1:9224')
# Attach to the browser and switch to the tab whose title is '考后核验'.
driver = webdriver.Chrome(options=options)
for tab in driver.window_handles:
    driver.switch_to.window(tab)
    if driver.title == '考后核验':
        print('登陆成功')
        break
else:
    # No tab matched; the driver stays on the last tab it switched to.
    # Say so explicitly instead of continuing silently on the wrong page.
    print('未找到标题为“考后核验”的标签页')
# Open the Excel workbook and work on its active sheet.
wb = openpyxl.load_workbook(r'C:\Users\Administrator\Desktop\考后核验.xlsx')
sheet = wb.active
# Create the root folder for downloaded photos.
# exist_ok=True makes this a no-op when the folder is already there and
# avoids the gap between a separate existence check and the creation.
os.makedirs(r'C:\Users\Administrator\Desktop\照片', exist_ok=True)
# Collect the distinct, non-empty values of column K (the exam time slot),
# skipping the header row.
# Each yielded row is a 1-tuple because min_col == max_col == 11.
time_set = {
    cell.value
    for (cell,) in sheet.iter_rows(min_row=2, max_row=sheet.max_row, min_col=11, max_col=11)
    if cell.value
}
# Create one sub-folder per exam time slot. ':' is not allowed in Windows
# file names, so the time part of the folder name uses '-' instead.
for time_data in time_set:
    if hasattr(time_data, 'strftime'):
        # openpyxl returns date-formatted cells as datetime objects, which
        # time.strptime cannot parse; format them directly.
        folder_name = time_data.strftime('%Y-%m-%d %H-%M-%S')
    else:
        folder_name = time.strftime(
            '%Y-%m-%d %H-%M-%S',
            time.strptime(time_data, '%Y-%m-%d %H:%M:%S'),
        )
    folder_path = os.path.join(r'C:\Users\Administrator\Desktop\照片', folder_name)
    os.makedirs(folder_path, exist_ok=True)
# Walk column A (student IDs), search each candidate on the web page and
# download the registration photo into <photo root>/<exam time>/<exam room>/.
photo_root = r'C:\Users\Administrator\Desktop\照片'
# The time, exam-room and student-ID lookups all use this same cell selector.
cell_xpath = (
    "//td[@class='ant-table-column-has-actions ant-table-column-has-sorters'"
    " and @style='text-align: center;']"
)

for (id_cell,) in sheet.iter_rows(min_row=2, max_row=sheet.max_row, min_col=1, max_col=1):
    student_id = id_cell.value
    if not student_id:
        continue

    # Search for the candidate on the page.
    search_input = driver.find_element(By.XPATH, "//input[@placeholder='姓名/考生号']")
    search_input.clear()
    search_input.send_keys(student_id)
    time.sleep(1)  # wait for the page to settle
    search_button = driver.find_element(By.XPATH, "//button[@class='ant-btn ant-btn-primary']")
    driver.execute_script('arguments[0].click();', search_button)
    time.sleep(1)  # wait for the search results to load

    # Read the exam room (column H) and exam time (column K) from this row,
    # so each photo is matched and filed under its own session.
    exam_room = sheet.cell(row=id_cell.row, column=8).value
    exam_time = sheet.cell(row=id_cell.row, column=11).value
    if exam_room is None or exam_time is None:
        continue

    # Page text is always a string; normalise the Excel values so numeric
    # room numbers and datetime cells can still compare equal.
    # NOTE(review): assumes the page shows times as 'YYYY-MM-DD HH:MM:SS' - confirm.
    room_text = str(exam_room)
    if hasattr(exam_time, 'strftime'):
        time_text = exam_time.strftime('%Y-%m-%d %H:%M:%S')
    else:
        time_text = str(exam_time)

    # One query is enough: the row qualifies when both its time and its
    # exam room appear among the matching table cells.
    cell_texts = {td.text for td in driver.find_elements(By.XPATH, cell_xpath)}
    if time_text not in cell_texts or room_text not in cell_texts:
        continue

    # ':' is not allowed in Windows file names, hence the '-' separators.
    folder_name = time.strftime(
        '%Y-%m-%d %H-%M-%S',
        time.strptime(time_text, '%Y-%m-%d %H:%M:%S'),
    )
    exam_room_path = os.path.join(photo_root, folder_name, room_text)
    os.makedirs(exam_room_path, exist_ok=True)

    # Student ID and name as displayed on the page.
    # NOTE(review): the ID lookup takes the first cell matching cell_xpath -
    # confirm that the ID really is the first such cell in the result table.
    student_id_text = driver.find_element(By.XPATH, cell_xpath).text
    name_text = driver.find_element(
        By.XPATH, "//td[@class='' and @style='text-align: center;']"
    ).text

    # Locate the registration photo and save it as '<id>-<name>.jpg'.
    photo_element = driver.find_element(By.XPATH, "//img[@alt='' and @title='报名照片']")
    photo_src = photo_element.get_attribute('src')
    photo_name = f'{student_id_text}-{name_text}.jpg'
    photo_path = os.path.join(exam_room_path, photo_name)
    urllib.request.urlretrieve(photo_src, photo_path)
# Close the browser session.
driver.quit()
# Original source: https://cveoy.top/t/topic/fI4w - copyright belongs to the author. Do not repost or scrape.