python爬虫之selenium记录

Chrome浏览器驱动下载地址：http://npm.taobao.org/mirrors/chromedriver/

基础代码：

# Quick reference of common WebDriver calls. Each line is an independent
# example; this is not meant to be executed top to bottom (for instance,
# close() cannot be called after quit()).
# NOTE(review): the element id "kw" is assumed to be the Baidu search box;
# the original argument was lost from these notes.
browser = webdriver.Firefox()  # choose which browser to drive
browser.find_element_by_id("kw").send_keys("selenium")  # find a control by id and type "selenium" into it
browser.find_element_by_id("su").click()  # the search button's id is "su"; click it
browser.quit()  # quit: close every window and shut down the associated driver
browser.close()  # close the current window only
browser.implicitly_wait(10)  # implicit wait (10 seconds)

无窗口模式：

#selenium:3.12.0
#webdriver:2.38
#chrome.exe: 65.0.3325.181（正式版本） （32 位）

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Launch Chrome without a visible window, load a page, and check its source.
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')  # works around the "DevToolsActivePort file doesn't exist" error

chrome_options.add_argument('window-size=1920x3000')  # browser resolution
chrome_options.add_argument('--disable-gpu')  # Google's docs recommend this flag to avoid a bug
chrome_options.add_argument('--hide-scrollbars')  # hide scrollbars, for some special pages
chrome_options.add_argument('blink-settings=imagesEnabled=false')  # do not load images, for speed
chrome_options.add_argument('--headless')  # no visible UI; required on Linux hosts without a display
# Explicit browser location. The backslashes are required; without them the
# path does not point at any file.
chrome_options.binary_location = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"


# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=chrome_options)
try:
    driver.get('https://www.baidu.com')

    print('hao123' in driver.page_source)
finally:
    # quit() (not close()) also terminates the chromedriver process, so the
    # resources are reclaimed even when the page load raises.
    driver.quit()

键盘操作：

from selenium.webdriver.common.keys import Keys

# Keyboard input demo: type into a field, then send special keys and chords.
# NOTE(review): the string arguments of this snippet were lost from the
# notes. The URL, the ids "kw"/"su", the typed text and the letters combined
# with CONTROL below are reconstructed from the usual Baidu search demo;
# confirm against the intended page.
browser = webdriver.Chrome()
browser.get("https://www.baidu.com")
browser.find_element_by_id("kw").send_keys("selenium")
browser.find_element_by_id("kw").send_keys(Keys.SPACE)
browser.find_element_by_id("kw").send_keys(Keys.CONTROL, "a")  # select all
browser.find_element_by_id("kw").send_keys(Keys.CONTROL, "x")  # cut
browser.find_element_by_id("kw").send_keys(Keys.CONTROL, "v")  # paste
browser.find_element_by_id("su").send_keys(Keys.ENTER)

鼠标操作：

from selenium.webdriver import ActionChains

# Mouse demo: run a search, hover over an element, then click a link.
# NOTE(review): the string arguments of this snippet were lost from the
# notes. The URL, ids, element name and link text below are placeholders
# taken from the usual Baidu demo; confirm against the current page markup.
driver = webdriver.Chrome()
driver.get("https://www.baidu.com")
driver.find_element_by_id("kw").send_keys("selenium")
driver.find_element_by_id("su").click()

# Hover the mouse pointer over the element without clicking it.
element = driver.find_element_by_name("tj_settingicon")
ActionChains(driver).move_to_element(element).perform()

driver.find_element_by_link_text("搜索设置").click()

截屏定位

# `img` is presumably a WebElement located earlier (not shown in these notes).
# WebElement.location is a dict with 'x'/'y'; WebElement.size is a dict with
# 'width'/'height'.
location = img.location
print(location)
size = img.size
left = location['x']
top = location['y']
right = left + size['width']

保存cookie，以及调用cookie

保存cookies
import json

# Dump every cookie of the current session to cookies.txt as JSON.
cookies = driver.get_cookies()
with open("cookies.txt", "w") as fp:
    json.dump(cookies, fp)
selenium读取cookies
def read_cookies():
    """Load the cookies stored in ``cookies.txt`` into the module-level ``driver``."""
    # The Baidu page has to be visited once before cookies can be set.
    driver.get("http://www.baidu.com")
    with open("cookies.txt", "r") as cookie_file:
        for saved_cookie in json.load(cookie_file):
            # saved_cookie.pop('domain')  # enable if an invalid-domain error is reported
            driver.add_cookie(saved_cookie)

# Flatten the saved cookie list into a plain {name: value} mapping.
cookies_dict = {}
with open('cookies.txt', 'r') as f:
    cookies = json.load(f)
    cookies_dict.update(
        (entry['name'], entry['value']) for entry in cookies
    )

评论 (0)