博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Python爬虫学习笔记之极验滑动验证码的识别
阅读量:4553 次
发布时间:2019-06-08

本文共 6692 字,大约阅读时间需要 22 分钟。

代码:

 

import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

EMAIL = ''  # account e-mail; register your own account and fill in both fields
PASSWORD = ''
BORDER = 6  # subtracted from the detected gap x before building the drag track
INIT_LEFT = 60  # x column at which the gap search starts


class CrackGeetest:
    """Drive a Chrome session through the Geetest slider captcha on the login page."""

    def __init__(self):
        self.url = 'https://account.geetest.com/login'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        self.password = PASSWORD

    def __del__(self):
        # `browser` is missing when webdriver.Chrome() raised inside __init__.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole driver session; close() only closes the
            # current window.
            browser.quit()

    def get_geetest_button(self):
        """
        Wait for the initial verification button to become clickable.
        :return: the button element
        """
        return self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_radar_tip')))

    def get_position(self):
        """
        Locate the captcha image on the page.
        :return: tuple (top, bottom, left, right) built from the element's
                 location and size
        """
        img = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_img')))
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """
        Take a screenshot of the current page.
        :return: the screenshot as a PIL image
        """
        png_bytes = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))

    def get_slider(self):
        """
        Wait for the slider handle to become clickable.
        :return: the slider element
        """
        return self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_slider_button')))

    def get_geetest_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it to disk.
        :param name: file name the cropped image is saved under
        :return: the cropped captcha image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def open(self):
        """
        Open the login page and type in the e-mail and password.
        :return: None
        """
        self.browser.get(self.url)
        email = self.wait.until(EC.presence_of_element_located((By.ID, 'email')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'password')))
        email.send_keys(self.email)
        password.send_keys(self.password)

    @staticmethod
    def _channels_close(pixel1, pixel2, threshold):
        """Return True when the first three channels each differ by less than threshold."""
        return all(abs(pixel1[c] - pixel2[c]) < threshold for c in range(3))

    def get_gap(self, image1, image2):
        """
        Find the x offset of the gap by comparing the two captcha images.

        Columns are scanned left to right starting at INIT_LEFT; the first
        column containing a pixel that differs between the images is returned.
        :param image1: captcha image without the gap
        :param image2: captcha image with the gap
        :return: x of the first differing column, or INIT_LEFT if none differs
        """
        # Load the pixel access objects once instead of once per pixel.
        pixels1 = image1.load()
        pixels2 = image2.load()
        # Only compare the area both images cover.
        width = min(image1.size[0], image2.size[0])
        height = min(image1.size[1], image2.size[1])
        for x in range(INIT_LEFT, width):
            for y in range(height):
                if not self._channels_close(pixels1[x, y], pixels2[x, y], 60):
                    return x
        return INIT_LEFT

    def is_pixel_equal(self, image1, image2, x, y, threshold=60):
        """
        Decide whether the pixels at (x, y) of two images are close enough.
        :param image1: first image
        :param image2: second image
        :param x: x position
        :param y: y position
        :param threshold: a channel difference at or above this counts as different
        :return: True if every one of the first three channels differs by less
                 than threshold
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        return self._channels_close(pixel1, pixel2, threshold)

    def get_track(self, distance):
        """
        Build a list of per-step x offsets covering the given distance.

        The motion accelerates (a = 2) over the first 4/5 of the distance and
        decelerates (a = -3) afterwards, sampled every 0.2 time units. Steps
        are differences of the rounded cumulative position, clamped to
        distance, so they sum to round(distance) instead of drifting through
        per-step rounding.
        :param distance: offset to cover
        :return: list of integer steps; empty when distance <= 0
        """
        track = []
        # Exact position reached so far.
        current = 0
        # Integer position already accounted for in `track`.
        emitted = 0
        # Position at which deceleration begins.
        mid = distance * 4 / 5
        # Sampling interval.
        t = 0.2
        # Current velocity.
        v = 0

        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            # v = v0 + a*t
            v = v0 + a * t
            # x = v0*t + 1/2 * a * t^2
            move = v0 * t + 1 / 2 * a * t * t
            # Never travel past the target.
            current = min(current + move, distance)
            target = round(current)
            track.append(target - emitted)
            emitted = target
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the track and release it.
        :param slider: slider element
        :param track: list of x offsets to move by, one per step
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def login(self):
        """
        Click the login button.
        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'login-btn')))
        submit.click()
        time.sleep(10)
        print('登录成功')

    def _solve_captcha(self):
        """Run one full captcha attempt; return True when the success text appears."""
        # Open the page and enter the credentials.
        self.open()
        # Click the verification button.
        button = self.get_geetest_button()
        button.click()
        # Capture the captcha before the gap is shown.
        image1 = self.get_geetest_image('captcha1.png')
        # Click the slider to bring up the gap.
        slider = self.get_slider()
        slider.click()
        # Capture the captcha with the gap.
        image2 = self.get_geetest_image('captcha2.png')
        gap = self.get_gap(image1, image2)
        print('缺口位置', gap)
        gap -= BORDER
        track = self.get_track(gap)
        print('滑动轨迹', track)
        self.move_to_gap(slider, track)

        # WebDriverWait.until raises TimeoutException instead of returning a
        # falsy value, so a failed attempt has to be caught here.
        try:
            success = self.wait.until(
                EC.text_to_be_present_in_element(
                    (By.CLASS_NAME, 'geetest_success_radar_tip_content'), '验证成功'))
        except TimeoutException:
            success = False
        print(success)
        return bool(success)

    def crack(self, max_attempts=3):
        """
        Solve the captcha and log in, retrying failed attempts.
        :param max_attempts: maximum number of captcha attempts
        :return: True once an attempt succeeded and login() ran, False when
                 every attempt failed
        """
        for _ in range(max_attempts):
            if self._solve_captcha():
                self.login()
                return True
        return False


if __name__ == '__main__':
    crack = CrackGeetest()
    crack.crack()

 

转载于:https://www.cnblogs.com/Trojan00/p/9501653.html

你可能感兴趣的文章
git从已有分支拉新分支开发
查看>>
滚动条隐藏兼容写法
查看>>
SQL2005查询所有表的大小
查看>>
Shell 正则表达式
查看>>
Docker run命令参数整理
查看>>
qt-opencv配置mingw编译器
查看>>
CSS之Media Queries的另一用法:实现IE hack的方法
查看>>
oo第三单元总结
查看>>
linux-CentOS6.4下安装oracle11g详解
查看>>
tomcat禁用webdav
查看>>
还是畅通工程
查看>>
电脑软件故障排除2014年2月16日[修正版]
查看>>
YARN的笔记
查看>>
javascript单线程,异步与执行机制
查看>>
14 模块
查看>>
4- 算法练习leetcode.com
查看>>
许式伟、张宴——系统架构运维思路对话
查看>>
android 左右页面滑动(滑屏)增加layout文件 而不是drawable(还有activity)
查看>>
替换textarea文本值中的换行符
查看>>
JPA + SpringData 操作数据库原来可以这么简单 ---- 深入了解 JPA - 2
查看>>