旋转拖动验证码解决方案
曾几何时,你是否也被旋转验证码所困扰?没错,今日主题——旋转验证码。
之前我也是被它伤透了心,研究了好几天的js,想直接通过接口请求来解决验证码,然而我失败了。不过这一次,他来了他来了,他带着RotNet走来了。
彩虹屁
RotNet也是我无意间发现的,没错,时隔好几个月,它自己出现在了我眼前。这是它的github:https://github.com/d4nst/RotNet/tree/master 。它主要用于预测图像的旋转角度以校正其方向,库中内容很全,数据集的下载、训练、预测全都有,而且最最最重要的是,大神提供了训练好的模型,我的天……这是什么神仙,你是孙悟空派来拯救我的吧!兄弟!!!
当然有兴趣的同学可以看看他的文章,有具体的思路和网络实现。还有觉得有用的同学可以星一下他的github
好的,话不多说,先看看我最后的成果吧,
思路和修改
然后因为在跳出验证码的时候一般是直接给出图片的网址,所以我修改了源文件,用来直接读取网络图片和修整图片大小来适应网络,
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# utils.py
# Changes to make inside RotNetDataGenerator._get_batches_of_transformed_samples,
# plus a new helper (ImageScale) that reads an image from a network URL.
class RotNetDataGenerator(Iterator):

    def _get_batches_of_transformed_samples(self, index_array):
        """Build one batch of (images, labels) for the given sample indices.

        Each sample comes from ``self.images`` when ``self.filenames`` is
        None; otherwise it is read from ``self.filenames[j]``, which may be a
        local path or an ``http``/``https`` URL.

        Args:
            index_array: indices of the samples that make up this batch.

        Returns:
            Tuple ``(batch_x, batch_y)``. ``batch_y`` holds the rotation
            angles, one-hot encoded over 360 classes when ``self.one_hot`` is
            set, otherwise divided by 360.

        Raises:
            ValueError: if a file or URL cannot be decoded as an image.
        """
        # create array to hold the images
        batch_x = np.zeros((len(index_array),) + self.input_shape, dtype='float32')
        # create array to hold the labels
        batch_y = np.zeros(len(index_array), dtype='float32')

        # iterate through the current batch
        for i, j in enumerate(index_array):
            if self.filenames is None:
                image = self.images[j]
            else:
                is_color = int(self.color_mode == 'rgb')
                source = self.filenames[j]
                # --- modified section: accept URLs and normalise the size ---
                if source[:4].lower() == "http":
                    # Pass the colour flag through so URL images are decoded
                    # the same way cv2.imread decodes local files.
                    image = ImageScale(source, is_color)
                else:
                    image = cv2.imread(source, is_color)
                if image is None:
                    # cv2 signals an unreadable image by returning None;
                    # fail here with the offending source in the message.
                    raise ValueError("could not read image: %s" % source)
                h, w = image.shape[:2]
                if h != 224 or w != 224:
                    image = cv2.resize(image, (224, 224), interpolation=cv2.INTER_CUBIC)
                # --- end of modified section ---
                if is_color:
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            if self.rotate:
                # get a random angle
                rotation_angle = np.random.randint(360)
            else:
                rotation_angle = 0

            # generate the rotated image
            rotated_image = generate_rotated_image(
                image,
                rotation_angle,
                size=self.input_shape[:2],
                crop_center=self.crop_center,
                crop_largest_rect=self.crop_largest_rect
            )

            # add dimension to account for the channels if the image is greyscale
            if rotated_image.ndim == 2:
                rotated_image = np.expand_dims(rotated_image, axis=2)

            # store the image and label in their corresponding batches
            batch_x[i] = rotated_image
            batch_y[i] = rotation_angle

        if self.one_hot:
            # convert the numerical labels to binary labels
            batch_y = to_categorical(batch_y, 360)
        else:
            batch_y /= 360

        # preprocess input images
        if self.preprocess_func:
            batch_x = self.preprocess_func(batch_x)

        return batch_x, batch_y


def ImageScale(url, flags=cv2.IMREAD_COLOR, timeout=10):
    """Download ``url`` and decode the payload into an OpenCV image.

    Args:
        url: address of the image to fetch.
        flags: decode flag forwarded to ``cv2.imdecode``; defaults to colour.
        timeout: seconds to wait on the connection before giving up.

    Returns:
        The decoded image, or None when the payload is not a decodable image.
    """
    # The context manager closes the HTTP response even if reading fails.
    with request.urlopen(url, timeout=timeout) as resp:
        payload = resp.read()
    buffer = np.asarray(bytearray(payload), dtype="uint8")
    return cv2.imdecode(buffer, flags)
预测角度,也是根据他的源码基础上做修改的,需要注意的是模型位置和测试图片的位置需要修改为你电脑上的文件位置
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
from __future__ import print_function
import os import numpy as np from keras.applications.imagenet_utils import preprocess_input from keras.models import load_model from utils import RotNetDataGenerator, angle_error def process_images(input_path, batch_size=64, crop=True): #需要修改模型文件位置 model = load_model("I:\\pythonProject\\RotNet\\rotnet_models\\rotnet_street_view_resnet50_keras2.hdf5", custom_objects={'angle_error': angle_error}, compile=False) extensions = ['.jpg', '.jpeg', '.bmp', '.png'] if os.path.isfile(input_path) or input_path[:4].lower()=="http": image_paths = [input_path] else: image_paths = [os.path.join(input_path, f) for f in os.listdir(input_path) if os.path.splitext(f)[1].lower() in extensions] predictions = model.predict_generator( RotNetDataGenerator( image_paths, input_shape=(224, 224, 3), batch_size=batch_size, one_hot=True, preprocess_func=preprocess_input, rotate=False, crop_largest_rect=True, crop_center=True ), val_samples=len(image_paths) ) predicted_angles = np.argmax(predictions, axis=1) print(predicted_angles) return predicted_angles if __name__ == '__main__': #修改测试图片位置,本地地址,或是网络图片地址 process_images("I:\\pythonProject\\RotNet\\data\\test_examples\\008999_4.jpg") |
然后通过分析百度指数的js源码发现旋转角度的公式是 angle=o/b*360
即o为拖动的距离,b=底轴宽-按钮宽
所以我们需要知道的拖动距离就是 o=angle/360*b
好的,汇总到一起,就可以了。模拟登录百度指数,而且支持无头模式
中间有参考一段这位老哥写的pyppeteer的拖动,https://blog.csdn.net/qq393912540/article/details/91956136
还有这位老哥的反爬策略
https://github.com/wkunzhi/Python3-Spider/blob/master/%E3%80%90%E6%B7%98%E5%AE%9D%E3%80%91%E8%87%AA%E5%8A%A8%E7%99%BB%E9%99%86/auto_login_pyppeteer.py
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import asyncio
import random

from pyppeteer import launch

from correct_rotation_for_angle import process_images


async def page_evaluate(page):
    """Run the navigator/screen override scripts in the page's current document.

    NOTE(review): ``page.evaluate`` acts on the document that is loaded at
    call time; confirm these overrides are still present after the later
    ``page.goto``.
    """
    await page.evaluate(
        '''() =>{ Object.defineProperties(navigator,{ webdriver:{ get: () => false } });window.screen.width=1366; }''')
    await page.evaluate('''() =>{ window.navigator.chrome = { runtime: {}, };}''')
    await page.evaluate(
        '''() =>{ Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); }''')
    await page.evaluate(
        '''() =>{ Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5,6], }); }''')


async def _element_property(page, selector, name):
    """Return property ``name`` of the first element matching ``selector``."""
    element = await page.querySelector(selector)
    handle = await element.getProperty(name)
    return await handle.jsonValue()


async def main(username, password, width, height):
    """Log in to Baidu Index, solving the rotation captcha when it appears.

    Args:
        username: account name typed into the login form.
        password: account password typed into the login form.
        width: browser window and viewport width in pixels.
        height: browser window and viewport height in pixels.

    Returns:
        The page cookies, or None when the page still shows the login link
        at the end (i.e. the login did not succeed).
    """
    browser = await launch({
        'headless': False,  # headless mode also works
        'slowMo': 1.3,
        'userDataDir': './userdata',
        'args': [
            # The comma after this entry matters: without it Python joins the
            # two adjacent literals into a single, invalid Chrome flag.
            f'--window-size={width},{height}',
            '--disable-extensions',
            '--hide-scrollbars',
            '--disable-bundled-ppapi-flash',
            '--mute-audio',
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-gpu',
            '--disable-infobars',
        ],
        'dumpio': True,
    })
    # try/finally so the browser process is closed even when a step raises.
    try:
        page = await browser.newPage()
        # Set the browser user agent.
        await page.setUserAgent(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36")
        # Set the viewport size.
        await page.setViewport({'width': width, 'height': height})
        # Inject the override scripts.
        await page_evaluate(page)
        await page.goto('http://index.baidu.com/v2/index.html')
        await page.waitFor(2000)

        # Read the text at the login position: if it says "登录" we must log
        # in, otherwise the stored profile cookies are already in effect.
        elements = await _element_property(page, '.username-text', 'textContent')
        if elements == "登录":
            await page.click(".username-text")
            await asyncio.sleep(1.6)
            # Fill in the user name.
            await page.type('.pass-text-input-userName', username)
            # Fill in the password (hover + press/release instead of click()).
            await page.hover(".pass-text-input-password")
            await asyncio.sleep(0.5)
            await page.mouse.down()
            await asyncio.sleep(random.random())
            await page.mouse.up()
            await page.type('.pass-text-input-password', password)
            # Move the pointer away, then press the login button.
            # pyppeteer reads the option key "steps"; "step" is ignored.
            await page.mouse.move(
                page.mouse._x + random.randint(50, 100),
                page.mouse._y + random.randint(100, 200),
                options={"steps": 3},
            )
            await page.hover(".pass-button-submit")
            await page.mouse.down()
            await asyncio.sleep(random.random())
            await page.mouse.up()
            await asyncio.sleep(2)

            # While a rotation captcha is shown, predict its angle and drag.
            rotImg = await page.querySelector('.vcode-spin-img')
            while rotImg:
                img_url = await (await rotImg.getProperty("src")).jsonValue()
                angle = process_images(img_url)[0]
                bottom_line = await _element_property(page, ".vcode-spin-bottom", "offsetWidth")
                button_line = await _element_property(page, ".vcode-spin-button", "offsetWidth")
                # Usable slider travel = track width - button width.
                b = bottom_line - button_line
                # Inverse of the site's formula angle = o / b * 360.
                move_line = angle / 360 * b
                await try_validation(page, move_line)
                # Wait 3 seconds for the captcha result.
                await asyncio.sleep(3)
                rotImg = await page.querySelector('.vcode-spin-img')

            # An SMS-verification popup cannot be solved here; dismiss it.
            no_in = await page.querySelector(".pass-forceverify-wrapper .forceverify-header-a")
            if no_in:
                print("有短信验证码废了")
                await no_in.click()

        # Wait 2 seconds before reading the cookies.
        await asyncio.sleep(2)
        cookies = await page.cookies()
        # In headless mode, print the user name to see whether login worked.
        elements = await _element_property(page, '.username-text', 'textContent')
        print(elements)
    finally:
        await browser.close()

    if elements == "登录":
        return None
    return cookies


async def try_validation(page, distance=308):
    """Drag the captcha slider button ``distance`` pixels to the right.

    The drag is split into a long first move and a final 10-pixel move, with
    pauses in between, to resemble a human drag.

    Args:
        page: the pyppeteer page showing the captcha.
        distance: total horizontal drag distance in pixels.
    """
    # Split the distance into two segments.
    distance1 = distance - 10
    distance2 = 10
    btn_position = await page.evaluate('''
        () =>{
            return {
                x: document.querySelector('.vcode-spin-button').getBoundingClientRect().x,
                y: document.querySelector('.vcode-spin-button').getBoundingClientRect().y,
                width: document.querySelector('.vcode-spin-button').getBoundingClientRect().width,
                height: document.querySelector('.vcode-spin-button').getBoundingClientRect().height
            }}
        ''')
    # Start the drag from the centre of the button.
    x = btn_position['x'] + btn_position['width'] / 2
    y = btn_position['y'] + btn_position['height'] / 2
    await page.mouse.move(x, y)
    await page.mouse.down()
    await page.mouse.move(x + distance1, y, {'steps': 30})
    await page.waitFor(800)
    await page.mouse.move(x + distance1 + distance2, y, {'steps': 20})
    await page.waitFor(800)
    await page.mouse.up()


def baidu_login(username, password, width, height):
    """Run ``main`` to completion on the event loop and return its result."""
    return asyncio.get_event_loop().run_until_complete(main(username, password, width, height))


if __name__ == "__main__":
    width, height = 1366, 768
    username = '你的账户'
    password = '你的密码'
    cookies = baidu_login(username, password, width, height)
    print(cookies)
    if cookies:
        # Flatten the cookies into a "name=value;" header-style string.
        string_cookies = "".join(f"{each['name']}={each['value']};" for each in cookies)
最后
完整的项目放在https://github.com/ShortCJL/RotateCode,注意:需要把模型下载下来解压到根目录