There are two ways to locate the gap in a Geetest slider captcha. The first is to compare pixel differences to find the gap position, which was covered in the article "JS Reverse Engineering Case: Bypassing the Geetest Slider Captcha on Tianyancha". This article covers the second approach: using deep learning to detect the gap position, with Huawei Cloud ModelArts used to train the gap-detection model.

Preparing the dataset

Collecting captcha images

Here is the code:

import json
import math
import re
import time
from urllib.parse import urljoin

import requests
from PIL import Image


# CSS background offsets Geetest uses to scramble the background image.
# Each entry records where a 10x58 slice of the original image was moved to.
div_offset = [
    {"x": -157, "y": -58},
    {"x": -145, "y": -58},
    {"x": -265, "y": -58},
    {"x": -277, "y": -58},
    {"x": -181, "y": -58},
    {"x": -169, "y": -58},
    {"x": -241, "y": -58},
    {"x": -253, "y": -58},
    {"x": -109, "y": -58},
    {"x": -97, "y": -58},
    {"x": -289, "y": -58},
    {"x": -301, "y": -58},
    {"x": -85, "y": -58},
    {"x": -73, "y": -58},
    {"x": -25, "y": -58},
    {"x": -37, "y": -58},
    {"x": -13, "y": -58},
    {"x": -1, "y": -58},
    {"x": -121, "y": -58},
    {"x": -133, "y": -58},
    {"x": -61, "y": -58},
    {"x": -49, "y": -58},
    {"x": -217, "y": -58},
    {"x": -229, "y": -58},
    {"x": -205, "y": -58},
    {"x": -193, "y": -58},
    {"x": -145, "y": 0},
    {"x": -157, "y": 0},
    {"x": -277, "y": 0},
    {"x": -265, "y": 0},
    {"x": -169, "y": 0},
    {"x": -181, "y": 0},
    {"x": -253, "y": 0},
    {"x": -241, "y": 0},
    {"x": -97, "y": 0},
    {"x": -109, "y": 0},
    {"x": -301, "y": 0},
    {"x": -289, "y": 0},
    {"x": -73, "y": 0},
    {"x": -85, "y": 0},
    {"x": -37, "y": 0},
    {"x": -25, "y": 0},
    {"x": -1, "y": 0},
    {"x": -13, "y": 0},
    {"x": -133, "y": 0},
    {"x": -121, "y": 0},
    {"x": -49, "y": 0},
    {"x": -61, "y": 0},
    {"x": -229, "y": 0},
    {"x": -217, "y": 0},
    {"x": -193, "y": 0},
    {"x": -205, "y": 0}
]


def recover_pic(pic_path, new_pic_path):
    """Restore a scrambled captcha background image to its original layout."""
    unordered_pic = Image.open(pic_path)
    ordered_pic = unordered_pic.copy()

    # Crop each 10x58 slice and paste it back at its original position
    for i, d in enumerate(div_offset):
        im = unordered_pic.crop((math.fabs(d['x']), math.fabs(d['y']),
                                 math.fabs(d['x']) + 10, math.fabs(d['y']) + 58))
        # Upper half of the image
        if d['y'] != 0:
            ordered_pic.paste(im, (10 * (i % (len(div_offset) // 2)), 0), None)
        # Lower half of the image
        else:
            ordered_pic.paste(im, (10 * (i % (len(div_offset) // 2)), 58), None)

    ordered_pic.save(new_pic_path)


def download_pic(store_name):
    """Download one scrambled captcha background image and save it to store_name."""
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Origin': 'https://www.tianyancha.com',
        'Pragma': 'no-cache',
        'Referer': 'https://www.tianyancha.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
        'X-TYCID': '70c68810ddbd11eda0a455532f9618b6',
        'sec-ch-ua': '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"',
    }

    params = {
        '_': '1683799958183',
    }

    # Ask Tianyancha for a new captcha challenge (returns gt and challenge)
    response = requests.get('https://napi-huawei.tianyancha.com/validate/init', params=params, headers=headers)
    resp = json.loads(response.text)
    data = json.loads(resp['data'])

    gt = data['gt']
    challenge = data['challenge']

    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Referer': 'https://www.tianyancha.com/',
        'Sec-Fetch-Dest': 'script',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'cross-site',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/112.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"',
    }

    # Fetch the captcha metadata, which contains the background image path
    response = requests.get(
        f'https://api.geevisit.com/get.php?gt={gt}&challenge={challenge}&product=popup&offline=false&'
        f'api_server=api.geevisit.com&protocol=https://&type=slide&path=/static/js/geetest.6.0.9.js'
        f'&callback=geetest_{str(round(time.time() * 1000))}',
        headers=headers,
    )

    bg_url = urljoin("https://static.geetest.com/", re.findall("\"bg\": \"(.*?)\"", response.text)[0])
    print("Downloading captcha image from: " + bg_url)
    resp = requests.get(bg_url)
    with open(store_name, "wb+") as f:
        f.write(resp.content)


if __name__ == '__main__':
    for i in range(10):
        name = "input/" + str(i) + ".jpg"
        download_pic(name)
        print("Saved scrambled captcha image to: " + name)
        new_name = "output/" + str(i) + ".jpg"
        recover_pic(name, new_name)
        print("Restored captcha image saved to: " + new_name)
        time.sleep(0.5)

The script has two main functions: download_pic downloads a scrambled captcha image, and recover_pic restores it to the original layout. For the request logic behind downloading the captcha, see "JS Reverse Engineering Case: Restoring the Geetest Slider Captcha Background Image".

The result of running the script looks like this:

image-20230512175231221

Once the images are downloaded, upload them to Huawei Cloud OBS (Object Storage Service); the exact steps are easy to find online. For this article, 100 images were downloaded and uploaded.
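
As a rough sketch of that upload step, the snippet below uses the Huawei OBS Python SDK (esdk-obs-python). The bucket name, object paths, endpoint and credentials are placeholders, not values from this project:

from obs import ObsClient  # pip install esdk-obs-python

# Placeholder credentials and endpoint -- replace with your own values
client = ObsClient(
    access_key_id='YOUR_AK',
    secret_access_key='YOUR_SK',
    server='https://obs.cn-north-4.myhuaweicloud.com',
)

# Upload the restored captcha images into a directory of the bucket
for i in range(100):
    local_path = f"output/{i}.jpg"
    object_key = f"captcha/input/{i}.jpg"   # hypothetical object path
    resp = client.putFile('my-captcha-bucket', object_key, local_path)
    print(object_key, resp.status)

client.close()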

Data annotation

Open the ModelArts console at https://console.huaweicloud.com/modelarts/?region=cn-north-4#/dashboard, then click ExeML (auto learning):

image-20230512180852319

Here you can see all of the capabilities ModelArts supports. Choose the second one, Object Detection, and click Create Project:

image-20230512181042875

The project name and dataset name can be left at their auto-generated values. For the dataset input location, enter the directory of the bucket the captcha images were just uploaded to; for the dataset output location, simply create a new empty directory under the same bucket. Finally, click Create Project.
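
For illustration, with the hypothetical bucket layout used in the upload sketch above, the two locations would look roughly like this:

obs://my-captcha-bucket/captcha/input/            # dataset input: where the captcha images live
obs://my-captcha-bucket/captcha/dataset-output/   # dataset output: empty directory for annotation results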

Then click Unlabeled:

image-20230512181548602

Next, click any unlabeled image and annotate it:

Annotation step 1

After annotating, the coordinates of the gap relative to the background image are displayed alongside:

Annotation step 2

After finishing one image, click to select the next and continue. The dataset contains 100 images in total; annotating them all takes roughly ten-odd minutes.
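
If you later want to reuse these labels outside of ModelArts: object-detection datasets are typically exported as Pascal VOC style XML files in the dataset output directory (this layout is an assumption based on ModelArts defaults; verify it against your own output bucket). A minimal sketch that reads one such XML file and prints the gap's bounding box:

import xml.etree.ElementTree as ET

def read_gap_box(xml_path):
    """Parse a Pascal VOC style annotation and return (xmin, ymin, xmax, ymax)."""
    root = ET.parse(xml_path).getroot()
    obj = root.find('object')            # only one labelled object (the gap) per captcha
    box = obj.find('bndbox')
    return tuple(int(box.find(t).text) for t in ('xmin', 'ymin', 'xmax', 'ymax'))

print(read_gap_box('0.xml'))  # hypothetical file name, e.g. the label of 0.jpg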

Model training

Once the dataset is fully annotated, click Start Training:

Start training

A message then indicates that the model is training:

Training in progress

Since the dataset is small, training takes roughly 5-10 minutes. When it finishes, the page looks like this:

Training complete

Note: the accuracy is only 94% for two reasons. First, the sample size is small; second, the annotation was done quickly rather than carefully to save time. Addressing both points should push the accuracy close to 100%.

Model deployment

Click the Deploy button:

Deploy

Simply click Next and accept the defaults.

Model testing

Upload an image to test:

Prediction test

The gap position is essentially identified correctly.

Now upload another image containing a decoy gap to test:

Prediction with a decoy gap

This one is also identified correctly.
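
Beyond the console test page, the deployed model is exposed as an online REST service, so the gap position can also be obtained from a script. The sketch below is only an outline: the service URL, IAM token, request field name and response layout (detection_boxes and friends) follow the common ModelArts object-detection response format, and should be verified against your own service's API guide.

import requests

def detect_gap(image_path, service_url, token):
    """Send a captcha image to the deployed ModelArts service and return the gap's left edge."""
    with open(image_path, 'rb') as f:
        files = {'images': f}                  # request field name assumed; check your service's API guide
        headers = {'X-Auth-Token': token}      # IAM token authorising the call
        resp = requests.post(service_url, files=files, headers=headers).json()

    # Assumed response layout: one box per detected object as [ymin, xmin, ymax, xmax]
    box = resp['detection_boxes'][0]
    return box[1]                              # xmin of the gap

# Hypothetical values -- replace with your own endpoint and token
# x = detect_gap('output/0.jpg', 'https://<endpoint>/v1/infers/<service-id>', '<IAM token>')
# print(x)

The returned x coordinate, minus the slider's starting offset, is then the horizontal distance the slider needs to travel.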

And with that, we're done.