Writing the Mouse-Driver Exploitation Script

Following on from another note I posted:
Put logitech.driver.dll in the current directory, then create a new file logie_g1.py to hold the exploitation script, with the following content:

```python
import ctypes
import os
# import pynput
# import winsound
# import random
# import pyautogui
import time

ok = False  # becomes True only if the driver DLL loads and opens successfully
try:
    # get the absolute path of the current directory
    root = os.path.abspath(os.path.dirname(__file__))
    driver = ctypes.CDLL(f'{root}/logitech.driver.dll')
    ok = driver.device_open() == 1  # the driver allows one open instance per process
    if not ok:
        print('Error, GHUB or LGS driver not found')
except FileNotFoundError:
    print('Error, DLL file not found')


class Logitech:

    class mouse:
        """
        code: 1 = left button, 2 = middle button, 3 = right button
        """
        WAIT_TIME = 0.5   # wait time
        RANDOM_NUM = 0.1  # maximum random jitter

        @staticmethod
        def press(code):
            if not ok:
                return
            driver.mouse_down(code)

        @staticmethod
        def release(code):
            if not ok:
                return
            driver.mouse_up(code)

        @staticmethod
        def click(code):
            if not ok:
                return
            driver.mouse_down(code)
            driver.mouse_up(code)

        @staticmethod
        def scroll(a):
            """
            a: scroll amount (exact semantics unclear)
            """
            if not ok:
                return
            driver.scroll(a)

        @staticmethod
        def move(x, y):
            """
            Relative move. For absolute moves, combine with GetCursorPos from
            win32gui (pywin32) to work out the position:
            pip install pywin32 -i https://pypi.tuna.tsinghua.edu.cn/simple
            x: horizontal direction and distance, positive = right, negative = left
            y: vertical direction and distance
            """
            if not ok:
                return
            if x == 0 and y == 0:
                return
            driver.moveR(x, y, True)


class RunMovingTo:
    def __init__(self, move_x_distance, move_y_distance):
        self.move_x_distance = move_x_distance
        self.move_y_distance = move_y_distance
        self.log_mouse = Logitech.mouse

    def quick_move(self):
        self.log_mouse.move(self.move_x_distance, self.move_y_distance)


def main():
    time.sleep(3)
    # run_lgt = RunMovingTo(-50961, 2)
    run_lgt = RunMovingTo(1920, 1080)
    # for i in range(110):
    #     time.sleep(1)
    #     run_lgt.quick_move()
    run_lgt.quick_move()
    print(type(run_lgt))
    # time.sleep(1)
    # run_lgt.quick_move()


if __name__ == "__main__":
    main()
```

Next, consider the following questions:
1. In an FPS game, the mouse-to-screen mapping becomes a mapping of angles rather than pixels. How do we handle that?
2. In a match we always play on one particular side (except in deathmatch). How do we avoid locking onto teammates?
3. When one inference pass detects multiple targets, which one should we lock onto?

On every inference pass the model outputs n rows of data, where n is the number of detected targets. Each row holds 5 values, with the same meaning as a line in the label files created during annotation.
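For concreteness, here is a hypothetical example of one frame's output; the class indices are made up and depend on your own classes.txt:

```python
# Each row: (class, px, py, pw, ph), ratios in [0, 1],
# same layout as a line in a YOLO label file.
detections = [
    (1, 0.52, 0.41, 0.03, 0.05),  # hypothetical: class 1 = enemy head
    (3, 0.18, 0.63, 0.12, 0.30),  # hypothetical: class 3 = enemy body
]
```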
For the first question, think of it this way: once we are on the in-game screen, our mouse position always sits at the crosshair. Suppose that in some frame an enemy appears in the upper-left area. We take the enemy's coordinates in that frame, subtract our mouse coordinates, and get the offset on the image (loss_x, loss_y); in this case both values should be negative. Given those two values, we only need to call the driver's quick_move(loss_x, loss_y) to bring the crosshair closer to the enemy. Multiply both values by a rate, chosen sensibly so that the crosshair does not overshoot the enemy after quick_move, and call quick_move once per detection; then doesn't every detection pass pull the mouse closer to the target?
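A minimal sketch of this per-frame chase, reusing RunMovingTo from the script above; the RATE constant and function name here are illustrative (the full script later uses a factor of 0.6):

```python
RATE = 0.6  # multiplier < 1 so each quick_move undershoots rather than overshoots

def chase_step(enemy_x, enemy_y, mouse_x, mouse_y):
    """One move toward the target; called once per inference pass."""
    loss_x = enemy_x - mouse_x  # negative when the enemy is left of the crosshair
    loss_y = enemy_y - mouse_y  # negative when the enemy is above the crosshair
    RunMovingTo(int(loss_x * RATE), int(loss_y * RATE)).quick_move()
```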
For the second question, we just add a filter: lock onto heads only, and check which side each head belongs to.
For the third question, use the nearest-target rule: in-game we should guarantee that every quick_move chases the same target, and the Pythagorean theorem is all we need to find the closest one.
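A minimal sketch of that rule, assuming the ratio-form rows from above. Since the capture region is centered on the crosshair, the crosshair sits at (0.5, 0.5) in ratio space, and squared distances suffice (no square root needed for comparison):

```python
def nearest_to_crosshair(aims):
    """aims: list of (cls, px, py, pw, ph); crosshair at (0.5, 0.5) in ratio space."""
    return min(aims, key=lambda a: (a[1] - 0.5) ** 2 + (a[2] - 0.5) ** 2)
```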
One more issue: we do not need to run detection on the whole screen. Various UI elements get in the way, and to save hardware resources and raise the frame rate, we capture and detect only the 640*640 region at the center of the screen.
Next comes the coordinate-transformation math. YOLOv5 reports the rectangle-center position in ratio form; see the following image:

Here (x, y) is the coordinate of the head center inside the box, relative to the 640*640 detection region: (x, y) = (640*px, 640*py), where px and py are the head-coordinate ratios reported by YOLOv5, i.e. the 2nd and 3rd values of each row. x1 and y1 are the distances from the left and top screen edges to the detection region; adding them to x and y gives the head's true coordinates on the full screen. Subtracting the screen-center coordinates (x2, y2) then gives the direction and distance the mouse should move, so quick_move(x1 + x - x2, y1 + y - y2) does the job.
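A worked example for a 1920*1080 screen with the 640*640 region centered; these are exactly the 640 and 220 offsets that show up in the script below:

```python
screen_w, screen_h, region = 1920, 1080, 640
x1 = (screen_w - region) // 2          # 640: left screen edge to detection box
y1 = (screen_h - region) // 2          # 220: top screen edge to detection box
x2, y2 = screen_w // 2, screen_h // 2  # screen center (960, 540)

px, py = 0.5, 0.5                      # example ratios from one detection row
x, y = region * px, region * py        # head center inside the 640x640 box
move_x = x1 + x - x2                   # horizontal mouse move; 0 for a centered target
move_y = y1 + y - y2                   # vertical mouse move; 0 for a centered target
```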

Writing the code: gluing it all together

What remains is piecing the code together: copy the useful parts over from YOLOv5's bundled inference module.
The full cheat script is attached below. Note the following:
1. If your screen has a different resolution, modify the code following the same calculation method (see the sketch after this list).
2. Tune the confidence threshold conf_thres and the IoU threshold iou_thres. Keep the confidence fairly high: during inference the model only returns targets whose probability exceeds the confidence threshold. Keep the IoU fairly low, otherwise a single target will be detected as multiple nearly overlapping boxes.
3. To change which faction's heads are locked onto, edit the code at the just_head comment, following the class order in the classes.txt file.
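For note 1, a hypothetical 2560*1440 setup using the same centering arithmetic:

```python
screen_w, screen_h, region = 2560, 1440, 640
x1 = (screen_w - region) // 2  # 960
y1 = (screen_h - region) // 2  # 400
# so the capture source becomes "screen 0 960 400 640 640",
# and calculate_mouse_offset uses target_x*640+960 and target_y*640+400
```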

```python
import os
import sys

import torch
import tkinter, pyautogui, time, win32gui, win32con
from models.common import DetectMultiBackend
from utils.dataloaders import LoadScreenshots
from utils.general import check_img_size, non_max_suppression, scale_boxes, xyxy2xywh
from utils.torch_utils import select_device
from pathlib import Path
from logie_g1 import RunMovingTo
import cv2

FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative


def run(
        weights=ROOT / 'yolov5s.pt',  # model path or triton URL
        # source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
        source=ROOT / 'cs_black.mp4',  # file/dir/URL/glob/screen/0(webcam)
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        # conf_thres=0.55,  # confidence threshold
        conf_thres=0.75,  # confidence threshold
        iou_thres=0.15,  # NMS IOU threshold
        # iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
):
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)
    shot_width, shot_height = get_resolution()
    dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
    for path, im, im0s, vid_cap, s in dataset:
        aims = []
        im = torch.from_numpy(im).to(model.device)
        im = im.half() if model.fp16 else im.float()
        im /= 255
        if len(im.shape) == 3:
            im = im[None]
        pred = model(im, augment=augment, visualize=visualize)
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        for i, det in enumerate(pred):
            im0 = im0s.copy()
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
            if len(det):
                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
                    line = (cls, *xywh)
                    # print(('%g ' * len(line)).rstrip() % line, xywh, type(xywh))
                    # if cls != 0:
                    #     continue

                    # just_head: skip every class that is not an enemy head
                    if cls == 0 or cls == 2:
                        continue
                    aims.append(line)
        # this moves the mouse
        current_x, current_y = pyautogui.position()
        aim = get_nearest_center_aim(aims, current_x, current_y, shot_width, shot_height)
        try:
            movx, movy = calculate_mouse_offset(aim, current_x, current_y, shot_width, shot_height)
        except TypeError:  # aim is None when nothing was detected
            movx, movy = (0, 0)
        run_lgt = RunMovingTo(int(movx * 0.6), int(movy * 0.6))
        # print(type(run_lgt))
        run_lgt.quick_move()


def get_nearest_center_aim(aims, current_mouse_x, current_mouse_y, shot_width, shot_height):
    """Pick the label closest to the crosshair."""
    dist_list = []
    aims_copy = aims.copy()
    aims_copy = [x for x in aims_copy]
    if len(aims_copy) == 0:
        return
    for det in aims_copy:
        _, x_c, y_c, _, _ = det
        # dist = (shot_width * float(x_c) - current_mouse_x) ** 2 + (shot_height * float(y_c) - current_mouse_y) ** 2

        # 640x640 capture region: the crosshair sits at (320, 320)
        dist = (640 * float(x_c) - 320) ** 2 + (640 * float(y_c) - 320) ** 2

        # # 320x320
        # dist = (320 * float(x_c) - 160) ** 2 + (320 * float(y_c) - 160) ** 2

        dist_list.append(dist)
    return aims_copy[dist_list.index(min(dist_list))]


def calculate_mouse_offset(aim, current_x, current_y, shot_width, shot_height):
    # print(aim, "this is aim!!!", type(aim))
    tag, target_x, target_y, target_width, target_height = aim
    # movex, movey = target_x * shot_width - current_x, target_y * shot_height - current_y
    # print(target_x, target_y, resolution_x // 2, resolution_x // 2, movex, movey)

    # 640x640 region at offset (640, 220) on a 1920x1080 screen
    tarx, tary = (target_x * 640 + 640), (target_y * 640 + 220)
    # # 320x320 region at offset (800, 380)
    # tarx, tary = (target_x * 320 + 800), (target_y * 320 + 380)
    movex, movey = tarx - current_x, tary - current_y
    return movex, movey


def get_resolution():
    """Get the screen resolution."""
    screen = tkinter.Tk()
    resolution_x = screen.winfo_screenwidth()
    resolution_y = screen.winfo_screenheight()
    screen.destroy()
    return resolution_x, resolution_y


if __name__ == '__main__':
    # run(weights="./runs/train/exp5/weights/best.pt", source="screen 0 0 0 1920 1080")
    # run(weights="./yolov5s.pt", source="screen 0 640 220 640 640")
    # run(weights="./yolov5s.pt", source="screen 0 800 380 320 320")

    # # 640x640
    # run(weights="./csmodel.engine", source="screen 0 640 220 640 640")

    run(weights="./yolov5s.pt", source="screen 0 640 220 640 640")
    # # 320x320
    # run(weights="./csmodel.engine", source="screen 0 800 380 320 320")
```

I also attach the redraw script provided by @xrect1fy: it opens a new window and uses OpenCV to redraw the detected targets in real time; capture that window with OBS and you can record it, which makes it easy to demonstrate the cheat's effect. My setup is two 1920*1080 monitors in extended mode; after running it, a 3840*1080 screenshot looks like this:

With that, the cheat script is complete. For further optimization I am currently considering two routes:
1. Use TensorRT to optimize the PyTorch .pt file and speed up model inference.
2. Rewrite the code in C++.
I may churn out a follow-up article on the first route.
Finally, thanks to @xrect1fy for his help: he collected the dataset, trained the model, and wrote the redraw script. If you need anything, contact him or me.