環境補充

        配置訓練環境時,有幾個套件沒有一併安裝,這裏補充說明。我只記得下面這幾個,其餘的已經忘了;執行時若提示缺少其他套件,缺什麼就用 pip 安裝什麼即可。

# Activate the conda environment named "rf-detr" before installing.
conda activate rf-detr
# ONNX-related packages the article reports as missing from the training
# environment; onnxruntime is the one imported by the inference script below.
pip install onnx
pip install onnxruntime
pip install onnxsim

onnx模型導出

from rfdetr import RFDETRBase,RFDETRLarge,RFDETRNano,RFDETRSmall,RFDETRMedium

# Build the Nano model from the fine-tuned checkpoint, then export it into
# the same run directory (the inference script below loads its .onnx file
# from there).
model = RFDETRNano(
    pretrain_weights="/home/project_python/rf-detr/runs/nano/checkpoint_best_total.pth"
)
model.export(output_dir="/home/project_python/rf-detr/runs/nano/")

onnx模型推理

        這裏作者實現了兩個版本的預處理:一個使用 torch(preprocess_image_torch),一個使用 numpy(preprocess_image)。經過測試,torch 版本預處理後的推理效果與 pt 模型的推理效果一致;numpy 版本的推理結果與 pt 模型略有出入,但影響不大。由於工程端一般不使用 torch,大家可以選擇 numpy 版本的預處理(下方示例的 run_detection 呼叫的就是這個版本),再將其轉換成 C++ 代碼進行部署。

import cv2
import onnxruntime
import numpy as np
import random
import time
import torchvision.transforms.functional as F


def xywh2xyxy(x):
    """
    Convert boxes from centre format to corner format.

    The first four entries on the last axis are read as
    (center_x, center_y, width, height) and rewritten as
    (x1, y1, x2, y2). Any further entries on that axis are copied through
    unchanged, and the input array itself is not modified.

    Args:
        x (np.ndarray): Boxes with at least four entries on the last axis.

    Returns:
        np.ndarray: A copy of ``x`` holding the converted coordinates.
    """
    # Integer-list indexing (rather than slicing) keeps the IndexError for
    # inputs that have fewer than four entries on the last axis.
    half_extent = x[..., [2, 3]] / 2
    center = x[..., [0, 1]]

    corners = np.copy(x)
    corners[..., [0, 1]] = center - half_extent  # top-left corner
    corners[..., [2, 3]] = center + half_extent  # bottom-right corner
    return corners


def softmax(x, axis=0):
    """
    Compute a numerically stable softmax along one axis.

    The maximum is subtracted per slice along ``axis`` before
    exponentiating. Subtracting the global maximum instead (as an earlier
    version did) lets whole columns underflow to zero for multi-dimensional
    input, which turns the normalisation into 0/0 and yields NaN.

    Args:
        x (np.ndarray): Input scores (anything ``np.asarray`` accepts).
        axis (int): Axis along which the outputs sum to 1. Defaults to 0,
            which matches the previous behaviour.

    Returns:
        np.ndarray: Array of the same shape as ``x`` whose values sum to 1
        along ``axis``.
    """
    x = np.asarray(x)
    # keepdims=True keeps the reduced axis so the result broadcasts against x.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=axis, keepdims=True)

def sigmoid(x):
    """
    Apply the logistic function 1 / (1 + exp(-x)) element-wise.

    Args:
        x (np.ndarray): Input values (a scalar also works).

    Returns:
        np.ndarray: Values in the range [0, 1], same shape as ``x``.
    """
    exp_neg = np.exp(-x)
    return 1 / (exp_neg + 1)


def get_optimal_font_scale(image_shape, text, font_face=cv2.FONT_HERSHEY_SIMPLEX):
    """
    Pick a font scale proportional to the image resolution.

    The scale is the shorter image side divided by 1000, limited to the
    range [0.5, 2.0]. ``text`` and ``font_face`` are accepted but do not
    influence the result.

    Args:
        image_shape (tuple): Image shape; only the first two entries are used.
        text (str): Unused.
        font_face (int): Unused.

    Returns:
        float: Font scale between 0.5 and 2.0.
    """
    first_side, second_side = image_shape[0], image_shape[1]
    shorter_side = first_side if first_side <= second_side else second_side

    scale = shorter_side / 1000.0
    # Keep the scale readable on tiny images and bounded on huge ones.
    if scale < 0.5:
        return 0.5
    if scale > 2.0:
        return 2.0
    return scale


def generate_distinct_color():
    """
    Draw a random, vivid colour and return it as a BGR tuple.

    The colour is sampled in HSV space with saturation and value both
    restricted to 150-255, so the result is neither washed out nor dark,
    and is then converted to BGR with OpenCV.

    Returns:
        tuple: Three Python ints (b, g, r).
    """
    # Sampled in the order hue, saturation, value. OpenCV's 8-bit hue range
    # is 0-179.
    hue, saturation, value = (
        random.randint(low, high)
        for low, high in ((0, 179), (150, 255), (150, 255))
    )

    # cvtColor works on images, so wrap the colour in a 1x1 pixel image.
    hsv_pixel = np.uint8([[[hue, saturation, value]]])
    bgr_pixel = cv2.cvtColor(hsv_pixel, cv2.COLOR_HSV2BGR)[0][0]
    return tuple(int(channel) for channel in bgr_pixel)


def get_color_for_class(class_id, color_map):
    """
    Return the colour assigned to ``class_id``, creating one on first use.

    Args:
        class_id: Hashable class identifier used as the dictionary key.
        color_map (dict): Cache of class_id -> colour. It is updated in
            place when ``class_id`` has no colour yet.

    Returns:
        The colour stored in ``color_map`` for ``class_id``.
    """
    if class_id in color_map:
        return color_map[class_id]

    new_color = generate_distinct_color()
    color_map[class_id] = new_color
    return new_color


def preprocess_image(image, target_size=(384, 384)):
    """
    Prepare an image for the ONNX model using only OpenCV and NumPy.

    Steps: resize to ``target_size`` with bilinear interpolation, convert
    BGR to RGB, divide by 255, normalise each channel with the mean/std
    constants below, move channels first and add a batch axis.

    Args:
        image (np.ndarray): Input image in BGR channel order, height x width
            x channels.
        target_size (tuple): Size handed to ``cv2.resize``, i.e.
            (width, height).

    Returns:
        tuple: ``(batch, (w_rate, h_rate), (w, h))`` where ``batch`` is a
        float32 array with a leading batch axis, the rates are the original
        width/height divided by the target width/height, and ``(w, h)`` is
        the original size.
    """
    source_h, source_w = image.shape[:2]
    scale_factors = (source_w / target_size[0], source_h / target_size[1])

    resized = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    # Per-channel normalisation constants, in RGB order.
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    pixels = rgb.astype(np.float32) / 255.0
    pixels = (pixels - channel_mean) / channel_std

    # HWC -> CHW, then prepend the batch axis and cast back to float32.
    batch = pixels.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)

    return batch, scale_factors, (source_w, source_h)


def preprocess_image_torch(image, target_size=(384, 384)):
    """
    Prepare an image for the ONNX model using torchvision transforms.

    Steps: convert to a tensor, normalise each channel with the mean/std
    constants below, resize to ``target_size``, then return the result as a
    float32 NumPy array with a leading batch axis.

    The work runs on "cuda:0" when CUDA is available and falls back to the
    CPU otherwise, so the function also works on machines without a GPU.

    Args:
        image (np.ndarray): Input image, height x width x channels.
        target_size (tuple): Target size as (width, height), the same
            convention ``preprocess_image`` uses.

    Returns:
        tuple: ``(batch, (w_rate, h_rate), (w, h))`` where ``batch`` is a
        float32 array with a leading batch axis, the rates are the original
        width/height divided by the target width/height, and ``(w, h)`` is
        the original size.
    """
    import torch  # local import: only this torch-based variant needs it

    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    src_h, src_w = image.shape[:2]
    w_rate = src_w / target_size[0]
    h_rate = src_h / target_size[1]

    # NOTE(review): unlike preprocess_image, no BGR->RGB conversion happens
    # here -- confirm this matches how the model was trained/evaluated.
    img_tensor = F.to_tensor(image).to(device)

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    img_tensor = F.normalize(img_tensor, mean, std)

    # F.resize expects (height, width) while target_size is (width, height);
    # the order only matters for non-square targets.
    img_tensor = F.resize(img_tensor, [target_size[1], target_size[0]])

    batch = np.expand_dims(img_tensor.cpu().numpy(), axis=0).astype(np.float32)
    return batch, (w_rate, h_rate), (src_w, src_h)


def postprocess_detections(bboxes, labels, target_size=(384, 384)):
    """
    Turn raw model outputs into a list of detections in network-input pixels.

    Each box is converted from centre format to corner format with
    ``xywh2xyxy`` and multiplied by the network input size. Class scores
    are passed through ``sigmoid`` and the highest-scoring class is kept.

    Args:
        bboxes (np.ndarray): One box per row; multiplied by the input size
            here, so presumably normalised to [0, 1] -- confirm against the
            exported model.
        labels (np.ndarray): One row of raw class scores per box.
        target_size (tuple): Network input size as (width, height), the
            same convention ``preprocess_image`` uses.

    Returns:
        list: One dict per box with keys ``'bbox'`` (x1, y1, x2, y2 array),
        ``'class_id'`` and ``'confidence'``.
    """
    # target_size is (width, height): x coordinates scale with the width and
    # y coordinates with the height. The two were swapped before, which only
    # went unnoticed because the default size is square.
    width, height = target_size[0], target_size[1]

    results = []
    for raw_box, raw_scores in zip(bboxes, labels):
        bbox = xywh2xyxy(raw_box)
        bbox[0] *= width
        bbox[2] *= width
        bbox[1] *= height
        bbox[3] *= height

        # Scores are treated as independent per-class logits (sigmoid, not
        # softmax); keep the best class and its score.
        scores = sigmoid(raw_scores)
        class_id = np.argmax(scores)
        conf = scores[class_id]

        results.append({
            'bbox': bbox,
            'class_id': class_id,
            'confidence': conf
        })

    return results


def draw_detections(image, detections, scale_factors, conf_threshold=0.5):
    """
    Draw boxes and "class confidence" labels for confident detections.

    Only detections whose confidence is strictly greater than
    ``conf_threshold`` are drawn. Box coordinates are multiplied by
    ``scale_factors`` to map them onto ``image``. Drawing happens on a copy,
    so ``image`` itself is left untouched. Each drawn detection and the
    final count are also printed.

    Args:
        image (np.ndarray): Image to annotate.
        detections (list): Dicts with keys 'bbox', 'class_id', 'confidence'.
        scale_factors (tuple): (w_rate, h_rate) applied to x and y
            coordinates respectively.
        conf_threshold (float): Minimum confidence (exclusive) for drawing.

    Returns:
        np.ndarray: Annotated copy of ``image``.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    w_rate, h_rate = scale_factors
    canvas = image.copy()

    # One random colour per class id, kept for the duration of this call.
    class_colors = {}

    # Font size and line thickness both follow the image resolution.
    font_scale = get_optimal_font_scale(image.shape, "SampleText")
    thickness = max(1, int(font_scale * 2))

    detection_count = 0
    for detection in detections:
        conf = detection['confidence']
        # Written as "not >" so a NaN confidence is skipped, exactly like
        # a plain "conf > threshold" check would skip it.
        if not conf > conf_threshold:
            continue

        detection_count += 1
        bbox = detection['bbox']
        class_id = detection['class_id']
        color = get_color_for_class(class_id, class_colors)

        # Map the box from network-input pixels to original-image pixels.
        x1, x2 = int(bbox[0] * w_rate), int(bbox[2] * w_rate)
        y1, y2 = int(bbox[1] * h_rate), int(bbox[3] * h_rate)
        cv2.rectangle(canvas, (x1, y1), (x2, y2), color, thickness)

        # Label sits directly above the box on a filled background.
        label = f"{class_id} {conf:.2f}"
        text_size, baseline = cv2.getTextSize(label, font, font_scale, thickness)
        text_width, text_height = text_size

        background_top_left = (x1, y1 - text_height - baseline - 2)
        background_bottom_right = (x1 + text_width, y1)
        cv2.rectangle(canvas, background_top_left, background_bottom_right, color, -1)

        cv2.putText(canvas, label, (x1, y1 - baseline),
                    font, font_scale, (255, 255, 255), thickness)

        print(f"檢測框: 類別={class_id}, 置信度={conf:.2f}")

    print(f"總共檢測到 {detection_count} 個置信度高於閾值的目標")
    return canvas


def run_detection(model_path, image_path, conf_threshold=0.5):
    """
    Run ONNX inference on one image and save the annotated result.

    The model is loaded with the CPU execution provider, the image is
    preprocessed with ``preprocess_image``, and the annotated image is
    written to "result_onnx.jpg" in the current working directory. The
    inference time is printed.

    The network input size is taken from the model's declared input shape
    when that shape is static, so models exported at a resolution other
    than 384x384 work too. For dynamic shapes the 384x384 default is kept.

    Args:
        model_path (str): Path to the ONNX model.
        image_path (str): Path to the input image.
        conf_threshold (float): Minimum confidence for a detection to be
            drawn.

    Raises:
        OSError: If the image cannot be loaded.
    """
    model = onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])

    # cv2.imread signals failure by returning None instead of raising.
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(f"Could not load image (missing or unreadable): {image_path}")

    # The declared input shape is read as (batch, channels, height, width),
    # matching the layout preprocess_image produces. Symbolic (dynamic)
    # dimensions are not ints, in which case the default size is used.
    input_meta = model.get_inputs()[0]
    dims = input_meta.shape
    if len(dims) == 4 and isinstance(dims[2], int) and isinstance(dims[3], int):
        target_size = (dims[3], dims[2])  # (width, height)
    else:
        target_size = (384, 384)

    processed_image, scale_factors, _ = preprocess_image(image, target_size)

    start = time.time()
    output = model.run(["dets", "labels"], {input_meta.name: processed_image})
    print(f"inference time: {time.time() - start}")
    bboxes = output[0][0]  # drop the batch axis
    labels = output[1][0]

    detections = postprocess_detections(bboxes, labels, target_size)

    # draw_detections works on its own copy, so the loaded image can be
    # passed directly.
    result_image = draw_detections(image, detections, scale_factors, conf_threshold)

    cv2.imwrite("result_onnx.jpg", result_image)


# 主程序
if __name__ == "__main__":
    # Hard-coded sample paths: the test image and the exported Nano model.
    image_path = "/home/project_python/rf-detr/dataset/dtrain_20251030_v1/images/test/gacdbx_1_17424bef_1600_20251029113538995.jpg"
    model_path = "/home/project_python/rf-detr/runs/nano/inference_model.onnx"

    # The total time covers model loading, inference, drawing and saving.
    start = time.time()
    run_detection(
        model_path=model_path,
        image_path=image_path,
        conf_threshold=0.9,
    )
    print(f"total time: {time.time() - start}")

結語

        ONNX 模型的推理速度約為每張 72 ms,下篇文章將更新 TensorRT 推理。