# -------------------------------------------------------- # Based on yolov10 # https://github.com/THU-MIG/yolov10/app.py # --------------------------------------------------------' import gradio as gr import cv2 import tempfile from ultralytics import YOLO import threading from fastapi import FastAPI from pydantic import BaseModel import uvicorn import logging # 设置日志格式和级别 logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s - %(message)s') def yolov12_inference(image, video, model_id, image_size, conf_threshold): model = YOLO(model_id) if image: results = model.predict(source=image, imgsz=image_size, conf=conf_threshold) annotated_image = results[0].plot() return annotated_image[:, :, ::-1], None else: video_path = tempfile.mktemp(suffix=".webm") with open(video_path, "wb") as f: with open(video, "rb") as g: f.write(g.read()) cap = cv2.VideoCapture(video_path) fps = cap.get(cv2.CAP_PROP_FPS) frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) output_video_path = tempfile.mktemp(suffix=".webm") out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'vp80'), fps, (frame_width, frame_height)) while cap.isOpened(): ret, frame = cap.read() if not ret: break results = model.predict(source=frame, imgsz=image_size, conf=conf_threshold) annotated_frame = results[0].plot() out.write(annotated_frame) cap.release() out.release() return None, output_video_path def yolov12_inference_for_examples(image, model_path, image_size, conf_threshold): annotated_image, _ = yolov12_inference(image, None, model_path, image_size, conf_threshold) return annotated_image def app(): with gr.Blocks(): with gr.Row(): with gr.Column(): image = gr.Image(type="pil", label="Image", visible=True) video = gr.Video(label="Video", visible=False) input_type = gr.Radio( choices=["Image", "Video"], value="Image", label="Input Type", ) model_id = gr.Dropdown( label="Model", choices=[ "yolov12n.pt", "yolov12s.pt", "yolov12m.pt", "yolov12l.pt", "yolov12x.pt", ], value="yolov12m.pt", ) image_size = gr.Slider( label="Image Size", minimum=320, maximum=1280, step=32, value=640, ) conf_threshold = gr.Slider( label="Confidence Threshold", minimum=0.0, maximum=1.0, step=0.05, value=0.25, ) yolov12_infer = gr.Button(value="Detect Objects") with gr.Column(): output_image = gr.Image(type="numpy", label="Annotated Image", visible=True) output_video = gr.Video(label="Annotated Video", visible=False) def update_visibility(input_type): image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False) video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True) output_image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False) output_video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True) return image, video, output_image, output_video input_type.change( fn=update_visibility, inputs=[input_type], outputs=[image, video, output_image, output_video], ) def run_inference(image, video, model_id, image_size, conf_threshold, input_type): if input_type == "Image": return yolov12_inference(image, None, model_id, image_size, conf_threshold) else: return yolov12_inference(None, video, model_id, image_size, conf_threshold) yolov12_infer.click( fn=run_inference, inputs=[image, video, model_id, image_size, conf_threshold, input_type], outputs=[output_image, output_video], ) gr.Examples( examples=[ [ "ultralytics/assets/bus.jpg", "yolov12s.pt", 640, 0.25, ], [ "ultralytics/assets/zidane.jpg", "yolov12x.pt", 640, 0.25, ], ], fn=yolov12_inference_for_examples, inputs=[ image, model_id, image_size, conf_threshold, ], outputs=[output_image], cache_examples='lazy', ) gradio_app = gr.Blocks() with gradio_app: gr.HTML( """

YOLOv12: Attention-Centric Real-Time Object Detectors

""") gr.HTML( """

arXiv | github

""") with gr.Row(): with gr.Column(): app() def start_gradio(): gradio_app.launch(server_name="0.0.0.0", server_port=7860) # FastAPI部分 app_fastapi = FastAPI() class TrainParams(BaseModel): """ 用于接收/yolov12/train接口的训练参数,所有参数均需前端传入。 """ data: str # 数据集配置文件路径 epochs: int # 训练轮数 batch: int # 批次大小 imgsz: int # 输入图片尺寸 scale: float # 随机缩放增强比例 mosaic: float # mosaic数据增强概率 mixup: float # mixup数据增强概率 copy_paste: float # copy-paste数据增强概率 device: str # 训练设备 project: str # 工程名 name: str # 实验名 exist_ok: bool # 是否允许覆盖同名目录 @app_fastapi.post("/yolov12/train") def yolov12_train(params: TrainParams): """ RESTful POST接口:/yolov12/train 接收训练参数,调用YOLO模型训练,并返回训练结果。 返回格式:{"code": 0/1, "msg": "success/错误原因", "result": 训练结果或None} """ logging.info("收到/yolov12/train训练请求") logging.info(f"请求参数: {params}") try: model = YOLO("yolov12.yaml") # 如有yolov12n.yaml可替换 logging.info("开始模型训练...") results = model.train( data=params.data, epochs=params.epochs, batch=params.batch, imgsz=params.imgsz, scale=params.scale, mosaic=params.mosaic, mixup=params.mixup, copy_paste=params.copy_paste, device=params.device, project=params.project, name=params.name, exist_ok=params.exist_ok, ) logging.info("模型训练完成") logging.info(f"训练结果: save_dir={results.save_dir}, metrics={results.metrics}, epoch={results.epoch}, best_fitness={getattr(results, 'best_fitness', None)}") return { "code": 0, "msg": "success", "result": { "save_dir": str(results.save_dir), "metrics": str(results.metrics), "epoch": results.epoch, "best_fitness": getattr(results, "best_fitness", None) } } except Exception as e: logging.error(f"训练过程发生异常: {e}") return { "code": 1, "msg": str(e), "result": None } if __name__ == "__main__": threading.Thread(target=start_gradio, daemon=True).start() uvicorn.run(app_fastapi, host="0.0.0.0", port=8000)