# --------------------------------------------------------
# Based on yolov10
# https://github.com/THU-MIG/yolov10/app.py
# --------------------------------------------------------'

import gradio as gr
import cv2
import tempfile
from ultralytics import YOLO
import threading
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
import logging

# 设置日志格式和级别
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s - %(message)s')

def yolov12_inference(image, video, model_id, image_size, conf_threshold):
    model = YOLO(model_id)
    if image:
        results = model.predict(source=image, imgsz=image_size, conf=conf_threshold)
        annotated_image = results[0].plot()
        return annotated_image[:, :, ::-1], None
    else:
        video_path = tempfile.mktemp(suffix=".webm")
        with open(video_path, "wb") as f:
            with open(video, "rb") as g:
                f.write(g.read())

        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        output_video_path = tempfile.mktemp(suffix=".webm")
        out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'vp80'), fps, (frame_width, frame_height))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            results = model.predict(source=frame, imgsz=image_size, conf=conf_threshold)
            annotated_frame = results[0].plot()
            out.write(annotated_frame)

        cap.release()
        out.release()

        return None, output_video_path


def yolov12_inference_for_examples(image, model_path, image_size, conf_threshold):
    annotated_image, _ = yolov12_inference(image, None, model_path, image_size, conf_threshold)
    return annotated_image


def app():
    with gr.Blocks():
        with gr.Row():
            with gr.Column():
                image = gr.Image(type="pil", label="Image", visible=True)
                video = gr.Video(label="Video", visible=False)
                input_type = gr.Radio(
                    choices=["Image", "Video"],
                    value="Image",
                    label="Input Type",
                )
                model_id = gr.Dropdown(
                    label="Model",
                    choices=[
                        "yolov12n.pt",
                        "yolov12s.pt",
                        "yolov12m.pt",
                        "yolov12l.pt",
                        "yolov12x.pt",
                    ],
                    value="yolov12m.pt",
                )
                image_size = gr.Slider(
                    label="Image Size",
                    minimum=320,
                    maximum=1280,
                    step=32,
                    value=640,
                )
                conf_threshold = gr.Slider(
                    label="Confidence Threshold",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.25,
                )
                yolov12_infer = gr.Button(value="Detect Objects")

            with gr.Column():
                output_image = gr.Image(type="numpy", label="Annotated Image", visible=True)
                output_video = gr.Video(label="Annotated Video", visible=False)

        def update_visibility(input_type):
            image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False)
            video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True)
            output_image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False)
            output_video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True)

            return image, video, output_image, output_video

        input_type.change(
            fn=update_visibility,
            inputs=[input_type],
            outputs=[image, video, output_image, output_video],
        )

        def run_inference(image, video, model_id, image_size, conf_threshold, input_type):
            if input_type == "Image":
                return yolov12_inference(image, None, model_id, image_size, conf_threshold)
            else:
                return yolov12_inference(None, video, model_id, image_size, conf_threshold)


        yolov12_infer.click(
            fn=run_inference,
            inputs=[image, video, model_id, image_size, conf_threshold, input_type],
            outputs=[output_image, output_video],
        )

        gr.Examples(
            examples=[
                [
                    "ultralytics/assets/bus.jpg",
                    "yolov12s.pt",
                    640,
                    0.25,
                ],
                [
                    "ultralytics/assets/zidane.jpg",
                    "yolov12x.pt",
                    640,
                    0.25,
                ],
            ],
            fn=yolov12_inference_for_examples,
            inputs=[
                image,
                model_id,
                image_size,
                conf_threshold,
            ],
            outputs=[output_image],
            cache_examples='lazy',
        )

gradio_app = gr.Blocks()
with gradio_app:
    gr.HTML(
        """
    <h1 style='text-align: center'>
    YOLOv12: Attention-Centric Real-Time Object Detectors
    </h1>
    """)
    gr.HTML(
        """
        <h3 style='text-align: center'>
        <a href='https://arxiv.org/abs/2502.12524' target='_blank'>arXiv</a> | <a href='https://github.com/sunsmarterjie/yolov12' target='_blank'>github</a>
        </h3>
        """)
    with gr.Row():
        with gr.Column():
            app()

def start_gradio():
    gradio_app.launch(server_name="0.0.0.0", server_port=7860)

# FastAPI部分
app_fastapi = FastAPI()

class TrainParams(BaseModel):
    """
    用于接收/yolov12/train接口的训练参数，所有参数均需前端传入。
    """
    data: str           # 数据集配置文件路径
    epochs: int         # 训练轮数
    batch: int          # 批次大小
    imgsz: int          # 输入图片尺寸
    scale: float        # 随机缩放增强比例
    mosaic: float       # mosaic数据增强概率
    mixup: float        # mixup数据增强概率
    copy_paste: float   # copy-paste数据增强概率
    device: str         # 训练设备
    project: str        # 工程名
    name: str           # 实验名
    exist_ok: bool      # 是否允许覆盖同名目录

@app_fastapi.post("/yolov12/train")
def yolov12_train(params: TrainParams):
    """
    RESTful POST接口：/yolov12/train
    接收训练参数，调用YOLO模型训练，并返回训练结果。
    返回格式：{"code": 0/1, "msg": "success/错误原因", "result": 训练结果或None}
    """
    logging.info("收到/yolov12/train训练请求")
    logging.info(f"请求参数: {params}")
    try:
        model = YOLO("yolov12.yaml")  # 如有yolov12n.yaml可替换
        logging.info("开始模型训练...")
        results = model.train(
            data=params.data,
            epochs=params.epochs,
            batch=params.batch,
            imgsz=params.imgsz,
            scale=params.scale,
            mosaic=params.mosaic,
            mixup=params.mixup,
            copy_paste=params.copy_paste,
            device=params.device,
            project=params.project,
            name=params.name,
            exist_ok=params.exist_ok,
        )
        logging.info("模型训练完成")
        logging.info(f"训练结果: save_dir={results.save_dir}, metrics={results.metrics}, epoch={results.epoch}, best_fitness={getattr(results, 'best_fitness', None)}")
        return {
            "code": 0,
            "msg": "success",
            "result": {
                "save_dir": str(results.save_dir),
                "metrics": str(results.metrics),
                "epoch": results.epoch,
                "best_fitness": getattr(results, "best_fitness", None)
            }
        }
    except Exception as e:
        logging.error(f"训练过程发生异常: {e}")
        return {
            "code": 1,
            "msg": str(e),
            "result": None
        }

if __name__ == "__main__":
    threading.Thread(target=start_gradio, daemon=True).start()
    uvicorn.run(app_fastapi, host="0.0.0.0", port=8000)