app.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
# --------------------------------------------------------
# Based on yolov10
# https://github.com/THU-MIG/yolov10/app.py
# --------------------------------------------------------
  5. import logging
  6. import tempfile
  7. import threading
  8. import cv2
  9. import gradio as gr
  10. import uvicorn
  11. from fastapi import FastAPI
  12. from fastapi import status
  13. from fastapi.exceptions import RequestValidationError
  14. from fastapi.responses import JSONResponse
  15. from pydantic import BaseModel
  16. from ultralytics import YOLO
  17. import os
  18. import glob
  19. from typing import Optional
# Configure the root logger: INFO level, "[timestamp] LEVEL - message" format.
logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s - %(message)s')
  22. def yolov12_inference(image, video, model_id, image_size, conf_threshold):
  23. model = YOLO(model_id)
  24. if image:
  25. results = model.predict(source=image, imgsz=image_size, conf=conf_threshold)
  26. annotated_image = results[0].plot()
  27. return annotated_image[:, :, ::-1], None
  28. else:
  29. video_path = tempfile.mktemp(suffix=".webm")
  30. with open(video_path, "wb") as f:
  31. with open(video, "rb") as g:
  32. f.write(g.read())
  33. cap = cv2.VideoCapture(video_path)
  34. fps = cap.get(cv2.CAP_PROP_FPS)
  35. frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
  36. frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
  37. output_video_path = tempfile.mktemp(suffix=".webm")
  38. out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'vp80'), fps, (frame_width, frame_height))
  39. while cap.isOpened():
  40. ret, frame = cap.read()
  41. if not ret:
  42. break
  43. results = model.predict(source=frame, imgsz=image_size, conf=conf_threshold)
  44. annotated_frame = results[0].plot()
  45. out.write(annotated_frame)
  46. cap.release()
  47. out.release()
  48. return None, output_video_path
  49. def yolov12_inference_for_examples(image, model_path, image_size, conf_threshold):
  50. annotated_image, _ = yolov12_inference(image, None, model_path, image_size, conf_threshold)
  51. return annotated_image
  52. def app():
  53. with gr.Blocks():
  54. with gr.Row():
  55. with gr.Column():
  56. image = gr.Image(type="pil", label="Image", visible=True)
  57. video = gr.Video(label="Video", visible=False)
  58. input_type = gr.Radio(
  59. choices=["Image", "Video"],
  60. value="Image",
  61. label="Input Type",
  62. )
  63. model_id = gr.Dropdown(
  64. label="Model",
  65. choices=[
  66. "yolov12n.pt",
  67. "yolov12s.pt",
  68. "yolov12m.pt",
  69. "yolov12l.pt",
  70. "yolov12x.pt",
  71. ],
  72. value="yolov12m.pt",
  73. )
  74. image_size = gr.Slider(
  75. label="Image Size",
  76. minimum=320,
  77. maximum=1280,
  78. step=32,
  79. value=640,
  80. )
  81. conf_threshold = gr.Slider(
  82. label="Confidence Threshold",
  83. minimum=0.0,
  84. maximum=1.0,
  85. step=0.05,
  86. value=0.25,
  87. )
  88. yolov12_infer = gr.Button(value="Detect Objects")
  89. with gr.Column():
  90. output_image = gr.Image(type="numpy", label="Annotated Image", visible=True)
  91. output_video = gr.Video(label="Annotated Video", visible=False)
  92. def update_visibility(input_type):
  93. image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False)
  94. video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True)
  95. output_image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False)
  96. output_video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True)
  97. return image, video, output_image, output_video
  98. input_type.change(
  99. fn=update_visibility,
  100. inputs=[input_type],
  101. outputs=[image, video, output_image, output_video],
  102. )
  103. def run_inference(image, video, model_id, image_size, conf_threshold, input_type):
  104. if input_type == "Image":
  105. return yolov12_inference(image, None, model_id, image_size, conf_threshold)
  106. else:
  107. return yolov12_inference(None, video, model_id, image_size, conf_threshold)
  108. yolov12_infer.click(
  109. fn=run_inference,
  110. inputs=[image, video, model_id, image_size, conf_threshold, input_type],
  111. outputs=[output_image, output_video],
  112. )
  113. gr.Examples(
  114. examples=[
  115. [
  116. "ultralytics/assets/bus.jpg",
  117. "yolov12s.pt",
  118. 640,
  119. 0.25,
  120. ],
  121. [
  122. "ultralytics/assets/zidane.jpg",
  123. "yolov12x.pt",
  124. 640,
  125. 0.25,
  126. ],
  127. ],
  128. fn=yolov12_inference_for_examples,
  129. inputs=[
  130. image,
  131. model_id,
  132. image_size,
  133. conf_threshold,
  134. ],
  135. outputs=[output_image],
  136. cache_examples='lazy',
  137. )
  138. gradio_app = gr.Blocks()
  139. with gradio_app:
  140. gr.HTML(
  141. """
  142. <h1 style='text-align: center'>
  143. YOLOv12: Attention-Centric Real-Time Object Detectors
  144. </h1>
  145. """)
  146. gr.HTML(
  147. """
  148. <h3 style='text-align: center'>
  149. <a href='https://arxiv.org/abs/2502.12524' target='_blank'>arXiv</a> | <a href='https://github.com/sunsmarterjie/yolov12' target='_blank'>github</a>
  150. </h3>
  151. """)
  152. with gr.Row():
  153. with gr.Column():
  154. app()
  155. def start_gradio():
  156. gradio_app.launch(server_name="0.0.0.0", server_port=7860)
# FastAPI section: the application object the REST endpoints below register on.
app_fastapi = FastAPI()
class TrainParams(BaseModel):
    """
    Training parameters accepted by the /yolov12/train endpoint.

    No field has a default, so every one of them must be sent by the client.
    """
    model: str  # base model to train from
    data: str  # path to the dataset configuration file
    epochs: int  # number of training epochs
    batch: int  # batch size
    imgsz: int  # input image size
    scale: float  # random-scale augmentation ratio
    mosaic: float  # mosaic augmentation probability
    mixup: float  # mixup augmentation probability
    copy_paste: float  # copy-paste augmentation probability
    device: str  # training device
    project: str  # project name
    name: str  # experiment name
    exist_ok: bool  # whether an existing directory with the same name may be reused
  176. @app_fastapi.post("/yolov12/train")
  177. def yolov12_train(params: TrainParams):
  178. """
  179. RESTful POST接口:/yolov12/train
  180. 接收训练参数,调用YOLO模型训练,并返回训练结果。
  181. 返回格式:{"code": 0/1, "msg": "success/错误原因", "result": 训练结果或None}
  182. """
  183. logging.info("收到/yolov12/train训练请求")
  184. logging.info(f"请求参数: {params}")
  185. try:
  186. # 根据params.model动态确定配置文件
  187. if params.model.endswith('.pt'):
  188. # 如果是.pt文件,将后缀替换为.yaml
  189. config_file = params.model.replace('.pt', '.yaml')
  190. else:
  191. # 如果不是.pt文件,使用默认配置
  192. config_file = "yolov12.yaml"
  193. model = YOLO(config_file)
  194. model.load(params.model)
  195. logging.info("开始模型训练...")
  196. results = model.train(
  197. data=params.data,
  198. epochs=params.epochs,
  199. batch=params.batch,
  200. imgsz=params.imgsz,
  201. scale=params.scale,
  202. mosaic=params.mosaic,
  203. mixup=params.mixup,
  204. copy_paste=params.copy_paste,
  205. device=params.device,
  206. project=params.project,
  207. name=params.name,
  208. exist_ok=params.exist_ok,
  209. )
  210. logging.info("模型训练完成")
  211. # logging.info(f"训练结果: {str(results)}")
  212. return {
  213. "code": 0,
  214. "msg": "success",
  215. "result": str(results.save_dir)
  216. }
  217. except Exception as e:
  218. logging.error(f"训练过程发生异常: {e}")
  219. return {
  220. "code": 1,
  221. "msg": str(e),
  222. "result": None
  223. }
  224. class PredictParams(BaseModel):
  225. """
  226. 用于接收/yolov12/predict接口的预测参数,与YOLO predict方法保持一致。
  227. """
  228. model: str = "yolov12m.pt" # 模型路径
  229. source: str = None # 输入源(图片/视频路径、URL等)
  230. stream: bool = False # 是否流式处理
  231. conf: float = 0.25 # 置信度阈值
  232. iou: float = 0.7 # IoU阈值
  233. max_det: int = 300 # 最大检测数量
  234. imgsz: int = 640 # 输入图片尺寸
  235. batch: int = 1 # 批次大小
  236. device: str = "" # 设备
  237. show: bool = False # 是否显示结果
  238. save: bool = False # 是否保存结果
  239. save_txt: bool = False # 是否保存txt文件
  240. save_conf: bool = False # 是否保存置信度
  241. save_crop: bool = False # 是否保存裁剪图片
  242. show_labels: bool = True # 是否显示标签
  243. show_conf: bool = True # 是否显示置信度
  244. show_boxes: bool = True # 是否显示边界框
  245. line_width: int = None # 线条宽度
  246. vid_stride: int = 1 # 视频帧步长
  247. stream_buffer: bool = False # 流缓冲区
  248. visualize: bool = False # 可视化特征
  249. augment: bool = False # 数据增强
  250. agnostic_nms: bool = False # 类别无关NMS
  251. classes: list = None # 指定类别
  252. retina_masks: bool = False # 高分辨率分割掩码
  253. embed: list = None # 特征向量层
  254. half: bool = False # 半精度
  255. dnn: bool = False # OpenCV DNN
  256. project: str = "" # 项目名
  257. name: str = "" # 实验名
  258. exist_ok: bool = False # 是否覆盖现有目录
  259. verbose: bool = True # 详细输出
  260. @app_fastapi.post("/yolov12/predict")
  261. def yolov12_predict(params: PredictParams):
  262. """
  263. RESTful POST接口:/yolov12/predict
  264. 接收预测参数,调用YOLO模型进行预测,并返回预测结果。
  265. 返回格式:{"code": 0/1, "msg": "success/错误原因", "result": {"save_dir": "保存目录", "filename": "文件名"}}
  266. """
  267. logging.info("收到/yolov12/predict预测请求")
  268. logging.info(f"请求参数: {params}")
  269. try:
  270. model = YOLO(params.model)
  271. logging.info("开始模型预测...")
  272. # 构建预测参数
  273. predict_kwargs = {}
  274. for field, value in params.dict().items():
  275. if field not in ['model'] and value is not None:
  276. predict_kwargs[field] = value
  277. # 确保保存结果,并强制使用MP4格式
  278. predict_kwargs['save'] = True
  279. results = model.predict(**predict_kwargs)
  280. logging.info("模型预测完成")
  281. # 获取保存目录和最终文件名
  282. result = results[0]
  283. save_dir = result.save_dir if hasattr(result, 'save_dir') else None
  284. # 获取最终生成的文件名
  285. final_filename = None
  286. if save_dir:
  287. # 获取输入文件名(不含扩展名)
  288. source = params.source
  289. base_name = None
  290. if source:
  291. base_name = os.path.splitext(os.path.basename(source))[0]
  292. # 支持的扩展名
  293. exts = ['*.jpg', '*.jpeg', '*.png', '*.mp4', '*.webm', '*.avi', '*.mov']
  294. matched_files = []
  295. for ext in exts:
  296. matched_files.extend(glob.glob(os.path.join(save_dir, ext)))
  297. # 按时间排序,查找与输入文件同名的第一个文件
  298. if base_name and matched_files:
  299. matched_files = sorted(matched_files, key=os.path.getmtime)
  300. for f in matched_files:
  301. if os.path.splitext(os.path.basename(f))[0] == base_name:
  302. final_filename = os.path.basename(f)
  303. logging.info(f"按输入文件名查找,返回文件: {final_filename}")
  304. break
  305. # 如果没找到同名文件,返回最新文件
  306. if not final_filename and matched_files:
  307. latest_file = max(matched_files, key=os.path.getmtime)
  308. final_filename = os.path.basename(latest_file)
  309. logging.info(f"未找到同名,返回最新文件: {final_filename}")
  310. return {
  311. "code": 0,
  312. "msg": "success",
  313. "result": save_dir+"/"+final_filename
  314. }
  315. except Exception as e:
  316. logging.error(f"预测过程发生异常: {e}")
  317. return {
  318. "code": 1,
  319. "msg": str(e),
  320. "result": None
  321. }
  322. class StreamParams(BaseModel):
  323. """
  324. 用于接收 /yolov12/stream 接口的参数。
  325. model: 推理模型路径
  326. source: 拉流地址(如rtsp/http视频流)
  327. stream_url: 推流地址(如rtmp推流地址)
  328. 其他参数同 predict
  329. """
  330. model: str = "yolov12m.pt"
  331. source: str = None
  332. stream_url: str = None
  333. conf: float = 0.25
  334. iou: Optional[float] = 0.7
  335. imgsz: int = 640
  336. device: str = ""
  337. # 可根据需要补充更多参数
  338. @app_fastapi.post("/yolov12/stream")
  339. def yolov12_stream(params: StreamParams):
  340. """
  341. RESTful POST接口:/yolov12/stream
  342. 接收视频拉流地址和推流地址,调用YOLO模型推理,将推理后的视频推送到推流地址。
  343. 返回格式:{"code": 0/1, "msg": "success/错误原因", "result": None}
  344. """
  345. import cv2
  346. import logging
  347. logging.info("收到/yolov12/stream请求")
  348. logging.info(f"请求参数: {params}")
  349. try:
  350. model = YOLO(params.model)
  351. cap = cv2.VideoCapture(params.source)
  352. if not cap.isOpened():
  353. return {"code": 1, "msg": f"无法打开视频流: {params.source}", "result": None}
  354. fps = cap.get(cv2.CAP_PROP_FPS)
  355. width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
  356. height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
  357. # 推流地址通常为rtmp/rtsp等
  358. fourcc = cv2.VideoWriter_fourcc(*'flv1') if params.stream_url.startswith('rtmp') else cv2.VideoWriter_fourcc(*'mp4v')
  359. out = cv2.VideoWriter(params.stream_url, fourcc, fps if fps > 0 else 25, (width, height))
  360. if not out.isOpened():
  361. cap.release()
  362. return {"code": 1, "msg": f"无法打开推流地址: {params.stream_url}", "result": None}
  363. frame_count = 0
  364. while cap.isOpened():
  365. ret, frame = cap.read()
  366. if not ret:
  367. break
  368. # 推理
  369. results = model.predict(source=frame, imgsz=params.imgsz, conf=params.conf, iou=params.iou, device=params.device)
  370. annotated_frame = results[0].plot()
  371. out.write(annotated_frame)
  372. frame_count += 1
  373. cap.release()
  374. out.release()
  375. logging.info(f"推理并推流完成,共处理帧数: {frame_count}")
  376. return {"code": 0, "msg": "success", "result": None}
  377. except Exception as e:
  378. logging.error(f"/yolov12/stream 发生异常: {e}")
  379. return {"code": 1, "msg": str(e), "result": None}
  380. # 全局异常处理器:参数校验失败时统一返回格式
  381. @app_fastapi.exception_handler(RequestValidationError)
  382. async def validation_exception_handler(request, exc):
  383. err_msg = f"参数校验失败: 路径={request.url.path}, 错误={exc.errors()}"
  384. logging.error(err_msg)
  385. return JSONResponse(
  386. status_code=status.HTTP_200_OK,
  387. content={
  388. "code": 422,
  389. "msg": err_msg,
  390. "result": None
  391. }
  392. )
  393. if __name__ == "__main__":
  394. threading.Thread(target=start_gradio, daemon=True).start()
  395. uvicorn.run(app_fastapi, host="0.0.0.0", port=8000)