From 0a9693c5cc1abe8680f76654130e173872d19a1c Mon Sep 17 00:00:00 2001
From: yuruo <yuruotong1@163.com>
Date: Tue, 18 Mar 2025 09:24:37 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0vision=5Fagent?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 gradio_ui/agent/vision_agent.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/gradio_ui/agent/vision_agent.py b/gradio_ui/agent/vision_agent.py
index c9e9585..b0a2376 100644
--- a/gradio_ui/agent/vision_agent.py
+++ b/gradio_ui/agent/vision_agent.py
@@ -10,6 +10,8 @@ import time
 from pydantic import BaseModel
 import base64
 from PIL import Image
+from transformers import AutoConfig
+import os
 
 class UIElement(BaseModel):
     element_id: int
@@ -33,20 +35,32 @@ class VisionAgent:
         
         # load the image caption model and processor
         self.caption_processor = AutoProcessor.from_pretrained(
-            "weights/AI-ModelScope/Florence-2-base", 
+            "weights/AI-ModelScope/Florence-2-base-ft", 
+            trust_remote_code=True,
+            local_files_only=True
+        )
+        config = AutoConfig.from_pretrained(
+            "weights/AI-ModelScope/Florence-2-base-ft",  # 指向包含 configuration_florence2.py 的目录
             trust_remote_code=True,
             local_files_only=True
         )
         
         try:
+            # Changed: load both the model code and the weights from the Florence directory
+            florence_base_path = "weights/AI-ModelScope/Florence-2-base-ft"
+            
+            # Load the complete model (including weights) directly from the Florence directory
             self.caption_model = AutoModelForCausalLM.from_pretrained(
-                caption_model_path, 
+                florence_base_path,  # 这里使用包含代码和权重的完整目录
                 torch_dtype=self.dtype,
-                trust_remote_code=True
+                trust_remote_code=True,
+                local_files_only=True
             ).to(self.device)
             
+            # No separate weight loading is needed: the weights are already in florence_base_path
+            
         except Exception as e:
-            print(f"Model loading failed for path: {caption_model_path}")
+            print(f"Model loading failed: {e}")
             raise e
         self.prompt = "<CAPTION>"