diff --git a/gradio_ui/agent/vision_agent.py b/gradio_ui/agent/vision_agent.py
index 416c007..89b9e53 100644
--- a/gradio_ui/agent/vision_agent.py
+++ b/gradio_ui/agent/vision_agent.py
@@ -1,3 +1,4 @@
+import os
from typing import List, Optional
import cv2
import torch
@@ -18,7 +19,7 @@ class UIElement(BaseModel):
text: Optional[str] = None
class VisionAgent:
- def __init__(self, yolo_model_path: str, caption_model_path: str, florence_model_path: str):
+ def __init__(self, yolo_model_path: str, caption_model_path: str):
"""
Initialize the vision agent
@@ -33,36 +34,19 @@ class VisionAgent:
# load the image caption model and processor
self.caption_processor = AutoProcessor.from_pretrained(
- "microsoft/Florence-2-base-ft",
- cache_dir=florence_model_path,
- trust_remote_code=True,
+ "processor",
+ trust_remote_code=True
)
- # load the model according to the device type
try:
- if self.device.type == 'cuda':
- # CUDA device uses float16
- self.caption_model = AutoModelForCausalLM.from_pretrained(
- caption_model_path,
- torch_dtype=torch.float16,
- trust_remote_code=True
- ).to(self.device)
- elif self.device.type == 'mps':
- # MPS device uses float32 (MPS has limited support for float16)
- self.caption_model = AutoModelForCausalLM.from_pretrained(
- caption_model_path,
- torch_dtype=torch.float32,
- trust_remote_code=True
- ).to(self.device)
- else:
- # CPU uses float32
- self.caption_model = AutoModelForCausalLM.from_pretrained(
- caption_model_path,
- torch_dtype=torch.float32,
- trust_remote_code=True
- ).to(self.device)
+ self.caption_model = AutoModelForCausalLM.from_pretrained(
+ caption_model_path,
+ torch_dtype=self.dtype,
+ trust_remote_code=True
+ ).to(self.device)
except Exception as e:
+ print(f"Model loading failed for path: {caption_model_path}")
raise e
self.prompt = "
"
diff --git a/gradio_ui/app.py b/gradio_ui/app.py
index 35fdb33..1779b89 100644
--- a/gradio_ui/app.py
+++ b/gradio_ui/app.py
@@ -14,7 +14,7 @@ from gradio_ui.loop import (
import base64
from xbrain.utils.config import Config
-from util.download_weights import FLORENCE_MODEL_DIR, OMNI_PARSER_MODEL_DIR
+from util.download_weights import OMNI_PARSER_MODEL_DIR
CONFIG_DIR = Path("~/.anthropic").expanduser()
API_KEY_FILE = CONFIG_DIR / "api_key"
@@ -318,9 +318,7 @@ def run():
api_key.change(fn=update_api_key, inputs=[api_key, state], outputs=None)
chatbot.clear(fn=clear_chat, inputs=[state], outputs=[chatbot])
vision_agent = VisionAgent(yolo_model_path=os.path.join(OMNI_PARSER_MODEL_DIR, "icon_detect", "model.pt"),
- caption_model_path=os.path.join(OMNI_PARSER_MODEL_DIR, "icon_caption"),
- florence_model_path=os.path.join(FLORENCE_MODEL_DIR)
- )
+ caption_model_path=os.path.join(OMNI_PARSER_MODEL_DIR, "icon_caption"))
vision_agent_state = gr.State({"agent": vision_agent})
submit_button.click(process_input, [chat_input, state, vision_agent_state], [chatbot, task_list])
stop_button.click(stop_app, [state], None)
diff --git a/main.py b/main.py
index c60f1be..7f7a318 100644
--- a/main.py
+++ b/main.py
@@ -1,12 +1,19 @@
+# import os
+# os.environ["HF_ENDPOINT"] = "https://hf-mirror.com/"
+
from gradio_ui import app
from util import download_weights
+
import torch
+
def run():
if not torch.cuda.is_available():
print("Warning: GPU is not available, we will use CPU, the application may run slower!\nyou computer will very likely heat up!")
print("Downloading the weight files...")
# download the weight files
- download_weights.download_models()
+ # download_weights.download_models()
+ # Configure the HuggingFace mirror
+ # print("HuggingFace mirror configured to use ModelScope registry")
app.run()
diff --git a/processor/added_tokens.json b/processor/added_tokens.json
new file mode 100644
index 0000000..bb62c40
--- /dev/null
+++ b/processor/added_tokens.json
@@ -0,0 +1,1026 @@
+{
+ "": 51270,
+ "": 51274,
+ "": 51276,
+ "": 51272,
+ "": 50268,
+ "": 50266,
+ "": 51287,
+ "": 51285,
+ "": 51281,
+ "": 51283,
+ "": 51278,
+ "": 51288,
+ "": 51269,
+ "": 51273,
+ "": 51275,
+ "": 50269,
+ "": 50369,
+ "": 50370,
+ "": 50371,
+ "": 50372,
+ "": 50373,
+ "": 50374,
+ "": 50375,
+ "": 50376,
+ "": 50377,
+ "": 50378,
+ "": 50279,
+ "": 50379,
+ "": 50380,
+ "": 50381,
+ "": 50382,
+ "": 50383,
+ "": 50384,
+ "": 50385,
+ "": 50386,
+ "": 50387,
+ "": 50388,
+ "": 50280,
+ "": 50389,
+ "": 50390,
+ "": 50391,
+ "": 50392,
+ "": 50393,
+ "": 50394,
+ "": 50395,
+ "": 50396,
+ "": 50397,
+ "": 50398,
+ "": 50281,
+ "": 50399,
+ "": 50400,
+ "": 50401,
+ "": 50402,
+ "": 50403,
+ "": 50404,
+ "": 50405,
+ "": 50406,
+ "": 50407,
+ "": 50408,
+ "": 50282,
+ "": 50409,
+ "": 50410,
+ "": 50411,
+ "": 50412,
+ "": 50413,
+ "": 50414,
+ "": 50415,
+ "": 50416,
+ "": 50417,
+ "": 50418,
+ "": 50283,
+ "": 50419,
+ "": 50420,
+ "": 50421,
+ "": 50422,
+ "": 50423,
+ "": 50424,
+ "": 50425,
+ "": 50426,
+ "": 50427,
+ "": 50428,
+ "": 50284,
+ "": 50429,
+ "": 50430,
+ "": 50431,
+ "": 50432,
+ "": 50433,
+ "": 50434,
+ "": 50435,
+ "": 50436,
+ "": 50437,
+ "": 50438,
+ "": 50285,
+ "": 50439,
+ "": 50440,
+ "": 50441,
+ "": 50442,
+ "": 50443,
+ "": 50444,
+ "": 50445,
+ "": 50446,
+ "": 50447,
+ "": 50448,
+ "": 50286,
+ "": 50449,
+ "": 50450,
+ "": 50451,
+ "": 50452,
+ "": 50453,
+ "": 50454,
+ "": 50455,
+ "": 50456,
+ "": 50457,
+ "": 50458,
+ "": 50287,
+ "": 50459,
+ "": 50460,
+ "": 50461,
+ "": 50462,
+ "": 50463,
+ "": 50464,
+ "": 50465,
+ "": 50466,
+ "": 50467,
+ "": 50468,
+ "": 50288,
+ "": 50270,
+ "": 50469,
+ "": 50470,
+ "": 50471,
+ "": 50472,
+ "": 50473,
+ "": 50474,
+ "": 50475,
+ "": 50476,
+ "": 50477,
+ "": 50478,
+ "": 50289,
+ "": 50479,
+ "": 50480,
+ "": 50481,
+ "": 50482,
+ "": 50483,
+ "": 50484,
+ "": 50485,
+ "": 50486,
+ "": 50487,
+ "": 50488,
+ "": 50290,
+ "": 50489,
+ "": 50490,
+ "": 50491,
+ "": 50492,
+ "": 50493,
+ "": 50494,
+ "": 50495,
+ "": 50496,
+ "": 50497,
+ "": 50498,
+ "": 50291,
+ "": 50499,
+ "": 50500,
+ "": 50501,
+ "": 50502,
+ "": 50503,
+ "": 50504,
+ "": 50505,
+ "": 50506,
+ "": 50507,
+ "": 50508,
+ "": 50292,
+ "": 50509,
+ "": 50510,
+ "": 50511,
+ "": 50512,
+ "": 50513,
+ "": 50514,
+ "": 50515,
+ "": 50516,
+ "": 50517,
+ "": 50518,
+ "": 50293,
+ "": 50519,
+ "": 50520,
+ "": 50521,
+ "": 50522,
+ "": 50523,
+ "": 50524,
+ "": 50525,
+ "": 50526,
+ "": 50527,
+ "": 50528,
+ "": 50294,
+ "": 50529,
+ "": 50530,
+ "": 50531,
+ "": 50532,
+ "": 50533,
+ "": 50534,
+ "": 50535,
+ "": 50536,
+ "": 50537,
+ "": 50538,
+ "": 50295,
+ "": 50539,
+ "": 50540,
+ "": 50541,
+ "": 50542,
+ "": 50543,
+ "": 50544,
+ "": 50545,
+ "": 50546,
+ "": 50547,
+ "": 50548,
+ "": 50296,
+ "": 50549,
+ "": 50550,
+ "": 50551,
+ "": 50552,
+ "": 50553,
+ "": 50554,
+ "": 50555,
+ "": 50556,
+ "": 50557,
+ "": 50558,
+ "": 50297,
+ "": 50559,
+ "": 50560,
+ "": 50561,
+ "": 50562,
+ "": 50563,
+ "": 50564,
+ "": 50565,
+ "": 50566,
+ "": 50567,
+ "": 50568,
+ "": 50298,
+ "": 50271,
+ "": 50569,
+ "": 50570,
+ "": 50571,
+ "": 50572,
+ "": 50573,
+ "": 50574,
+ "": 50575,
+ "": 50576,
+ "": 50577,
+ "": 50578,
+ "": 50299,
+ "": 50579,
+ "": 50580,
+ "": 50581,
+ "": 50582,
+ "": 50583,
+ "": 50584,
+ "": 50585,
+ "": 50586,
+ "": 50587,
+ "": 50588,
+ "": 50300,
+ "": 50589,
+ "": 50590,
+ "": 50591,
+ "