本地processor

2025-12-26 05:16:21 +08:00 · 2025-03-17 12:55:16 +03:00 · 2025-03-17 12:55:16 +03:00 · 9c5ede79b1
commit 9c5ede79b1
parent b778dcc87d
8 changed files with 327103 additions and 23 deletions
--- a/gradio_ui/agent/vision_agent.py
+++ b/gradio_ui/agent/vision_agent.py
@@ -33,35 +33,19 @@ class VisionAgent:
        
        # load the image caption model and processor
        self.caption_processor = AutoProcessor.from_pretrained(
-            "microsoft/Florence-2-base", 
+            "processor", 
            trust_remote_code=True
        )
        
-        # load the model according to the device type
        try:
-            if self.device.type == 'cuda':
-                # CUDA device uses float16
-                self.caption_model = AutoModelForCausalLM.from_pretrained(
-                    caption_model_path, 
-                    torch_dtype=torch.float16,
-                    trust_remote_code=True
-                ).to(self.device)
-            elif self.device.type == 'mps':
-                # MPS device uses float32 (MPS has limited support for float16)
-                self.caption_model = AutoModelForCausalLM.from_pretrained(
-                    caption_model_path, 
-                    torch_dtype=torch.float32,
-                    trust_remote_code=True
-                ).to(self.device)
-            else:
-                # CPU uses float32
-                self.caption_model = AutoModelForCausalLM.from_pretrained(
-                    caption_model_path, 
-                    torch_dtype=torch.float32,
-                    trust_remote_code=True
-                ).to(self.device)
+            self.caption_model = AutoModelForCausalLM.from_pretrained(
+                caption_model_path, 
+                torch_dtype=torch.float32,
+                trust_remote_code=True
+            ).to(self.device)
            
        except Exception as e:
+            print(f"Model loading failed for path: {caption_model_path}")
            raise e
        self.prompt = "<CAPTION>"
        
--- a/processor/added_tokens.json
+++ b/processor/added_tokens.json
--- a/processor/merges.txt
+++ b/processor/merges.txt
--- a/processor/preprocessor_config.json
+++ b/processor/preprocessor_config.json
@@ -0,0 +1,33 @@
+{
+  "auto_map": {
+    "AutoProcessor": "microsoft/Florence-2-base--processing_florence2.Florence2Processor"
+  },
+  "crop_size": {
+    "height": 768,
+    "width": 768
+  },
+  "do_center_crop": false,
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_processor_type": "CLIPImageProcessor",
+  "image_seq_length": 577,
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "processor_class": "Florence2Processor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 768,
+    "width": 768
+  }
+}
--- a/processor/special_tokens_map.json
+++ b/processor/special_tokens_map.json
--- a/processor/tokenizer.json
+++ b/processor/tokenizer.json
--- a/processor/tokenizer_config.json
+++ b/processor/tokenizer_config.json
--- a/processor/vocab.json
+++ b/processor/vocab.json