本地processor

2026-03-22 13:07:17 +08:00 · 2025-03-17 12:55:16 +03:00
parent b778dcc87d
commit 9c5ede79b1
8 changed files with 327103 additions and 23 deletions
--- a/gradio_ui/agent/vision_agent.py
+++ b/gradio_ui/agent/vision_agent.py
@@ -33,35 +33,19 @@ class VisionAgent:
        # load the image caption model and processor
        self.caption_processor = AutoProcessor.from_pretrained(
-            "microsoft/Florence-2-base", 
+            "processor", 
            trust_remote_code=True
        )
        # load the model according to the device type
        try:
-            if self.device.type == 'cuda':
-                # CUDA device uses float16
-                self.caption_model = AutoModelForCausalLM.from_pretrained(
-                    caption_model_path, 
-                    torch_dtype=torch.float16,
-                    trust_remote_code=True
-                ).to(self.device)
-            elif self.device.type == 'mps':
-                # MPS device uses float32 (MPS has limited support for float16)
-                self.caption_model = AutoModelForCausalLM.from_pretrained(
-                    caption_model_path, 
-                    torch_dtype=torch.float32,
-                    trust_remote_code=True
-                ).to(self.device)
-            else:
-                # CPU uses float32
-                self.caption_model = AutoModelForCausalLM.from_pretrained(
-                    caption_model_path, 
-                    torch_dtype=torch.float32,
-                    trust_remote_code=True
-                ).to(self.device)
+            self.caption_model = AutoModelForCausalLM.from_pretrained(
+                caption_model_path, 
+                torch_dtype=torch.float32,
+                trust_remote_code=True
+            ).to(self.device)
        except Exception as e:
            print(f"Model loading failed for path: {caption_model_path}")
            raise e
        self.prompt = "<CAPTION>"
--- a/processor/added_tokens.json
+++ b/processor/added_tokens.json
--- a/processor/merges.txt
+++ b/processor/merges.txt
--- a/processor/preprocessor_config.json
+++ b/processor/preprocessor_config.json
@@ -0,0 +1,33 @@
 {
  "auto_map": {
    "AutoProcessor": "microsoft/Florence-2-base--processing_florence2.Florence2Processor"
  },
  "crop_size": {
    "height": 768,
    "width": 768
  },
  "do_center_crop": false,
  "do_convert_rgb": null,
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "CLIPImageProcessor",
  "image_seq_length": 577,
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "processor_class": "Florence2Processor",
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 768,
    "width": 768
  }
 }
--- a/processor/special_tokens_map.json
+++ b/processor/special_tokens_map.json
--- a/processor/tokenizer.json
+++ b/processor/tokenizer.json
--- a/processor/tokenizer_config.json
+++ b/processor/tokenizer_config.json
--- a/processor/vocab.json
+++ b/processor/vocab.json