diff --git a/omniserver.py b/omniserver.py deleted file mode 100644 index f4cb916..0000000 --- a/omniserver.py +++ /dev/null @@ -1,59 +0,0 @@ -''' -python -m omniparserserver --som_model_path ../../weights/icon_detect/model.pt --caption_model_name florence2 --caption_model_path ../../weights/icon_caption_florence --device cuda --BOX_TRESHOLD 0.05 -''' - -import sys -import os -import time -from fastapi import FastAPI -from pydantic import BaseModel -import argparse -import uvicorn -root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -sys.path.append(root_dir) -# from util.omniparser import Omniparser -from gradio_ui.agent.vision_agent import VisionAgent - -def parse_arguments(): - parser = argparse.ArgumentParser(description='Omniparser API') - parser.add_argument('--som_model_path', type=str, default='./weights/icon_detect/model.pt', help='Path to the som model') - parser.add_argument('--caption_model_name', type=str, default='florence2', help='Name of the caption model') - parser.add_argument('--caption_model_path', type=str, default='./weights/icon_caption', help='Path to the caption model') - parser.add_argument('--device', type=str, default='cpu', help='Device to run the model') - parser.add_argument('--BOX_TRESHOLD', type=float, default=0.05, help='Threshold for box detection') - parser.add_argument('--host', type=str, default='0.0.0.0', help='Host for the API') - parser.add_argument('--port', type=int, default=8000, help='Port for the API') - args = parser.parse_args() - return args - -args = parse_arguments() -config = vars(args) - -app = FastAPI() -# omniparser = Omniparser(config) -yolo_model_path = config['som_model_path'] -caption_model_path = config['caption_model_path'] -vision_agent = VisionAgent(yolo_model_path=yolo_model_path, caption_model_path=caption_model_path) -class ParseRequest(BaseModel): - base64_image: str - -@app.post("/parse/") -async def parse(parse_request: ParseRequest): - print('start parsing...') - start = time.time() - # dino_labled_img, parsed_content_list = omniparser.parse(parse_request.base64_image) - parsed_content_list = vision_agent(parse_request.base64_image) - - latency = time.time() - start - print('time:', latency) - return {"parsed_content_list": parsed_content_list, 'latency': latency} - -@app.get("/probe/") -async def root(): - return {"message": "Omniparser API ready"} - - - - -if __name__ == "__main__": - uvicorn.run("omniserver:app", host=args.host, port=args.port, reload=True) \ No newline at end of file