mirror of https://github.com/OpenHands/OpenHands.git
Added local ollama models (#2433)
* added local ollama models
* add ollama_base_url config
* Update listen.py
* add docs
* Update opendevin/server/listen.py
* lint

Co-authored-by: Graham Neubig <neubig@gmail.com>
parent 6853cbb4f6
commit 688bd2a8fc
@@ -35,8 +35,11 @@ But when running `docker run`, you'll need to add a few more arguments:
     --add-host host.docker.internal:host-gateway \
     -e LLM_API_KEY="ollama" \
     -e LLM_BASE_URL="http://host.docker.internal:11434" \
+    -e LLM_OLLAMA_BASE_URL="http://host.docker.internal:11434" \
 ```
 
+LLM_OLLAMA_BASE_URL is optional. If you set it, it will be used to list the installed models in the UI.
+
 For example:
 
 ```bash
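A quick way to sanity-check the value passed for LLM_OLLAMA_BASE_URL is to query Ollama's `/api/tags` endpoint directly, which is the same endpoint the server code later in this commit uses. A minimal sketch, not part of the commit; the base URL is taken from the docker run example above:

```python
# Sketch: verify an Ollama server is reachable and list its installed models.
# Ollama's /api/tags returns {"models": [{"name": "..."}, ...]}.
import requests

base_url = 'http://host.docker.internal:11434'  # value from the docker run example
response = requests.get(base_url.rstrip('/') + '/api/tags', timeout=3)
response.raise_for_status()
for model in response.json()['models']:
    print('ollama/' + model['name'])  # same prefix the model list below uses
```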
@@ -47,6 +47,7 @@ class LLMConfig(metaclass=Singleton):
         max_output_tokens: The maximum number of output tokens. This is sent to the LLM.
         input_cost_per_token: The cost per input token. This will be available in logs for the user to check.
         output_cost_per_token: The cost per output token. This will be available in logs for the user to check.
+        ollama_base_url: The base URL for the OLLAMA API.
     """
 
     model: str = 'gpt-4o'
@@ -71,6 +72,7 @@ class LLMConfig(metaclass=Singleton):
     max_output_tokens: int | None = None
     input_cost_per_token: float | None = None
     output_cost_per_token: float | None = None
+    ollama_base_url: str | None = None
 
     def defaults_to_dict(self) -> dict:
         """
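The docs hunk above introduces the LLM_OLLAMA_BASE_URL environment variable, and this hunk adds the matching `ollama_base_url` field on `LLMConfig`. A minimal sketch of that mapping; the env-var loading shown here is an assumption for illustration, not the project's actual config loader, and `LLMConfigSketch` is a hypothetical stand-in:

```python
import os
from dataclasses import dataclass

@dataclass
class LLMConfigSketch:  # hypothetical stand-in for LLMConfig
    model: str = 'gpt-4o'
    ollama_base_url: str | None = None  # the field added by this commit

# Assumption: the LLM_OLLAMA_BASE_URL env var from the docs feeds this field.
config = LLMConfigSketch(ollama_base_url=os.environ.get('LLM_OLLAMA_BASE_URL'))
print(config.ollama_base_url)  # None unless the env var is set
```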
@@ -3,6 +3,7 @@ import re
 import uuid
 import warnings
 
+import requests
 from pathspec import PathSpec
 from pathspec.patterns import GitWildMatchPattern
@@ -190,7 +191,7 @@ async def attach_session(request: Request, call_next):
 async def websocket_endpoint(websocket: WebSocket):
     """
     WebSocket endpoint for receiving events from the client (i.e., the browser).
-    Once connected, you can send various actions:
+    Once connected, the client can send various actions:
     - Initialize the agent:
     ```json
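For context, a minimal client for this endpoint might look like the sketch below. The `/ws` path, the port, and the payload keys are all assumptions for illustration; the real action schema is the JSON example that follows in the docstring (truncated in this view).

```python
# Hypothetical client sketch for the WebSocket endpoint above.
# The URL and payload shape are assumptions, not taken from this commit.
import asyncio
import json

import websockets  # third-party: pip install websockets

async def main():
    async with websockets.connect('ws://localhost:3000/ws') as ws:  # assumed address
        await ws.send(json.dumps({'action': 'initialize'}))  # hypothetical payload
        print(await ws.recv())

asyncio.run(main())
```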
@@ -311,6 +312,18 @@ async def get_litellm_models():
     )
     bedrock_model_list = bedrock.list_foundation_models()
     model_list = litellm_model_list_without_bedrock + bedrock_model_list
+    ollama_base_url = config.llm.ollama_base_url
+    if config.llm.model.startswith('ollama'):
+        if not ollama_base_url:
+            ollama_base_url = config.llm.base_url
+    if ollama_base_url:
+        ollama_url = ollama_base_url.strip('/') + '/api/tags'
+        try:
+            ollama_models_list = requests.get(ollama_url, timeout=3).json()['models']
+            for model in ollama_models_list:
+                model_list.append('ollama/' + model['name'])
+        except requests.exceptions.RequestException as e:
+            logger.error(f'Error getting OLLAMA models: {e}', exc_info=True)
 
     return list(sorted(set(model_list)))
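The added block does two things: pick which base URL to treat as the Ollama endpoint (preferring `ollama_base_url`, and falling back to the generic `base_url` only when the configured model is an ollama one), then fetch `/api/tags` and prefix each model name with `ollama/`. The resolution step, pulled out as a pure function for illustration (the commit keeps it inline; this factoring is a sketch):

```python
def resolve_ollama_base_url(model: str,
                            ollama_base_url: str | None,
                            base_url: str | None) -> str | None:
    # Mirrors the inline logic above: LLM_BASE_URL is only treated as an
    # Ollama endpoint when the selected model is itself an ollama model.
    if model.startswith('ollama') and not ollama_base_url:
        return base_url
    return ollama_base_url

assert resolve_ollama_base_url('gpt-4o', None, 'http://x:11434') is None
assert resolve_ollama_base_url('ollama/llama3', None, 'http://x:11434') == 'http://x:11434'
assert resolve_ollama_base_url('gpt-4o', 'http://y:11434', None) == 'http://y:11434'
```

One small note on the inline code: `strip('/')` trims slashes from both ends of the URL; since an absolute URL never starts with a slash, it behaves like `rstrip('/')` here.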