[Runtime] Reduce dependency to speed up CI and reduce image size (#3195)

* reduce dependency for runtime
* try making llama-index an optional dependency that's not installed by default
* do not install llama-index in CI
* do not install llama-index in the app docker as well
2025-12-26 05:48:36 +08:00 · 2024-08-01 01:55:09 +08:00 · 2024-08-01 01:55:09 +08:00 · 1d49ef253b
commit 1d49ef253b
parent 938ed027c2
9 changed files with 99 additions and 89 deletions
--- a/.github/workflows/dummy-agent-test.yml
+++ b/.github/workflows/dummy-agent-test.yml
@@ -25,7 +25,7 @@ jobs:
      - name: Set up environment
        run: |
          curl -sSL https://install.python-poetry.org | python3 -
-          poetry install --without evaluation
+          poetry install --without evaluation,llama-index
          poetry run playwright install --with-deps chromium
          wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/
      - name: Run tests
--- a/.github/workflows/review-pr.yml
+++ b/.github/workflows/review-pr.yml
@@ -50,7 +50,7 @@ jobs:
      run: |
        curl -sSL https://install.python-poetry.org | python3 -
        export PATH="/github/home/.local/bin:$PATH"
-        poetry install --without evaluation
+        poetry install --without evaluation,llama-index
        poetry run playwright install --with-deps chromium

    - name: Run OpenDevin
--- a/.github/workflows/run-unit-tests.yml
+++ b/.github/workflows/run-unit-tests.yml
@@ -70,7 +70,7 @@ jobs:
          cache: "poetry"

      - name: Install Python dependencies using Poetry
-        run: poetry install
+        run: poetry install --without evaluation,llama-index

      - name: Install & Start Docker
        if: env.INSTALL_DOCKER == '1'
@@ -153,7 +153,7 @@ jobs:
          cache: "poetry"

      - name: Install Python dependencies using Poetry
-        run: poetry install --without evaluation
+        run: poetry install --without evaluation,llama-index

      - name: Build Environment
        run: make build
--- a/.github/workflows/solve-issue.yml
+++ b/.github/workflows/solve-issue.yml
@@ -40,7 +40,7 @@ jobs:
      run: |
        curl -sSL https://install.python-poetry.org | python3 -
        export PATH="/github/home/.local/bin:$PATH"
-        poetry install --without evaluation
+        poetry install --without evaluation,llama-index
        poetry run playwright install --with-deps chromium


--- a/Makefile
+++ b/Makefile
@@ -141,7 +141,7 @@ install-python-dependencies:
 		export HNSWLIB_NO_NATIVE=1; \
 		poetry run pip install chroma-hnswlib; \
 	fi
-	@poetry install
+	@poetry install --without llama-index
 	@if [ -f "/etc/manjaro-release" ]; then \
 		echo "$(BLUE)Detected Manjaro Linux. Installing Playwright dependencies...$(RESET)"; \
 		poetry run pip install playwright; \
--- a/containers/app/Dockerfile
+++ b/containers/app/Dockerfile
@@ -26,7 +26,7 @@ RUN apt-get update -y \

 COPY ./pyproject.toml ./poetry.lock ./
 RUN touch README.md
-RUN export POETRY_CACHE_DIR && poetry install --without evaluation --no-root && rm -rf $POETRY_CACHE_DIR
+RUN export POETRY_CACHE_DIR && poetry install --without evaluation,llama-index --no-root && rm -rf $POETRY_CACHE_DIR

 FROM python:3.12.3-slim AS runtime

--- a/opendevin/memory/memory.py
+++ b/opendevin/memory/memory.py
@@ -1,10 +1,5 @@
 import threading

-import chromadb
-import llama_index.embeddings.openai.base as llama_openai
-from llama_index.core import Document, VectorStoreIndex
-from llama_index.core.retrievers import VectorIndexRetriever
-from llama_index.vector_stores.chroma import ChromaVectorStore
 from openai._exceptions import APIConnectionError, InternalServerError, RateLimitError
 from tenacity import (
    retry,
@@ -17,94 +12,102 @@ from opendevin.core.config import LLMConfig
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.utils import json

-# TODO: this could be made configurable
-num_retries: int = 10
-retry_min_wait: int = 3
-retry_max_wait: int = 300
+try:
+    import chromadb
+    import llama_index.embeddings.openai.base as llama_openai
+    from llama_index.core import Document, VectorStoreIndex
+    from llama_index.core.retrievers import VectorIndexRetriever
+    from llama_index.vector_stores.chroma import ChromaVectorStore

-# llama-index includes a retry decorator around openai.get_embeddings() function
-# it is initialized with hard-coded values and errors
-# this non-customizable behavior is creating issues when it's retrying faster than providers' rate limits
-# this block attempts to banish it and replace it with our decorator, to allow users to set their own limits
+    LLAMA_INDEX_AVAILABLE = True
+except ImportError:
+    LLAMA_INDEX_AVAILABLE = False

-if hasattr(llama_openai.get_embeddings, '__wrapped__'):
-    original_get_embeddings = llama_openai.get_embeddings.__wrapped__
-else:
-    logger.warning('Cannot set custom retry limits.')
-    num_retries = 1
-    original_get_embeddings = llama_openai.get_embeddings
+if LLAMA_INDEX_AVAILABLE:
+    # TODO: this could be made configurable
+    num_retries: int = 10
+    retry_min_wait: int = 3
+    retry_max_wait: int = 300

+    # llama-index includes a retry decorator around openai.get_embeddings() function
+    # it is initialized with hard-coded values and errors
+    # this non-customizable behavior is creating issues when it's retrying faster than providers' rate limits
+    # this block attempts to banish it and replace it with our decorator, to allow users to set their own limits

-def attempt_on_error(retry_state):
-    logger.error(
-        f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
-        exc_info=False,
+    if hasattr(llama_openai.get_embeddings, '__wrapped__'):
+        original_get_embeddings = llama_openai.get_embeddings.__wrapped__
+    else:
+        logger.warning('Cannot set custom retry limits.')
+        num_retries = 1
+        original_get_embeddings = llama_openai.get_embeddings
+
+    def attempt_on_error(retry_state):
+        logger.error(
+            f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
+            exc_info=False,
+        )
+        return None
+
+    @retry(
+        reraise=True,
+        stop=stop_after_attempt(num_retries),
+        wait=wait_random_exponential(min=retry_min_wait, max=retry_max_wait),
+        retry=retry_if_exception_type(
+            (RateLimitError, APIConnectionError, InternalServerError)
+        ),
+        after=attempt_on_error,
    )
-    return None
+    def wrapper_get_embeddings(*args, **kwargs):
+        return original_get_embeddings(*args, **kwargs)

+    llama_openai.get_embeddings = wrapper_get_embeddings

-@retry(
-    reraise=True,
-    stop=stop_after_attempt(num_retries),
-    wait=wait_random_exponential(min=retry_min_wait, max=retry_max_wait),
-    retry=retry_if_exception_type(
-        (RateLimitError, APIConnectionError, InternalServerError)
-    ),
-    after=attempt_on_error,
-)
-def wrapper_get_embeddings(*args, **kwargs):
-    return original_get_embeddings(*args, **kwargs)
+    class EmbeddingsLoader:
+        """Loader for embedding model initialization."""

+        @staticmethod
+        def get_embedding_model(strategy: str, llm_config: LLMConfig):
+            supported_ollama_embed_models = [
+                'llama2',
+                'mxbai-embed-large',
+                'nomic-embed-text',
+                'all-minilm',
+                'stable-code',
+            ]
+            if strategy in supported_ollama_embed_models:
+                from llama_index.embeddings.ollama import OllamaEmbedding

-llama_openai.get_embeddings = wrapper_get_embeddings
+                return OllamaEmbedding(
+                    model_name=strategy,
+                    base_url=llm_config.embedding_base_url,
+                    ollama_additional_kwargs={'mirostat': 0},
+                )
+            elif strategy == 'openai':
+                from llama_index.embeddings.openai import OpenAIEmbedding

+                return OpenAIEmbedding(
+                    model='text-embedding-ada-002',
+                    api_key=llm_config.api_key,
+                )
+            elif strategy == 'azureopenai':
+                from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

-class EmbeddingsLoader:
-    """Loader for embedding model initialization."""
+                return AzureOpenAIEmbedding(
+                    model='text-embedding-ada-002',
+                    deployment_name=llm_config.embedding_deployment_name,
+                    api_key=llm_config.api_key,
+                    azure_endpoint=llm_config.base_url,
+                    api_version=llm_config.api_version,
+                )
+            elif (strategy is not None) and (strategy.lower() == 'none'):
+                # TODO: this works but is not elegant enough. The incentive is when
+                # an agent using embeddings is not used, there is no reason we need to
+                # initialize an embedding model
+                return None
+            else:
+                from llama_index.embeddings.huggingface import HuggingFaceEmbedding

-    @staticmethod
-    def get_embedding_model(strategy: str, llm_config: LLMConfig):
-        supported_ollama_embed_models = [
-            'llama2',
-            'mxbai-embed-large',
-            'nomic-embed-text',
-            'all-minilm',
-            'stable-code',
-        ]
-        if strategy in supported_ollama_embed_models:
-            from llama_index.embeddings.ollama import OllamaEmbedding
-
-            return OllamaEmbedding(
-                model_name=strategy,
-                base_url=llm_config.embedding_base_url,
-                ollama_additional_kwargs={'mirostat': 0},
-            )
-        elif strategy == 'openai':
-            from llama_index.embeddings.openai import OpenAIEmbedding
-
-            return OpenAIEmbedding(
-                model='text-embedding-ada-002',
-                api_key=llm_config.api_key,
-            )
-        elif strategy == 'azureopenai':
-            from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
-
-            return AzureOpenAIEmbedding(
-                model='text-embedding-ada-002',
-                deployment_name=llm_config.embedding_deployment_name,
-                api_key=llm_config.api_key,
-                azure_endpoint=llm_config.base_url,
-                api_version=llm_config.api_version,
-            )
-        elif (strategy is not None) and (strategy.lower() == 'none'):
-            # TODO: this works but is not elegant enough. The incentive is when
-            # an agent using embeddings is not used, there is no reason we need to
-            # initialize an embedding model
-            return None
-        else:
-            from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-
-            return HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')
+                return HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')


 class LongTermMemory:
@@ -112,6 +115,12 @@ class LongTermMemory:

    def __init__(self, llm_config: LLMConfig, memory_max_threads: int = 1):
        """Initialize the chromadb and set up ChromaVectorStore for later use."""
+        if not LLAMA_INDEX_AVAILABLE:
+            raise ImportError(
+                'llama_index and its dependencies are not installed. '
+                'To use LongTermMemory, please run: poetry install --with llama-index'
+            )
+
        db = chromadb.Client(chromadb.Settings(anonymized_telemetry=False))
        self.collection = db.get_or_create_collection(name='memories')
        vector_store = ChromaVectorStore(chroma_collection=self.collection)
--- a/opendevin/runtime/utils/runtime_templates/Dockerfile.j2
+++ b/opendevin/runtime/utils/runtime_templates/Dockerfile.j2
@@ -56,7 +56,7 @@ RUN mv /opendevin/{{ source_code_dirname }} /opendevin/code
 # 3. Clear poetry, apt, mamba caches
 RUN cd /opendevin/code && \
    /opendevin/miniforge3/bin/mamba run -n base poetry env use python3.11 && \
-    /opendevin/miniforge3/bin/mamba run -n base poetry install --no-interaction --no-root && \
+    /opendevin/miniforge3/bin/mamba run -n base poetry install --only main,runtime --no-interaction --no-root && \
    apt-get update && \
    /opendevin/miniforge3/bin/mamba run -n base poetry run pip install playwright && \
    /opendevin/miniforge3/bin/mamba run -n base poetry run playwright install --with-deps chromium && \
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,6 +85,7 @@ python-pptx = "*"
 pylatexenc = "*"
 opencv-python = "*"

+
 [build-system]
 build-backend = "poetry.core.masonry.api"
 requires = [