[Runtime] Reduce dependencies to speed up CI and reduce image size (#3195)

* reduce dependencies for the runtime

* try making llama-index an optional dependency that's not installed by default

* do not install llama-index in CI

* do not install llama-index in the app Docker image either
This commit is contained in:
Xingyao Wang 2024-08-01 01:55:09 +08:00 committed by GitHub
parent 938ed027c2
commit 1d49ef253b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 99 additions and 89 deletions

View File

@ -25,7 +25,7 @@ jobs:
- name: Set up environment
run: |
curl -sSL https://install.python-poetry.org | python3 -
poetry install --without evaluation
poetry install --without evaluation,llama-index
poetry run playwright install --with-deps chromium
wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/
- name: Run tests

View File

@ -50,7 +50,7 @@ jobs:
run: |
curl -sSL https://install.python-poetry.org | python3 -
export PATH="/github/home/.local/bin:$PATH"
poetry install --without evaluation
poetry install --without evaluation,llama-index
poetry run playwright install --with-deps chromium
- name: Run OpenDevin

View File

@ -70,7 +70,7 @@ jobs:
cache: "poetry"
- name: Install Python dependencies using Poetry
run: poetry install
run: poetry install --without evaluation,llama-index
- name: Install & Start Docker
if: env.INSTALL_DOCKER == '1'
@ -153,7 +153,7 @@ jobs:
cache: "poetry"
- name: Install Python dependencies using Poetry
run: poetry install --without evaluation
run: poetry install --without evaluation,llama-index
- name: Build Environment
run: make build

View File

@ -40,7 +40,7 @@ jobs:
run: |
curl -sSL https://install.python-poetry.org | python3 -
export PATH="/github/home/.local/bin:$PATH"
poetry install --without evaluation
poetry install --without evaluation,llama-index
poetry run playwright install --with-deps chromium

View File

@ -141,7 +141,7 @@ install-python-dependencies:
export HNSWLIB_NO_NATIVE=1; \
poetry run pip install chroma-hnswlib; \
fi
@poetry install
@poetry install --without llama-index
@if [ -f "/etc/manjaro-release" ]; then \
echo "$(BLUE)Detected Manjaro Linux. Installing Playwright dependencies...$(RESET)"; \
poetry run pip install playwright; \

View File

@ -26,7 +26,7 @@ RUN apt-get update -y \
COPY ./pyproject.toml ./poetry.lock ./
RUN touch README.md
RUN export POETRY_CACHE_DIR && poetry install --without evaluation --no-root && rm -rf $POETRY_CACHE_DIR
RUN export POETRY_CACHE_DIR && poetry install --without evaluation,llama-index --no-root && rm -rf $POETRY_CACHE_DIR
FROM python:3.12.3-slim AS runtime

View File

@ -1,10 +1,5 @@
import threading
import chromadb
import llama_index.embeddings.openai.base as llama_openai
from llama_index.core import Document, VectorStoreIndex
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.vector_stores.chroma import ChromaVectorStore
from openai._exceptions import APIConnectionError, InternalServerError, RateLimitError
from tenacity import (
retry,
@ -17,94 +12,102 @@ from opendevin.core.config import LLMConfig
from opendevin.core.logger import opendevin_logger as logger
from opendevin.core.utils import json
# TODO: this could be made configurable
num_retries: int = 10
retry_min_wait: int = 3
retry_max_wait: int = 300
try:
import chromadb
import llama_index.embeddings.openai.base as llama_openai
from llama_index.core import Document, VectorStoreIndex
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.vector_stores.chroma import ChromaVectorStore
# llama-index includes a retry decorator around openai.get_embeddings() function
# it is initialized with hard-coded values and errors
# this non-customizable behavior is creating issues when it's retrying faster than providers' rate limits
# this block attempts to banish it and replace it with our decorator, to allow users to set their own limits
LLAMA_INDEX_AVAILABLE = True
except ImportError:
LLAMA_INDEX_AVAILABLE = False
if hasattr(llama_openai.get_embeddings, '__wrapped__'):
original_get_embeddings = llama_openai.get_embeddings.__wrapped__
else:
logger.warning('Cannot set custom retry limits.')
num_retries = 1
original_get_embeddings = llama_openai.get_embeddings
if LLAMA_INDEX_AVAILABLE:
# TODO: this could be made configurable
num_retries: int = 10
retry_min_wait: int = 3
retry_max_wait: int = 300
# llama-index includes a retry decorator around openai.get_embeddings() function
# it is initialized with hard-coded values and errors
# this non-customizable behavior is creating issues when it's retrying faster than providers' rate limits
# this block attempts to banish it and replace it with our decorator, to allow users to set their own limits
def attempt_on_error(retry_state):
logger.error(
f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
exc_info=False,
if hasattr(llama_openai.get_embeddings, '__wrapped__'):
original_get_embeddings = llama_openai.get_embeddings.__wrapped__
else:
logger.warning('Cannot set custom retry limits.')
num_retries = 1
original_get_embeddings = llama_openai.get_embeddings
def attempt_on_error(retry_state):
logger.error(
f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
exc_info=False,
)
return None
@retry(
reraise=True,
stop=stop_after_attempt(num_retries),
wait=wait_random_exponential(min=retry_min_wait, max=retry_max_wait),
retry=retry_if_exception_type(
(RateLimitError, APIConnectionError, InternalServerError)
),
after=attempt_on_error,
)
return None
def wrapper_get_embeddings(*args, **kwargs):
return original_get_embeddings(*args, **kwargs)
llama_openai.get_embeddings = wrapper_get_embeddings
@retry(
reraise=True,
stop=stop_after_attempt(num_retries),
wait=wait_random_exponential(min=retry_min_wait, max=retry_max_wait),
retry=retry_if_exception_type(
(RateLimitError, APIConnectionError, InternalServerError)
),
after=attempt_on_error,
)
def wrapper_get_embeddings(*args, **kwargs):
return original_get_embeddings(*args, **kwargs)
class EmbeddingsLoader:
"""Loader for embedding model initialization."""
@staticmethod
def get_embedding_model(strategy: str, llm_config: LLMConfig):
supported_ollama_embed_models = [
'llama2',
'mxbai-embed-large',
'nomic-embed-text',
'all-minilm',
'stable-code',
]
if strategy in supported_ollama_embed_models:
from llama_index.embeddings.ollama import OllamaEmbedding
llama_openai.get_embeddings = wrapper_get_embeddings
return OllamaEmbedding(
model_name=strategy,
base_url=llm_config.embedding_base_url,
ollama_additional_kwargs={'mirostat': 0},
)
elif strategy == 'openai':
from llama_index.embeddings.openai import OpenAIEmbedding
return OpenAIEmbedding(
model='text-embedding-ada-002',
api_key=llm_config.api_key,
)
elif strategy == 'azureopenai':
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
class EmbeddingsLoader:
"""Loader for embedding model initialization."""
return AzureOpenAIEmbedding(
model='text-embedding-ada-002',
deployment_name=llm_config.embedding_deployment_name,
api_key=llm_config.api_key,
azure_endpoint=llm_config.base_url,
api_version=llm_config.api_version,
)
elif (strategy is not None) and (strategy.lower() == 'none'):
# TODO: this works but is not elegant enough. The incentive is when
# an agent using embeddings is not used, there is no reason we need to
# initialize an embedding model
return None
else:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
@staticmethod
def get_embedding_model(strategy: str, llm_config: LLMConfig):
supported_ollama_embed_models = [
'llama2',
'mxbai-embed-large',
'nomic-embed-text',
'all-minilm',
'stable-code',
]
if strategy in supported_ollama_embed_models:
from llama_index.embeddings.ollama import OllamaEmbedding
return OllamaEmbedding(
model_name=strategy,
base_url=llm_config.embedding_base_url,
ollama_additional_kwargs={'mirostat': 0},
)
elif strategy == 'openai':
from llama_index.embeddings.openai import OpenAIEmbedding
return OpenAIEmbedding(
model='text-embedding-ada-002',
api_key=llm_config.api_key,
)
elif strategy == 'azureopenai':
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
return AzureOpenAIEmbedding(
model='text-embedding-ada-002',
deployment_name=llm_config.embedding_deployment_name,
api_key=llm_config.api_key,
azure_endpoint=llm_config.base_url,
api_version=llm_config.api_version,
)
elif (strategy is not None) and (strategy.lower() == 'none'):
# TODO: this works but is not elegant enough. The incentive is when
# an agent using embeddings is not used, there is no reason we need to
# initialize an embedding model
return None
else:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
return HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')
return HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')
class LongTermMemory:
@ -112,6 +115,12 @@ class LongTermMemory:
def __init__(self, llm_config: LLMConfig, memory_max_threads: int = 1):
"""Initialize the chromadb and set up ChromaVectorStore for later use."""
if not LLAMA_INDEX_AVAILABLE:
raise ImportError(
'llama_index and its dependencies are not installed. '
'To use LongTermMemory, please run: poetry install --with llama-index'
)
db = chromadb.Client(chromadb.Settings(anonymized_telemetry=False))
self.collection = db.get_or_create_collection(name='memories')
vector_store = ChromaVectorStore(chroma_collection=self.collection)

View File

@ -56,7 +56,7 @@ RUN mv /opendevin/{{ source_code_dirname }} /opendevin/code
# 3. Clear poetry, apt, mamba caches
RUN cd /opendevin/code && \
/opendevin/miniforge3/bin/mamba run -n base poetry env use python3.11 && \
/opendevin/miniforge3/bin/mamba run -n base poetry install --no-interaction --no-root && \
/opendevin/miniforge3/bin/mamba run -n base poetry install --only main,runtime --no-interaction --no-root && \
apt-get update && \
/opendevin/miniforge3/bin/mamba run -n base poetry run pip install playwright && \
/opendevin/miniforge3/bin/mamba run -n base poetry run playwright install --with-deps chromium && \

View File

@ -85,6 +85,7 @@ python-pptx = "*"
pylatexenc = "*"
opencv-python = "*"
[build-system]
build-backend = "poetry.core.masonry.api"
requires = [