Added siliconflow API support

2026-03-22 11:17:17 +08:00 · 2025-04-03 07:12:40 +08:00
parent 2df50b308c
commit d711c85644
3 changed files with 145 additions and 4 deletions
--- a/.env.example
+++ b/.env.example
@@ -27,6 +27,9 @@ MOONSHOT_API_KEY=
 UNBOUND_ENDPOINT=https://api.getunbound.ai
 UNBOUND_API_KEY=

+SiliconFLOW_ENDPOINT=https://api.siliconflow.cn/v1/
+SiliconFLOW_API_KEY=
+
 # Set to false to disable anonymized telemetry
 ANONYMIZED_TELEMETRY=false

--- a/src/utils/llm.py
+++ b/src/utils/llm.py
@@ -37,7 +37,7 @@ from typing import (
    Literal,
    Optional,
    Union,
-    cast,
+    cast, List,
 )


@@ -136,3 +136,91 @@ class DeepSeekR1ChatOllama(ChatOllama):
        if "**JSON Response:**" in content:
            content = content.split("**JSON Response:**")[-1]
        return AIMessage(content=content, reasoning_content=reasoning_content)
+
+
+class SiliconFlowChat(ChatOpenAI):
+    """Wrapper for SiliconFlow Chat API, fully compatible with OpenAI-spec format."""
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+        # Ensure the API client is initialized with SiliconFlow's endpoint and key
+        self.client = OpenAI(
+            api_key=kwargs.get("api_key"),
+            base_url=kwargs.get("base_url")
+        )
+
+    async def ainvoke(
+            self,
+            input: LanguageModelInput,
+            config: Optional[RunnableConfig] = None,
+            *,
+            stop: Optional[List[str]] = None,
+            **kwargs: Any,
+    ) -> AIMessage:
+        """Async call SiliconFlow API."""
+
+        # Convert input messages into OpenAI-compatible format
+        message_history = []
+        for input_msg in input:
+            if isinstance(input_msg, SystemMessage):
+                message_history.append({"role": "system", "content": input_msg.content})
+            elif isinstance(input_msg, AIMessage):
+                message_history.append({"role": "assistant", "content": input_msg.content})
+            else:  # HumanMessage or similar
+                message_history.append({"role": "user", "content": input_msg.content})
+
+        # Send request to SiliconFlow API (OpenAI-spec endpoint)
+        response = await self.client.chat.completions.create(
+            model=self.model_name,
+            messages=message_history,
+            stop=stop,
+            **kwargs,
+        )
+
+        # Extract the AI response (SiliconFlow's response must match OpenAI format)
+        if hasattr(response.choices[0].message, "reasoning_content"):
+            reasoning_content = response.choices[0].message.reasoning_content
+        else:
+            reasoning_content = None
+
+        content = response.choices[0].message.content
+        return AIMessage(content=content, reasoning_content=reasoning_content)  # Return reasoning_content if needed
+
+    def invoke(
+            self,
+            input: LanguageModelInput,
+            config: Optional[RunnableConfig] = None,
+            *,
+            stop: Optional[List[str]] = None,
+            **kwargs: Any,
+    ) -> AIMessage:
+        """Sync call SiliconFlow API."""
+
+        # Same conversion as async version
+        message_history = []
+        for input_msg in input:
+            if isinstance(input_msg, SystemMessage):
+                message_history.append({"role": "system", "content": input_msg.content})
+            elif isinstance(input_msg, AIMessage):
+                message_history.append({"role": "assistant", "content": input_msg.content})
+            else:
+                message_history.append({"role": "user", "content": input_msg.content})
+
+        # Sync call
+        response = self.client.chat.completions.create(
+            model=self.model_name,
+            messages=message_history,
+            stop=stop,
+            **kwargs,
+        )
+
+        # Handle reasoning_content (if supported)
+        reasoning_content = None
+        if hasattr(response.choices[0].message, "reasoning_content"):
+            reasoning_content = response.choices[0].message.reasoning_content
+
+        return AIMessage(
+            content=response.choices[0].message.content,
+            reasoning_content=reasoning_content,  # Only if SiliconFlow supports it
+        )
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -14,7 +14,7 @@ from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_ollama import ChatOllama
 from langchain_openai import AzureChatOpenAI, ChatOpenAI

-from .llm import DeepSeekR1ChatOpenAI, DeepSeekR1ChatOllama
+from .llm import DeepSeekR1ChatOpenAI, DeepSeekR1ChatOllama,SiliconFlowChat

 PROVIDER_DISPLAY_NAMES = {
    "openai": "OpenAI",
@@ -165,9 +165,26 @@ def get_llm_model(provider: str, **kwargs):
        return ChatOpenAI(
            model=kwargs.get("model_name", "gpt-4o-mini"),
            temperature=kwargs.get("temperature", 0.0),
-            base_url = os.getenv("UNBOUND_ENDPOINT", "https://api.getunbound.ai"),
+            base_url=os.getenv("UNBOUND_ENDPOINT", "https://api.getunbound.ai"),
            api_key=api_key,
        )
+    elif provider == "siliconflow":
+        if not kwargs.get("api_key", ""):
+            api_key = os.getenv("SiliconFLOW_API_KEY", "")
+        else:
+            api_key = kwargs.get("api_key")
+        if not kwargs.get("base_url", ""):
+            base_url = os.getenv("SiliconFLOW_ENDPOINT", "")
+        else:
+            base_url = kwargs.get("base_url")
+        return SiliconFlowChat(
+            api_key=api_key,
+            base_url=base_url,
+            model_name=kwargs.get("model_name", "Qwen/QwQ-32B"),
+            temperature=kwargs.get("temperature", 0.0),
+            max_tokens=kwargs.get("max_tokens", 512),
+            frequency_penalty=kwargs.get("frequency_penalty", 0.5),
+        )
    else:
        raise ValueError(f"Unsupported provider: {provider}")

@@ -185,7 +202,40 @@ model_names = {
    "mistral": ["pixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"],
    "alibaba": ["qwen-plus", "qwen-max", "qwen-turbo", "qwen-long"],
    "moonshot": ["moonshot-v1-32k-vision-preview", "moonshot-v1-8k-vision-preview"],
-    "unbound": ["gemini-2.0-flash","gpt-4o-mini", "gpt-4o", "gpt-4.5-preview"]
+    "unbound": ["gemini-2.0-flash", "gpt-4o-mini", "gpt-4o", "gpt-4.5-preview"],
+    "siliconflow": [
+        "deepseek-ai/DeepSeek-R1",
+        "deepseek-ai/DeepSeek-V3",
+        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
+        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
+        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+        "deepseek-ai/DeepSeek-V2.5",
+        "deepseek-ai/deepseek-vl2",
+        "Qwen/Qwen2.5-72B-Instruct-128K",
+        "Qwen/Qwen2.5-72B-Instruct",
+        "Qwen/Qwen2.5-32B-Instruct",
+        "Qwen/Qwen2.5-14B-Instruct",
+        "Qwen/Qwen2.5-7B-Instruct",
+        "Qwen/Qwen2.5-Coder-32B-Instruct",
+        "Qwen/Qwen2.5-Coder-7B-Instruct",
+        "Qwen/Qwen2-7B-Instruct",
+        "Qwen/Qwen2-1.5B-Instruct",
+        "Qwen/QwQ-32B-Preview",
+        "Qwen/Qwen2-VL-72B-Instruct",
+        "Qwen/Qwen2.5-VL-32B-Instruct",
+        "Qwen/Qwen2.5-VL-72B-Instruct",
+        "TeleAI/TeleChat2",
+        "THUDM/glm-4-9b-chat",
+        "Vendor-A/Qwen/Qwen2.5-72B-Instruct",
+        "internlm/internlm2_5-7b-chat",
+        "internlm/internlm2_5-20b-chat",
+        "Pro/Qwen/Qwen2.5-7B-Instruct",
+        "Pro/Qwen/Qwen2-7B-Instruct",
+        "Pro/Qwen/Qwen2-1.5B-Instruct",
+        "Pro/THUDM/chatglm3-6b",
+        "Pro/THUDM/glm-4-9b-chat",
+    ],
 }