| """ | |
| Standalone model inference and client management for AnyCoder Backend API. | |
| No Gradio dependencies - works with FastAPI/backend only. | |
| """ | |
| import os | |
| from typing import Optional | |
| from openai import OpenAI | |
| from mistralai import Mistral | |
| # Import genai for Gemini (legacy - no longer used with Poe API) | |
| try: | |
| from google import genai | |
| from google.genai import types | |
| GEMINI_AVAILABLE = True | |
| except ImportError: | |
| GEMINI_AVAILABLE = False | |
def get_inference_client(model_id: str, provider: str = "auto"):
    """
    Return an appropriate client for the given model_id.

    Most models use an OpenAI-compatible client pointed at the relevant
    provider; Mistral models return a Mistral SDK client instead. Raises
    ValueError if the stealth model's credentials are not configured.
    The provider argument is accepted for API compatibility, but routing
    is keyed on model_id alone.
    """
    # Models served through Poe's OpenAI-compatible endpoint
    poe_models = {
        "gemini-3.0-pro",
        "gpt-5.1", "gpt-5.1-instant", "gpt-5.1-codex", "gpt-5.1-codex-mini",
        "grok-4", "Grok-Code-Fast-1",
        "claude-opus-4.1", "claude-opus-4.5",
        "claude-sonnet-4.5", "claude-haiku-4.5",
    }
    # Qwen models served through DashScope's OpenAI-compatible endpoint
    dashscope_models = {
        "qwen3-30b-a3b-instruct-2507",
        "qwen3-30b-a3b-thinking-2507",
        "qwen3-coder-30b-a3b-instruct",
    }
    # Gemini models served directly via Google's OpenAI-compatible endpoint
    gemini_models = {"gemini-2.5-flash", "gemini-2.5-pro", "gemini-flash-lite-latest"}

    if model_id in poe_models:
        return OpenAI(
            api_key=os.getenv("POE_API_KEY"),
            base_url="https://api.poe.com/v1",
        )
    elif model_id in dashscope_models:
        return OpenAI(
            api_key=os.getenv("DASHSCOPE_API_KEY"),
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
        )
    elif model_id == "x-ai/grok-4.1-fast" or model_id.startswith("openrouter/"):
        # OpenRouter models
        return OpenAI(
            api_key=os.getenv("OPENROUTER_API_KEY"),
            base_url="https://openrouter.ai/api/v1",
        )
    elif (model_id == "MiniMaxAI/MiniMax-M2"
          or model_id.startswith("moonshotai/Kimi-K2")
          or model_id.startswith("deepseek-ai/")
          or model_id.startswith("zai-org/GLM-4")):
        # Models routed through the HuggingFace Router; the concrete provider
        # (Novita, Groq, Together, Cerebras, etc.) is selected by the
        # ":provider" suffix appended in get_real_model_id().
        return OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=os.getenv("HF_TOKEN"),
            default_headers={"X-HF-Bill-To": "huggingface"},
        )
    elif model_id == "step-3":
        # StepFun API client for the Step-3 model
        return OpenAI(
            api_key=os.getenv("STEP_API_KEY"),
            base_url="https://api.stepfun.com/v1",
        )
    elif model_id in ("codestral-2508", "mistral-medium-2508"):
        # Mistral SDK client for Mistral models
        return Mistral(api_key=os.getenv("MISTRAL_API_KEY"))
    elif model_id in gemini_models:
        return OpenAI(
            api_key=os.getenv("GEMINI_API_KEY"),
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
        )
    elif model_id == "kimi-k2-turbo-preview":
        # Moonshot AI (OpenAI-compatible) client for Kimi K2 Turbo (Preview)
        return OpenAI(
            api_key=os.getenv("MOONSHOT_API_KEY"),
            base_url="https://api.moonshot.ai/v1",
        )
    elif model_id == "stealth-model-1":
        # Stealth model: endpoint and key are supplied entirely via environment
        api_key = os.getenv("STEALTH_MODEL_1_API_KEY")
        if not api_key:
            raise ValueError("STEALTH_MODEL_1_API_KEY environment variable is required")
        base_url = os.getenv("STEALTH_MODEL_1_BASE_URL")
        if not base_url:
            raise ValueError("STEALTH_MODEL_1_BASE_URL environment variable is required")
        return OpenAI(
            api_key=api_key,
            base_url=base_url,
        )
    else:
        # Unknown model - fall back to the HuggingFace Inference API
        return OpenAI(
            base_url="https://api-inference.huggingface.co/v1",
            api_key=os.getenv("HF_TOKEN"),
        )
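# Credentials read by get_inference_client(), one per upstream provider:
#   POE_API_KEY, DASHSCOPE_API_KEY, OPENROUTER_API_KEY, HF_TOKEN, STEP_API_KEY,
#   MISTRAL_API_KEY, GEMINI_API_KEY, MOONSHOT_API_KEY, and the stealth trio
#   STEALTH_MODEL_1_API_KEY / STEALTH_MODEL_1_BASE_URL / STEALTH_MODEL_1_ID.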
def get_real_model_id(model_id: str) -> str:
    """Get the real model ID, adding provider suffixes where needed."""
    if model_id == "stealth-model-1":
        # Get the real model ID from an environment variable
        real_model_id = os.getenv("STEALTH_MODEL_1_ID")
        if not real_model_id:
            raise ValueError("STEALTH_MODEL_1_ID environment variable is required")
        return real_model_id
    elif model_id == "zai-org/GLM-4.6":
        # GLM-4.6 requires the Cerebras provider suffix in the model string
        return "zai-org/GLM-4.6:cerebras"
    elif model_id == "MiniMaxAI/MiniMax-M2":
        # MiniMax M2 needs the Novita provider suffix
        return "MiniMaxAI/MiniMax-M2:novita"
    elif model_id == "moonshotai/Kimi-K2-Thinking":
        # Kimi K2 Thinking needs the Together AI provider
        return "moonshotai/Kimi-K2-Thinking:together"
    elif model_id == "moonshotai/Kimi-K2-Instruct":
        # Kimi K2 Instruct needs the Groq provider
        return "moonshotai/Kimi-K2-Instruct:groq"
    elif model_id.startswith("deepseek-ai/DeepSeek-V3") or model_id.startswith("deepseek-ai/DeepSeek-R1"):
        # DeepSeek V3 and R1 models need the Novita provider
        return f"{model_id}:novita"
    elif model_id == "zai-org/GLM-4.5":
        # GLM-4.5 needs the fireworks-ai provider
        return "zai-org/GLM-4.5:fireworks-ai"
    return model_id
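# Illustrative sketch, not part of the original module: the two helpers above
# are meant to be used together - build a client, then pass the
# provider-suffixed id to it. The model id and prompt below are assumptions
# chosen for the example.
def _example_chat_completion() -> str:
    model_id = "moonshotai/Kimi-K2-Instruct"  # assumed example model (needs HF_TOKEN)
    client = get_inference_client(model_id)
    response = client.chat.completions.create(
        model=get_real_model_id(model_id),  # resolves to "moonshotai/Kimi-K2-Instruct:groq"
        messages=[{"role": "user", "content": "Write a hello-world in Python."}],
    )
    return response.choices[0].message.content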
def create_gemini3_messages(messages: list) -> tuple:
    """
    Convert OpenAI-style messages to Gemini 3 format.

    Returns (contents, config); the search tool is embedded in the config.
    """
    if not GEMINI_AVAILABLE:
        raise ImportError("google-genai package required for Gemini 3")
    contents = []
    system_prompt = None
    for msg in messages:
        if msg['role'] == 'system':
            system_prompt = msg['content']
        elif msg['role'] in ['user', 'assistant']:
            contents.append(
                types.Content(
                    role="user" if msg['role'] == 'user' else "model",
                    parts=[types.Part.from_text(text=msg['content'])]
                )
            )
    # Prepend the system prompt as the first user message, if present
    if system_prompt:
        contents.insert(0, types.Content(
            role="user",
            parts=[types.Part.from_text(text=f"System instructions: {system_prompt}")]
        ))
    # Configure tools and thinking
    tools = [types.Tool(google_search=types.GoogleSearch())]
    config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_level="HIGH"),
        tools=tools,
        max_output_tokens=16384
    )
    return contents, config
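# Illustrative sketch, not part of the original module: feeding the converted
# (contents, config) pair to the google-genai SDK. The model name below is an
# assumption; adjust it to whatever Gemini 3 id the deployment actually targets.
def _example_gemini3_call(messages: list) -> str:
    if not GEMINI_AVAILABLE:
        raise ImportError("google-genai package required for Gemini 3")
    contents, config = create_gemini3_messages(messages)
    client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    response = client.models.generate_content(
        model="gemini-3-pro-preview",  # assumed model name
        contents=contents,
        config=config,
    )
    return response.text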
def is_native_sdk_model(model_id: str) -> bool:
    """Check if the model uses a native SDK (not OpenAI-compatible)."""
    return False  # All models now use OpenAI-compatible APIs

def is_mistral_model(model_id: str) -> bool:
    """Check if the model uses the Mistral SDK."""
    return model_id in ["codestral-2508", "mistral-medium-2508"]
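# Illustrative sketch, not part of the original module: dispatching on
# is_mistral_model(), since the mistralai v1 SDK exposes chat.complete()
# rather than the OpenAI-style chat.completions.create(). The default model
# id and prompt are assumptions chosen for the example.
def _example_dispatch(model_id: str = "codestral-2508", prompt: str = "Say hello.") -> str:
    client = get_inference_client(model_id)
    messages = [{"role": "user", "content": prompt}]
    if is_mistral_model(model_id):
        response = client.chat.complete(model=get_real_model_id(model_id), messages=messages)
    else:
        response = client.chat.completions.create(model=get_real_model_id(model_id), messages=messages)
    return response.choices[0].message.content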