[fix]:[FL-44][清理遗留聊天 / 模型注册模块]
Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
from functools import lru_cache
|
||||
import json
|
||||
import re
|
||||
from typing import Literal
|
||||
from urllib.parse import quote_plus
|
||||
@@ -43,11 +42,6 @@ class Settings(BaseSettings):
|
||||
refresh_cookie_secure: bool = False
|
||||
refresh_cookie_samesite: Literal["lax", "strict", "none"] = "lax"
|
||||
|
||||
llm_provider_api_keys: str = ""
|
||||
llm_request_timeout_seconds: int = 60
|
||||
chat_context_message_limit: int = 12
|
||||
chat_default_system_prompt: str = "You are a helpful assistant."
|
||||
|
||||
celery_broker_url: str | None = None
|
||||
celery_result_backend: str | None = None
|
||||
celery_timezone: str = "Asia/Shanghai"
|
||||
@@ -84,8 +78,6 @@ class Settings(BaseSettings):
|
||||
@field_validator(
|
||||
"access_token_expire_minutes",
|
||||
"refresh_token_expire_days",
|
||||
"llm_request_timeout_seconds",
|
||||
"chat_context_message_limit",
|
||||
"db_port",
|
||||
"scheduler_expire_interval_seconds",
|
||||
"flower_api_timeout_seconds",
|
||||
@@ -137,41 +129,6 @@ class Settings(BaseSettings):
|
||||
return None
|
||||
return "|".join(f"(?:{part})" for part in regex_parts)
|
||||
|
||||
@property
def llm_provider_key_map(self) -> dict[str, str]:
    """Parse ``llm_provider_api_keys`` into a ``{provider: secret}`` mapping.

    Two input formats are understood:

    * a JSON object (recognised by a leading ``{``); entries whose key or
      value is not a string are ignored, and undecodable JSON yields ``{}``;
    * ``provider=secret`` pairs separated by commas, semicolons or newlines;
      chunks without ``=`` are ignored.

    Provider names are stripped and lower-cased, secrets are stripped, and
    pairs where either side ends up empty are dropped. Later duplicates
    overwrite earlier ones.
    """
    text = self.llm_provider_api_keys.strip()
    if not text:
        return {}

    if text.startswith("{"):
        try:
            decoded = json.loads(text)
        except json.JSONDecodeError:
            return {}
        if not isinstance(decoded, dict):
            return {}
        candidates = [
            (name, secret)
            for name, secret in decoded.items()
            if isinstance(name, str) and isinstance(secret, str)
        ]
    else:
        # Split only on the first "=" so secrets may themselves contain "=".
        candidates = [
            tuple(chunk.strip().split("=", 1))
            for chunk in re.split(r"[,\n;]+", text)
            if "=" in chunk
        ]

    result: dict[str, str] = {}
    for name, secret in candidates:
        provider_name = name.strip().lower()
        cleaned_secret = secret.strip()
        if provider_name and cleaned_secret:
            result[provider_name] = cleaned_secret
    return result
|
||||
|
||||
@property
|
||||
def resolved_database_url(self) -> str:
|
||||
explicit_database_url = (self.database_url or "").strip()
|
||||
|
||||
@@ -390,7 +390,6 @@ def init_db() -> None:
|
||||
line,
|
||||
line_tower,
|
||||
menu,
|
||||
model_registry,
|
||||
object_group,
|
||||
question_bank,
|
||||
rbac,
|
||||
|
||||
@@ -4,7 +4,7 @@ Import all model modules during package initialization so SQLAlchemy can
|
||||
resolve string-based relationships regardless of route/service import order.
|
||||
"""
|
||||
|
||||
from . import atp_model, audit_log, auth_session, elevation, file_storage, fl_analysis, hot_search, lightning_event, lightning_sample, line, line_tower, menu, model_registry, object_group, question_bank, rbac, system_param, tower_model, tower_profile, user, worker_registry
|
||||
from . import atp_model, audit_log, auth_session, elevation, file_storage, fl_analysis, hot_search, lightning_event, lightning_sample, line, line_tower, menu, object_group, question_bank, rbac, system_param, tower_model, tower_profile, user, worker_registry
|
||||
|
||||
__all__ = [
|
||||
"atp_model",
|
||||
@@ -19,7 +19,6 @@ __all__ = [
|
||||
"line",
|
||||
"line_tower",
|
||||
"menu",
|
||||
"model_registry",
|
||||
"object_group",
|
||||
"question_bank",
|
||||
"rbac",
|
||||
|
||||
@@ -1,187 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from sqlalchemy import (
|
||||
JSON,
|
||||
Boolean,
|
||||
DateTime,
|
||||
ForeignKey,
|
||||
Integer,
|
||||
Numeric,
|
||||
String,
|
||||
Text,
|
||||
UniqueConstraint,
|
||||
)
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from ..core.database import Base
|
||||
from .base import utcnow
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .user import User
|
||||
|
||||
|
||||
class ModelRegistry(Base):
    """ORM mapping for the ``llm_models`` table.

    Route rules are joined on ``code`` (not ``id``); API keys, health checks
    and test runs are joined on ``id`` and are deleted together with the
    model (``cascade="all, delete-orphan"``).
    """

    __tablename__ = "llm_models"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Unique code; ModelRouteRule.target_model_code references this column.
    code: Mapped[str] = mapped_column(String(64), unique=True, index=True)
    name: Mapped[str] = mapped_column(String(128), index=True)
    provider: Mapped[str] = mapped_column(String(64), index=True)
    provider_model: Mapped[str] = mapped_column(String(128), index=True)
    # New rows default to "DRAFT".
    status: Mapped[str] = mapped_column(String(16), default="DRAFT", index=True)
    # Stored as JSON; each new row gets its own empty list.
    capabilities: Mapped[list[str]] = mapped_column(JSON, default=list)
    description: Mapped[str] = mapped_column(Text(), default="")
    base_url: Mapped[str | None] = mapped_column(String(255))
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
    # Refreshed via ``onupdate`` whenever the row is updated through the ORM.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
        onupdate=utcnow,
    )

    # Explicit primaryjoin because the link is by ``code``, not by primary key.
    route_rules: Mapped[list[ModelRouteRule]] = relationship(
        "ModelRouteRule",
        back_populates="target_model",
        lazy="selectin",
        primaryjoin="ModelRegistry.code == ModelRouteRule.target_model_code",
    )
    # Highest key version first.
    api_keys: Mapped[list[ModelApiKey]] = relationship(
        "ModelApiKey",
        back_populates="model",
        lazy="selectin",
        cascade="all, delete-orphan",
        order_by="ModelApiKey.version.desc()",
    )
    # Most recent check first.
    health_checks: Mapped[list[ModelHealthCheck]] = relationship(
        "ModelHealthCheck",
        back_populates="model",
        lazy="selectin",
        cascade="all, delete-orphan",
        order_by="ModelHealthCheck.created_at.desc()",
    )
    # Most recent run first.
    test_runs: Mapped[list[ModelTestRun]] = relationship(
        "ModelTestRun",
        back_populates="model",
        lazy="selectin",
        cascade="all, delete-orphan",
        order_by="ModelTestRun.created_at.desc()",
    )
|
||||
|
||||
|
||||
class ModelRouteRule(Base):
    """ORM mapping for the ``model_route_rules`` table.

    Each ``(route_type, route_key)`` pair is unique and points at a model via
    ``llm_models.code``.
    """

    __tablename__ = "model_route_rules"
    __table_args__ = (
        UniqueConstraint("route_type", "route_key", name="uq_model_route_type_key"),
    )

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    route_type: Mapped[str] = mapped_column(String(16), index=True)
    route_key: Mapped[str] = mapped_column(String(128), index=True)
    # ondelete="RESTRICT": the database refuses to delete a model that is still targeted.
    target_model_code: Mapped[str] = mapped_column(
        String(64),
        ForeignKey("llm_models.code", ondelete="RESTRICT"),
        index=True,
    )
    priority: Mapped[int] = mapped_column(Integer, default=100, index=True)
    enabled: Mapped[bool] = mapped_column(Boolean, default=True, index=True)
    note: Mapped[str | None] = mapped_column(String(255))
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
        onupdate=utcnow,
    )

    # Mirror of ModelRegistry.route_rules; joined on the model code.
    target_model: Mapped[ModelRegistry] = relationship(
        "ModelRegistry",
        back_populates="route_rules",
        lazy="selectin",
        primaryjoin="ModelRouteRule.target_model_code == ModelRegistry.code",
    )
|
||||
|
||||
|
||||
class ModelApiKey(Base):
    """ORM mapping for the ``model_api_keys`` table.

    One row per key version of a model; ``(model_id, version)`` is unique.
    The columns hold a hash, a masked form and a fingerprint of the secret.
    """

    __tablename__ = "model_api_keys"
    __table_args__ = (
        UniqueConstraint("model_id", "version", name="uq_model_key_model_version"),
    )

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Rows are removed by the database when the owning model is deleted.
    model_id: Mapped[int] = mapped_column(
        ForeignKey("llm_models.id", ondelete="CASCADE"),
        index=True,
    )
    version: Mapped[int] = mapped_column(Integer, index=True)
    secret_hash: Mapped[str] = mapped_column(String(128))
    secret_masked: Mapped[str] = mapped_column(String(64))
    secret_fingerprint: Mapped[str] = mapped_column(String(32), index=True)
    is_active: Mapped[bool] = mapped_column(Boolean, default=True, index=True)
    rotation_note: Mapped[str | None] = mapped_column(String(255))
    # Set to NULL by the database if the referenced user is deleted.
    created_by_user_id: Mapped[str | None] = mapped_column(
        String(36),
        ForeignKey("users.user_id", ondelete="SET NULL"),
        index=True,
    )
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)

    model: Mapped[ModelRegistry] = relationship("ModelRegistry", back_populates="api_keys", lazy="selectin")
    created_by: Mapped[User | None] = relationship("User", lazy="selectin")
|
||||
|
||||
|
||||
class ModelHealthCheck(Base):
    """ORM mapping for the ``model_health_checks`` table (one row per check)."""

    __tablename__ = "model_health_checks"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Rows are removed by the database when the owning model is deleted.
    model_id: Mapped[int] = mapped_column(
        ForeignKey("llm_models.id", ondelete="CASCADE"),
        index=True,
    )
    status: Mapped[str] = mapped_column(String(16), index=True)
    reason: Mapped[str] = mapped_column(String(255))
    latency_ms: Mapped[int | None] = mapped_column(Integer)
    # Optional free-form JSON payload.
    detail_json: Mapped[dict[str, Any] | None] = mapped_column(JSON)
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)

    model: Mapped[ModelRegistry] = relationship("ModelRegistry", back_populates="health_checks", lazy="selectin")
|
||||
|
||||
|
||||
class ModelTestRun(Base):
    """ORM mapping for the ``model_test_runs`` table (one row per test run)."""

    __tablename__ = "model_test_runs"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Rows are removed by the database when the owning model is deleted.
    model_id: Mapped[int] = mapped_column(
        ForeignKey("llm_models.id", ondelete="CASCADE"),
        index=True,
    )
    # Defaults to "SMOKE".
    kind: Mapped[str] = mapped_column(String(32), default="SMOKE", index=True)
    status: Mapped[str] = mapped_column(String(16), index=True)
    input_tokens: Mapped[int] = mapped_column(Integer, default=0)
    output_tokens: Mapped[int] = mapped_column(Integer, default=0)
    latency_ms: Mapped[int | None] = mapped_column(Integer)
    error_message: Mapped[str | None] = mapped_column(Text())
    # Set to NULL by the database if the referenced user is deleted.
    created_by_user_id: Mapped[str | None] = mapped_column(
        String(36),
        ForeignKey("users.user_id", ondelete="SET NULL"),
        index=True,
    )
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)

    model: Mapped[ModelRegistry] = relationship("ModelRegistry", back_populates="test_runs", lazy="selectin")
    created_by: Mapped[User | None] = relationship("User", lazy="selectin")
|
||||
|
||||
|
||||
class ModelUsageLog(Base):
    """ORM mapping for the ``model_usage_logs`` table.

    ``model_code`` is a plain indexed string: no foreign key to ``llm_models``
    is declared here.
    """

    __tablename__ = "model_usage_logs"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    model_code: Mapped[str] = mapped_column(String(64), index=True)
    # Defaults to "RUNTIME".
    source: Mapped[str] = mapped_column(String(32), default="RUNTIME", index=True)
    request_count: Mapped[int] = mapped_column(Integer, default=1)
    success_count: Mapped[int] = mapped_column(Integer, default=1)
    total_tokens: Mapped[int] = mapped_column(Integer, default=0)
    # Fixed-point NUMERIC(12, 6) column.
    total_cost_usd: Mapped[Decimal] = mapped_column(Numeric(12, 6), default=Decimal("0"))
    recorded_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, index=True)
|
||||
@@ -1,216 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# Closed sets of string values used as field types by the schemas in this module.
ModelStatus = Literal["DRAFT", "ENABLED", "DISABLED", "DEPRECATED"]
ModelRouteType = Literal["GLOBAL", "CAPABILITY", "BUSINESS", "AGENT"]
ModelHealthStatus = Literal["HEALTHY", "DEGRADED", "UNHEALTHY"]
ModelTestStatus = Literal["PASSED", "FAILED"]
|
||||
|
||||
|
||||
class ModelUsageSummary(BaseModel):
    # Usage counters; every counter defaults to zero and success_rate to None.
    request_count: int = 0
    success_count: int = 0
    total_tokens: int = 0
    total_cost_usd: float = 0.0
    success_rate: float | None = None
|
||||
|
||||
|
||||
class ModelTestSummary(BaseModel):
    # Test-run counters; every counter defaults to zero and pass_rate to None.
    total_runs: int = 0
    passed_runs: int = 0
    failed_runs: int = 0
    pass_rate: float | None = None
|
||||
|
||||
|
||||
class ModelRegistryPublic(BaseModel):
    # Public view of a registered model: its own attributes plus optional
    # active-key, latest-health, routing and usage/test summary fields.
    id: int
    code: str
    name: str
    provider: str
    provider_model: str
    status: ModelStatus
    capabilities: list[str] = Field(default_factory=list)
    description: str = ""
    base_url: str | None = None
    # Active API key details; only masked/fingerprint forms are exposed here.
    active_key_masked: str | None = None
    active_key_version: int | None = None
    active_key_fingerprint: str | None = None
    active_key_rotated_at: datetime | None = None
    # Latest health check details.
    latest_health_status: ModelHealthStatus | None = None
    latest_health_reason: str | None = None
    latest_health_at: datetime | None = None
    route_bindings_count: int = 0
    # default_factory gives each instance its own empty summary.
    usage_7d: ModelUsageSummary = Field(default_factory=ModelUsageSummary)
    tests_7d: ModelTestSummary = Field(default_factory=ModelTestSummary)
    created_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
class ModelListResponse(BaseModel):
    # List envelope: model items plus a total count.
    items: list[ModelRegistryPublic]
    total: int
|
||||
|
||||
|
||||
class ModelCreateRequest(BaseModel):
    # Payload for creating a model.
    # code: 2-64 chars; lowercase letter or digit first, then lowercase
    # letters, digits, ".", "_" or "-".
    code: str = Field(min_length=2, max_length=64, pattern=r"^[a-z0-9][a-z0-9._-]{1,63}$")
    name: str = Field(min_length=2, max_length=128)
    provider: str = Field(min_length=2, max_length=64)
    provider_model: str = Field(min_length=1, max_length=128)
    status: ModelStatus = "DRAFT"
    capabilities: list[str] = Field(default_factory=list)
    description: str = Field(default="", max_length=2000)
    base_url: str | None = Field(default=None, max_length=255)
    # Optional; when given it must be 8-1024 characters long.
    api_key: str | None = Field(default=None, min_length=8, max_length=1024)
|
||||
|
||||
|
||||
class ModelUpdateRequest(BaseModel):
    # Update payload; every field is optional and defaults to None.
    # Neither ``code`` nor ``status`` is part of this schema.
    name: str | None = Field(default=None, min_length=2, max_length=128)
    provider: str | None = Field(default=None, min_length=2, max_length=64)
    provider_model: str | None = Field(default=None, min_length=1, max_length=128)
    capabilities: list[str] | None = None
    description: str | None = Field(default=None, max_length=2000)
    base_url: str | None = Field(default=None, max_length=255)
|
||||
|
||||
|
||||
class ModelTransitionRequest(BaseModel):
    # Target status plus an optional note of at most 255 characters.
    status: ModelStatus
    note: str | None = Field(default=None, max_length=255)
|
||||
|
||||
|
||||
class ModelRouteRulePublic(BaseModel):
    # Public view of a route rule.
    id: int
    route_type: ModelRouteType
    route_key: str
    target_model_code: str
    priority: int
    enabled: bool
    note: str | None = None
    created_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
class ModelRouteRuleListResponse(BaseModel):
    # List envelope: route-rule items plus a total count.
    items: list[ModelRouteRulePublic]
    total: int
|
||||
|
||||
|
||||
class ModelRouteRuleCreateRequest(BaseModel):
    # Payload for creating a route rule.
    route_type: ModelRouteType
    # Optional in this schema (defaults to None).
    route_key: str | None = Field(default=None, max_length=128)
    target_model_code: str = Field(min_length=2, max_length=64)
    priority: int = 100
    enabled: bool = True
    note: str | None = Field(default=None, max_length=255)
|
||||
|
||||
|
||||
class ModelRouteRuleUpdateRequest(BaseModel):
    # Update payload; every field is optional and defaults to None.
    route_type: ModelRouteType | None = None
    route_key: str | None = Field(default=None, max_length=128)
    target_model_code: str | None = Field(default=None, min_length=2, max_length=64)
    priority: int | None = None
    enabled: bool | None = None
    note: str | None = Field(default=None, max_length=255)
|
||||
|
||||
|
||||
class ModelApiKeyPublic(BaseModel):
    # Public view of an API key version; carries the masked secret and
    # fingerprint, with no field for the raw secret or its hash.
    id: int
    model_id: int
    version: int
    secret_masked: str
    secret_fingerprint: str
    is_active: bool
    rotation_note: str | None = None
    created_by_user_id: str | None = None
    created_at: datetime
|
||||
|
||||
|
||||
class ModelApiKeyListResponse(BaseModel):
    # List envelope: API-key items plus a total count.
    items: list[ModelApiKeyPublic]
    total: int
|
||||
|
||||
|
||||
class ModelRotateKeyRequest(BaseModel):
    # New key (8-1024 characters) plus an optional note.
    api_key: str = Field(min_length=8, max_length=1024)
    note: str | None = Field(default=None, max_length=255)
|
||||
|
||||
|
||||
class ModelHealthCheckPublic(BaseModel):
    # Public view of a health check.
    id: int
    model_id: int
    status: ModelHealthStatus
    reason: str
    latency_ms: int | None = None
    # Untyped dict; its key and value types are not constrained here.
    detail_json: dict | None = None
    created_at: datetime
|
||||
|
||||
|
||||
class ModelHealthCheckListResponse(BaseModel):
    # List envelope: health-check items plus a total count.
    items: list[ModelHealthCheckPublic]
    total: int
|
||||
|
||||
|
||||
class ModelTestRunRequest(BaseModel):
    # Test-run payload; kind defaults to "SMOKE" and token counts must be >= 0.
    kind: str = Field(default="SMOKE", min_length=2, max_length=32)
    input_tokens: int = Field(default=0, ge=0)
    output_tokens: int = Field(default=0, ge=0)
|
||||
|
||||
|
||||
class ModelTestChatRequest(BaseModel):
    # Test-chat payload: a required message (1-8000 chars) and an optional
    # system prompt (at most 4000 chars).
    message: str = Field(min_length=1, max_length=8000)
    system_prompt: str | None = Field(default=None, max_length=4000)
|
||||
|
||||
|
||||
class ModelTestRunPublic(BaseModel):
    # Public view of a test run; carries both the model id and the model code.
    id: int
    model_id: int
    model_code: str
    kind: str
    status: ModelTestStatus
    input_tokens: int
    output_tokens: int
    latency_ms: int | None = None
    error_message: str | None = None
    created_by_user_id: str | None = None
    created_at: datetime
|
||||
|
||||
|
||||
class ModelTestChatResponse(BaseModel):
    # Test-chat result. Model identity and test_status are required; reply,
    # latency, token counts and error_message are optional.
    model_id: int
    model_code: str
    provider: str
    provider_model: str
    reply: str | None = None
    latency_ms: int | None = None
    prompt_tokens: int | None = None
    completion_tokens: int | None = None
    total_tokens: int | None = None
    test_status: ModelTestStatus
    error_message: str | None = None
|
||||
|
||||
|
||||
class ModelTestRunListResponse(BaseModel):
    # List envelope: test-run items plus a total count.
    items: list[ModelTestRunPublic]
    total: int
|
||||
|
||||
|
||||
class ModelUsageIngestRequest(BaseModel):
    # Usage-record payload.
    # NOTE(review): nothing here enforces success_count <= request_count.
    model_code: str = Field(min_length=2, max_length=64)
    source: str = Field(default="RUNTIME", min_length=2, max_length=32)
    request_count: int = Field(default=1, ge=1)
    success_count: int = Field(default=1, ge=0)
    total_tokens: int = Field(default=0, ge=0)
    total_cost_usd: float = Field(default=0.0, ge=0)
|
||||
|
||||
|
||||
class ModelSummaryResponse(BaseModel):
    # Summary figures: totals, per-status and per-route-type counts, and
    # usage/test summaries. All fields are required.
    total_models: int
    status_counts: dict[str, int]
    total_route_rules: int
    route_type_counts: dict[str, int]
    enabled_without_healthy_check: int
    usage_7d: ModelUsageSummary
    tests_7d: ModelTestSummary
|
||||
@@ -44,9 +44,7 @@ REMOVED_MENU_CODES = {
|
||||
"admin.schedule",
|
||||
"admin.mindmap",
|
||||
"admin.mermaid_mgr",
|
||||
"admin.chat",
|
||||
"admin.api_tester",
|
||||
"admin.models",
|
||||
"admin.orchestration",
|
||||
"admin.mdresolve",
|
||||
"admin.data_query",
|
||||
|
||||
@@ -42,9 +42,7 @@ REMOVED_MENU_CODES = {
|
||||
"admin.schedule",
|
||||
"admin.mindmap",
|
||||
"admin.mermaid_mgr",
|
||||
"admin.chat",
|
||||
"admin.api_tester",
|
||||
"admin.models",
|
||||
"admin.orchestration",
|
||||
"admin.mdresolve",
|
||||
"admin.data_query",
|
||||
|
||||
@@ -61,9 +61,7 @@ DISABLED_MENU_CODES: set[str] = {
|
||||
"admin.schedule",
|
||||
"admin.mindmap",
|
||||
"admin.mermaid_mgr",
|
||||
"admin.chat",
|
||||
"admin.api_tester",
|
||||
"admin.models",
|
||||
"admin.orchestration",
|
||||
"admin.mdresolve",
|
||||
"admin.data_query",
|
||||
|
||||
@@ -1,252 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
import httpx
|
||||
from fastapi import HTTPException, status
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..core.config import get_settings
|
||||
from ..models.model_registry import ModelApiKey, ModelRegistry, ModelRouteRule
|
||||
|
||||
# Settings are resolved once, when this module is imported.
settings = get_settings()
# Route key tried first (under route_type "CAPABILITY") when picking the chat model.
CHAT_CAPABILITY_ROUTE_KEY = "chat.default"
# Route key tried second (under route_type "GLOBAL") as the fallback.
GLOBAL_ROUTE_KEY = "__global__"
# Base URL used when a model declares no base_url of its own.
DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1"
|
||||
|
||||
|
||||
@dataclass
class LlmCompletionResult:
    """Outcome of one successful chat-completion call.

    The token counters are ``None`` when the upstream response carried no
    usable ``usage`` figures.
    """

    # Assistant text extracted from the response.
    content: str

    # Identity of the model that served the request.
    model_code: str
    provider: str
    provider_model: str

    # Token accounting as reported upstream.
    prompt_tokens: int | None
    completion_tokens: int | None
    total_tokens: int | None

    # Elapsed time of the HTTP call, in whole milliseconds.
    latency_ms: int
|
||||
|
||||
|
||||
def create_assistant_reply(
    db: Session,
    *,
    user_message: str,
    context_messages: list[tuple[str, str]],
    system_prompt: str,
) -> LlmCompletionResult:
    """Resolve the chat model from the routing tables and ask it for a reply.

    Args:
        db: Session used to look up route rules and models.
        user_message: The new user turn.
        context_messages: Earlier ``(role, content)`` turns.
        system_prompt: System prompt text.

    Returns:
        The completion result produced by ``create_reply_with_model``.

    Raises:
        HTTPException: Propagated from model resolution or from the
            completion call.
    """
    return create_reply_with_model(
        model=_resolve_chat_model(db),
        user_message=user_message,
        context_messages=context_messages,
        system_prompt=system_prompt,
    )
|
||||
|
||||
|
||||
def create_reply_with_model(
    *,
    model: ModelRegistry,
    user_message: str,
    context_messages: list[tuple[str, str]],
    system_prompt: str,
) -> LlmCompletionResult:
    """Send one chat-completion request for *model* and parse the reply.

    The request is a POST to the endpoint derived from ``model.base_url``,
    authenticated with the provider key configured for ``model.provider``.

    Args:
        model: Registry entry supplying provider, provider model and base URL.
        user_message: The new user turn.
        context_messages: Earlier ``(role, content)`` turns.
        system_prompt: System prompt text.

    Returns:
        The extracted reply together with token usage (when reported) and the
        measured latency in milliseconds.

    Raises:
        HTTPException: 400 when no provider key is configured; 504 on a
            request timeout; 502 on any other transport error, an upstream
            status >= 400, a success body that is not valid JSON, or an
            empty reply.
    """
    provider_key = _resolve_provider_key(model.provider)
    endpoint = _build_endpoint(model.base_url)
    payload = {
        "model": model.provider_model,
        "messages": _build_messages(
            system_prompt=system_prompt,
            context_messages=context_messages,
            user_message=user_message,
        ),
    }

    started = time.perf_counter()
    try:
        with httpx.Client(timeout=settings.llm_request_timeout_seconds) as client:
            response = client.post(
                endpoint,
                headers={
                    "Authorization": f"Bearer {provider_key}",
                    "Content-Type": "application/json",
                },
                json=payload,
            )
    except httpx.TimeoutException as exc:
        raise HTTPException(status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail="LLM request timeout") from exc
    except httpx.HTTPError as exc:
        # Only the exception class name is exposed, not its message.
        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=f"LLM request failed: {exc.__class__.__name__}") from exc

    latency_ms = int((time.perf_counter() - started) * 1000)
    if response.status_code >= 400:
        detail = _extract_http_error_detail(response)
        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=f"LLM response error: {detail}")

    # A 2xx/3xx response with a non-JSON body (e.g. a proxy's HTML page) must
    # surface as a gateway error instead of an unhandled ValueError.
    # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
    try:
        body = response.json()
    except ValueError as exc:
        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="LLM returned invalid JSON") from exc

    content = _extract_content(body)
    if not content:
        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="LLM returned empty content")

    # Usage is optional; a missing or malformed block yields None counters.
    usage = body.get("usage") if isinstance(body, dict) else None
    if not isinstance(usage, dict):
        usage = {}

    return LlmCompletionResult(
        content=content,
        model_code=model.code,
        provider=model.provider,
        provider_model=model.provider_model,
        prompt_tokens=_to_int(usage.get("prompt_tokens")),
        completion_tokens=_to_int(usage.get("completion_tokens")),
        total_tokens=_to_int(usage.get("total_tokens")),
        latency_ms=latency_ms,
    )
|
||||
|
||||
|
||||
def _resolve_chat_model(db: Session) -> ModelRegistry:
    """Pick the model that should serve chat requests.

    The ``CAPABILITY`` route for ``chat.default`` is tried first, then the
    ``GLOBAL`` route.

    Raises:
        HTTPException: 400 when neither route resolves to a model.
    """
    lookup_order = (
        ("CAPABILITY", CHAT_CAPABILITY_ROUTE_KEY),
        ("GLOBAL", GLOBAL_ROUTE_KEY),
    )
    for route_type, route_key in lookup_order:
        resolved = _resolve_model_from_route(db, route_type=route_type, route_key=route_key)
        if resolved:
            return resolved

    raise HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail="No enabled model route for chat (CAPABILITY:chat.default or GLOBAL)",
    )
|
||||
|
||||
|
||||
def _resolve_model_from_route(
    db: Session,
    *,
    route_type: str,
    route_key: str,
) -> ModelRegistry | None:
    """Return the first routable model for ``(route_type, route_key)``.

    Candidates are enabled rules whose target model has status ``ENABLED``,
    ordered by ascending priority and then rule id. The first candidate with
    at least one active API key row is returned.

    Returns:
        The matching model, or ``None`` when no candidate qualifies.
    """
    candidates_query = (
        select(ModelRouteRule, ModelRegistry)
        .join(ModelRegistry, ModelRouteRule.target_model_code == ModelRegistry.code)
        .where(
            ModelRouteRule.route_type == route_type,
            ModelRouteRule.route_key == route_key,
            ModelRouteRule.enabled.is_(True),
            ModelRegistry.status == "ENABLED",
        )
        .order_by(ModelRouteRule.priority.asc(), ModelRouteRule.id.asc())
    )

    for _rule, candidate in db.execute(candidates_query).all():
        # One lookup per candidate: does it own an active key?
        active_key_id = db.scalar(
            select(ModelApiKey.id).where(
                ModelApiKey.model_id == candidate.id,
                ModelApiKey.is_active.is_(True),
            )
        )
        if active_key_id is not None:
            return candidate
    return None
|
||||
|
||||
|
||||
def _resolve_provider_key(provider: str) -> str:
    """Look up the configured API key for *provider*.

    The provider name is stripped and lower-cased before the lookup in
    ``settings.llm_provider_key_map``.

    Raises:
        HTTPException: 400 when no non-empty key is configured.
    """
    lookup_name = provider.strip().lower()
    configured = settings.llm_provider_key_map.get(lookup_name)
    if not configured:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Missing provider key for {provider}. Configure LLM_PROVIDER_API_KEYS.",
        )
    return configured
|
||||
|
||||
|
||||
def _build_messages(
|
||||
*,
|
||||
system_prompt: str,
|
||||
context_messages: list[tuple[str, str]],
|
||||
user_message: str,
|
||||
) -> list[dict[str, str]]:
|
||||
messages: list[dict[str, str]] = []
|
||||
normalized_system_prompt = system_prompt.strip()
|
||||
if normalized_system_prompt:
|
||||
messages.append({"role": "system", "content": normalized_system_prompt})
|
||||
|
||||
for role, content in context_messages:
|
||||
if role not in {"user", "assistant"}:
|
||||
continue
|
||||
normalized_content = content.strip()
|
||||
if not normalized_content:
|
||||
continue
|
||||
messages.append({"role": role, "content": normalized_content})
|
||||
|
||||
messages.append({"role": "user", "content": user_message.strip()})
|
||||
return messages
|
||||
|
||||
|
||||
def _build_endpoint(base_url: str | None) -> str:
|
||||
normalized = (base_url or "").strip().rstrip("/")
|
||||
if not normalized:
|
||||
return f"{DEFAULT_OPENAI_BASE_URL}/chat/completions"
|
||||
if normalized.endswith("/chat/completions"):
|
||||
return normalized
|
||||
return f"{normalized}/chat/completions"
|
||||
|
||||
|
||||
def _extract_content(body: object) -> str:
|
||||
if not isinstance(body, dict):
|
||||
return ""
|
||||
|
||||
choices = body.get("choices")
|
||||
if not isinstance(choices, list) or not choices:
|
||||
return ""
|
||||
first = choices[0]
|
||||
if not isinstance(first, dict):
|
||||
return ""
|
||||
message = first.get("message")
|
||||
if not isinstance(message, dict):
|
||||
return ""
|
||||
|
||||
content = message.get("content")
|
||||
if isinstance(content, str):
|
||||
return content.strip()
|
||||
if isinstance(content, list):
|
||||
texts: list[str] = []
|
||||
for item in content:
|
||||
if isinstance(item, dict):
|
||||
text = item.get("text")
|
||||
if isinstance(text, str) and text.strip():
|
||||
texts.append(text.strip())
|
||||
return "\n".join(texts).strip()
|
||||
return ""
|
||||
|
||||
|
||||
def _extract_http_error_detail(response: httpx.Response) -> str:
|
||||
try:
|
||||
payload = response.json()
|
||||
except json.JSONDecodeError:
|
||||
return f"HTTP {response.status_code}"
|
||||
if isinstance(payload, dict):
|
||||
detail = payload.get("error")
|
||||
if isinstance(detail, dict):
|
||||
message = detail.get("message")
|
||||
if isinstance(message, str) and message.strip():
|
||||
return message.strip()
|
||||
message = payload.get("message")
|
||||
if isinstance(message, str) and message.strip():
|
||||
return message.strip()
|
||||
detail_field = payload.get("detail")
|
||||
if isinstance(detail_field, str) and detail_field.strip():
|
||||
return detail_field.strip()
|
||||
return f"HTTP {response.status_code}"
|
||||
|
||||
|
||||
def _to_int(value: object) -> int | None:
|
||||
if isinstance(value, int):
|
||||
return value
|
||||
if isinstance(value, str) and value.isdigit():
|
||||
return int(value)
|
||||
return None
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,54 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import os
|
||||
import unittest
|
||||
|
||||
os.environ.setdefault("DATABASE_URL", "sqlite+pysqlite:///:memory:")
|
||||
os.environ.setdefault("MINIO_ENABLED", "false")
|
||||
|
||||
from api.app import models # noqa: F401
|
||||
from api.app.core.config import Settings
|
||||
from api.app.core.database import Base
|
||||
from api.app.services import admin_service, legacy_admin_rbac_service, legacy_authz_service
|
||||
|
||||
|
||||
class LegacyLlmCleanupTest(unittest.TestCase):
    """Regression checks that the removed chat / model-registry pieces stay gone."""

    def test_llm_registry_tables_removed_from_metadata(self) -> None:
        """The model module is not exported and none of its tables are mapped."""
        self.assertNotIn("model_registry", models.__all__)

        still_mapped = {
            "llm_models",
            "model_route_rules",
            "model_api_keys",
            "model_health_checks",
            "model_test_runs",
            "model_usage_logs",
        }.intersection(Base.metadata.tables)
        self.assertFalse(still_mapped)

    def test_llm_config_fields_removed(self) -> None:
        """``Settings`` no longer declares any of the LLM / chat fields."""
        still_declared = {
            "llm_provider_api_keys",
            "llm_request_timeout_seconds",
            "chat_context_message_limit",
            "chat_default_system_prompt",
        }.intersection(Settings.model_fields)
        self.assertFalse(still_declared)

    def test_legacy_menu_filters_no_longer_reference_chat_and_models(self) -> None:
        """None of the three menu-code sets mention the chat or models menus."""
        menu_code_sets = (
            admin_service.REMOVED_MENU_CODES,
            legacy_admin_rbac_service.REMOVED_MENU_CODES,
            legacy_authz_service.DISABLED_MENU_CODES,
        )
        for codes in menu_code_sets:
            for menu_code in ("admin.chat", "admin.models"):
                self.assertNotIn(menu_code, codes)

    def test_calendar_service_module_removed(self) -> None:
        """The calendar event service module can no longer be located."""
        spec = importlib.util.find_spec("api.app.services.calendar_event_service")
        self.assertIsNone(spec)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user