feat: add scheduler and flower monitoring with worker registry
This commit is contained in:
@@ -35,6 +35,16 @@ CELERY_TIMEZONE=Asia/Shanghai
|
||||
CELERY_LOG_LEVEL=INFO
|
||||
CELERY_WORKER_CONCURRENCY=2
|
||||
SCHEDULER_EXPIRE_INTERVAL_SECONDS=60
|
||||
SCHEDULER_API_BASE_URL=http://scheduler:19100
|
||||
SCHEDULER_API_TOKEN=
|
||||
SCHEDULER_DEFAULT_QUEUE=default
|
||||
FLOWER_API_BASE_URL=http://flower:5555
|
||||
FLOWER_API_TIMEOUT_SECONDS=10
|
||||
FLOWER_BASIC_AUTH=admin:admin
|
||||
WORKER_REGISTRY_TTL_SECONDS=90
|
||||
CELERY_WORKER_QUEUES=default,celery
|
||||
SCHEDULER_PORT=19100
|
||||
FLOWER_PORT=5555
|
||||
WINE_BINARY_PATH=wine
|
||||
WINE_ALLOWED_ROOT=./data/wine
|
||||
WINE_DEFAULT_TIMEOUT_SECONDS=300
|
||||
|
||||
@@ -125,6 +125,32 @@
|
||||
- 运行时真实 Provider Key 不从数据库反解,统一从环境变量 `LLM_PROVIDER_API_KEYS` 注入(支持 `openai=sk-...` 或 JSON 字典字符串)。
|
||||
- 一期模型调用采用非流式 OpenAI-compatible `POST /chat/completions`,后续如需流式再扩展 SSE/WS。
|
||||
|
||||
## Celery 监控口径(2026-05-01)
|
||||
|
||||
- Celery 监控采用“双页面”:
|
||||
- `Worker监控`:`/admin/workers`(聚焦 worker 在线状态、并发、队列、单 worker 任务快照)
|
||||
- `任务监控`:`/admin/task-monitor`(聚焦全局任务状态分布、队列积压、任务明细)
|
||||
- Worker 监控 API 入口:
|
||||
- `GET /api/v1/admin/workers/overview`
|
||||
- `GET /api/v1/admin/workers/tasks?worker=...&recent_limit=...`
|
||||
- 两页统一复用权限码:`celery.read` / `celery.manage`。
|
||||
|
||||
## 调度与监控口径(2026-05-01)
|
||||
|
||||
- 调度能力拆分为独立 `scheduler` 服务:
|
||||
- scheduler 入口:`/api/v1/scheduler/*`
|
||||
- 任务入队:`POST /api/v1/scheduler/v1/tasks/enqueue`
|
||||
- 任务撤销:`POST /api/v1/scheduler/v1/tasks/revoke`
|
||||
- 通过 `x-scheduler-token`(`SCHEDULER_API_TOKEN`)做可选鉴权。
|
||||
- 监控能力统一走 Flower 代理:
|
||||
- 后端代理入口:
|
||||
- `GET /api/v1/admin/flower/workers`
|
||||
- `GET /api/v1/admin/flower/worker-tasks`
|
||||
- 前端 `Worker监控` 与 `任务监控` 页面都通过后端代理读取 Flower 数据,不直接连 Flower。
|
||||
- Worker 自动注册机制:
|
||||
- Celery worker 启停/心跳通过 signals 写入 `worker_registry` 表。
|
||||
- Beat 定时任务 `app.tasks.worker_registry_tasks.sweep_worker_registry_offline` 负责离线兜底标记。
|
||||
|
||||
## 前端 Radix 全量化口径(2026-04-18)
|
||||
|
||||
- `web/src/app/**` 已完成“去语义类 + 组件全量 Radix 化”:不再依赖 `surface-card` / `btn-*` / `control` / `table-*` / `notice*` / `text-muted` 等自定义语义类。
|
||||
|
||||
@@ -5,9 +5,11 @@ from .v1.admin_files import router as admin_files_router
|
||||
from .v1.atp_models import router as atp_models_router
|
||||
from .v1.auth import router as auth_router
|
||||
from .v1.elevation import router as elevation_router
|
||||
from .v1.flower_monitor import router as flower_monitor_router
|
||||
from .v1.lightning import router as lightning_router
|
||||
from .v1.lines import router as lines_router
|
||||
from .v1.question_bank import router as question_bank_router
|
||||
from .v1.scheduler import router as scheduler_router
|
||||
from .v1.system_params import router as system_params_router
|
||||
from .v1.task_monitor import router as task_monitor_router
|
||||
from .v1.users import router as users_router
|
||||
@@ -23,12 +25,15 @@ v1_router.include_router(atp_models_router)
|
||||
v1_router.include_router(task_monitor_router)
|
||||
v1_router.include_router(system_params_router)
|
||||
v1_router.include_router(elevation_router)
|
||||
v1_router.include_router(flower_monitor_router)
|
||||
v1_router.include_router(lightning_router)
|
||||
v1_router.include_router(lines_router)
|
||||
v1_router.include_router(question_bank_router)
|
||||
v1_router.include_router(scheduler_router)
|
||||
v1_router.include_router(wine_router)
|
||||
v1_router.include_router(ws_router)
|
||||
|
||||
|
||||
@v1_router.get("/ping")
|
||||
def ping() -> dict[str, str]:
|
||||
return {"message": "pong"}
|
||||
|
||||
@@ -111,7 +111,13 @@ def get_elevation_job_detail(
|
||||
@router.post("/jobs/apply-line", response_model=ElevationApplyJobCreateResponse)
|
||||
def create_elevation_apply_line_job(
|
||||
payload: ElevationApplyJobCreateRequest,
|
||||
dispatch_mode: str = Query(default="celery_direct", alias="dispatchMode"),
|
||||
current_user: CurrentUser = Depends(require_permission("elevation.manage")),
|
||||
db: Session = Depends(get_db),
|
||||
) -> ElevationApplyJobCreateResponse:
|
||||
return create_apply_job(db, payload, actor=current_user.user)
|
||||
return create_apply_job(
|
||||
db,
|
||||
payload,
|
||||
actor=current_user.user,
|
||||
dispatch_mode=dispatch_mode,
|
||||
)
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, Query
|
||||
|
||||
from ...core.dependencies import CurrentUser, require_any_permission
|
||||
from ...schemas.flower_monitor import (
|
||||
FlowerWorkerTaskOverviewResponse,
|
||||
FlowerWorkersOverviewResponse,
|
||||
)
|
||||
from ...services.flower_monitor_service import (
|
||||
build_worker_task_overview,
|
||||
build_workers_overview,
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/admin/flower", tags=["admin-flower"])
|
||||
|
||||
|
||||
@router.get("/workers", response_model=FlowerWorkersOverviewResponse)
|
||||
def get_flower_workers_overview(
|
||||
force_refresh: bool = Query(default=False, alias="forceRefresh"),
|
||||
_: CurrentUser = Depends(require_any_permission("celery.read", "celery.manage")),
|
||||
) -> FlowerWorkersOverviewResponse:
|
||||
return build_workers_overview(force_refresh=force_refresh)
|
||||
|
||||
|
||||
@router.get("/worker-tasks", response_model=FlowerWorkerTaskOverviewResponse)
|
||||
def get_flower_worker_tasks(
|
||||
worker: str = Query(min_length=1, max_length=255),
|
||||
force_refresh: bool = Query(default=False, alias="forceRefresh"),
|
||||
recent_limit: int = Query(default=100, alias="recentLimit", ge=1, le=200),
|
||||
_: CurrentUser = Depends(require_any_permission("celery.read", "celery.manage")),
|
||||
) -> FlowerWorkerTaskOverviewResponse:
|
||||
return build_worker_task_overview(
|
||||
worker=worker,
|
||||
force_refresh=force_refresh,
|
||||
recent_limit=recent_limit,
|
||||
)
|
||||
@@ -0,0 +1,51 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, Header, HTTPException, status
|
||||
|
||||
from ...core.config import get_settings
|
||||
from ...schemas.scheduler import (
|
||||
SchedulerEnqueueTaskRequest,
|
||||
SchedulerEnqueueTaskResponse,
|
||||
SchedulerRevokeTaskRequest,
|
||||
SchedulerRevokeTaskResponse,
|
||||
)
|
||||
from ...services.scheduler_service import enqueue_task, revoke_task
|
||||
|
||||
router = APIRouter(prefix="/scheduler", tags=["scheduler"])
|
||||
|
||||
|
||||
def _require_scheduler_token(
|
||||
x_scheduler_token: str | None = Header(default=None, alias="x-scheduler-token"),
|
||||
) -> None:
|
||||
expected = get_settings().resolved_scheduler_api_token
|
||||
if not expected:
|
||||
return
|
||||
provided = (x_scheduler_token or "").strip()
|
||||
if provided == expected:
|
||||
return
|
||||
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Unauthorized scheduler request")
|
||||
|
||||
|
||||
@router.get("/healthz")
|
||||
def scheduler_healthz() -> dict[str, object]:
|
||||
return {
|
||||
"success": True,
|
||||
"status": "ok",
|
||||
"service": "fquiz-scheduler",
|
||||
}
|
||||
|
||||
|
||||
@router.post("/v1/tasks/enqueue", response_model=SchedulerEnqueueTaskResponse)
|
||||
def enqueue_scheduler_task(
|
||||
payload: SchedulerEnqueueTaskRequest,
|
||||
_: None = Depends(_require_scheduler_token),
|
||||
) -> SchedulerEnqueueTaskResponse:
|
||||
return enqueue_task(payload)
|
||||
|
||||
|
||||
@router.post("/v1/tasks/revoke", response_model=SchedulerRevokeTaskResponse)
|
||||
def revoke_scheduler_task(
|
||||
payload: SchedulerRevokeTaskRequest,
|
||||
_: None = Depends(_require_scheduler_token),
|
||||
) -> SchedulerRevokeTaskResponse:
|
||||
return revoke_task(payload)
|
||||
@@ -10,7 +10,11 @@ celery_app = Celery(
|
||||
"fquiz",
|
||||
broker=settings.resolved_celery_broker_url,
|
||||
backend=settings.resolved_celery_result_backend,
|
||||
include=["app.tasks.schedule_tasks", "app.tasks.elevation_tasks"],
|
||||
include=[
|
||||
"app.tasks.schedule_tasks",
|
||||
"app.tasks.elevation_tasks",
|
||||
"app.tasks.worker_registry_tasks",
|
||||
],
|
||||
)
|
||||
|
||||
celery_app.conf.update(
|
||||
@@ -20,6 +24,10 @@ celery_app.conf.update(
|
||||
"task": "app.tasks.schedule_tasks.expire_overdue_schedule_items",
|
||||
"schedule": settings.scheduler_expire_interval_seconds,
|
||||
},
|
||||
"sweep-worker-registry-offline": {
|
||||
"task": "app.tasks.worker_registry_tasks.sweep_worker_registry_offline",
|
||||
"schedule": 30.0,
|
||||
},
|
||||
},
|
||||
enable_utc=True,
|
||||
result_serializer="json",
|
||||
@@ -29,3 +37,6 @@ celery_app.conf.update(
|
||||
timezone=settings.celery_timezone,
|
||||
worker_prefetch_multiplier=1,
|
||||
)
|
||||
|
||||
# Register worker lifecycle signals for auto-registration/heartbeat/offline states.
|
||||
from . import worker_signals as _worker_signals # noqa: F401,E402
|
||||
|
||||
@@ -52,6 +52,13 @@ class Settings(BaseSettings):
|
||||
celery_result_backend: str | None = None
|
||||
celery_timezone: str = "Asia/Shanghai"
|
||||
scheduler_expire_interval_seconds: int = 60
|
||||
scheduler_api_token: str = ""
|
||||
scheduler_default_queue: str = "default"
|
||||
scheduler_api_base_url: str = "http://scheduler:19100"
|
||||
flower_api_base_url: str = "http://flower:5555"
|
||||
flower_api_timeout_seconds: int = 10
|
||||
flower_basic_auth: str = ""
|
||||
worker_registry_ttl_seconds: int = 90
|
||||
|
||||
wine_binary_path: str = "wine"
|
||||
wine_allowed_root: str = "./data/wine"
|
||||
@@ -83,6 +90,8 @@ class Settings(BaseSettings):
|
||||
"chat_context_message_limit",
|
||||
"db_port",
|
||||
"scheduler_expire_interval_seconds",
|
||||
"flower_api_timeout_seconds",
|
||||
"worker_registry_ttl_seconds",
|
||||
"wine_default_timeout_seconds",
|
||||
"wine_max_timeout_seconds",
|
||||
"atp_engine_default_timeout_seconds",
|
||||
@@ -192,6 +201,33 @@ class Settings(BaseSettings):
|
||||
def resolved_celery_result_backend(self) -> str:
|
||||
return (self.celery_result_backend or "redis://redis:6379/1").strip()
|
||||
|
||||
@property
|
||||
def resolved_scheduler_api_token(self) -> str:
|
||||
return (self.scheduler_api_token or "").strip()
|
||||
|
||||
@property
|
||||
def resolved_scheduler_default_queue(self) -> str:
|
||||
queue = (self.scheduler_default_queue or "").strip()
|
||||
return queue or "default"
|
||||
|
||||
@property
|
||||
def resolved_scheduler_api_base_url(self) -> str:
|
||||
value = (self.scheduler_api_base_url or "").strip().rstrip("/")
|
||||
if not value:
|
||||
return "http://scheduler:19100"
|
||||
return value
|
||||
|
||||
@property
|
||||
def resolved_flower_api_base_url(self) -> str:
|
||||
value = (self.flower_api_base_url or "").strip().rstrip("/")
|
||||
if not value:
|
||||
return "http://flower:5555"
|
||||
return value
|
||||
|
||||
@property
|
||||
def resolved_flower_basic_auth(self) -> str:
|
||||
return (self.flower_basic_auth or "").strip()
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_settings() -> Settings:
|
||||
|
||||
@@ -215,6 +215,7 @@ def init_db() -> None:
|
||||
system_param,
|
||||
todo,
|
||||
user,
|
||||
worker_registry,
|
||||
) # noqa: F401
|
||||
from ..services.seed_service import seed_defaults
|
||||
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from celery import signals
|
||||
|
||||
from ..services.worker_registry_service import mark_worker_offline, register_worker
|
||||
|
||||
|
||||
def _normalize_worker_name(sender: object | None) -> str:
|
||||
if sender is None:
|
||||
return ""
|
||||
text = str(sender).strip()
|
||||
return text
|
||||
|
||||
|
||||
def _extract_queue_names(sender: object | None) -> list[str]:
|
||||
consumer = getattr(sender, "consumer", None)
|
||||
if consumer is None:
|
||||
return []
|
||||
queues = getattr(consumer, "queues", None)
|
||||
if queues is None:
|
||||
return []
|
||||
names: list[str] = []
|
||||
try:
|
||||
iterator = list(queues)
|
||||
except TypeError:
|
||||
iterator = []
|
||||
for item in iterator:
|
||||
name = getattr(item, "name", None)
|
||||
if name is None:
|
||||
continue
|
||||
value = str(name).strip()
|
||||
if value:
|
||||
names.append(value)
|
||||
return names
|
||||
|
||||
|
||||
@signals.worker_ready.connect
|
||||
def on_worker_ready(sender=None, **kwargs): # type: ignore[no-untyped-def]
|
||||
worker_name = _normalize_worker_name(sender)
|
||||
if not worker_name:
|
||||
return
|
||||
queues = _extract_queue_names(sender)
|
||||
register_worker(
|
||||
worker_name=worker_name,
|
||||
status="online",
|
||||
queues=queues,
|
||||
heartbeat_increment=False,
|
||||
metadata={"event": "worker_ready"},
|
||||
)
|
||||
|
||||
|
||||
@signals.heartbeat_sent.connect
|
||||
def on_worker_heartbeat(sender=None, **kwargs): # type: ignore[no-untyped-def]
|
||||
worker_name = _normalize_worker_name(sender)
|
||||
if not worker_name:
|
||||
return
|
||||
register_worker(
|
||||
worker_name=worker_name,
|
||||
status="online",
|
||||
heartbeat_increment=True,
|
||||
metadata={"event": "heartbeat_sent"},
|
||||
)
|
||||
|
||||
|
||||
@signals.worker_shutdown.connect
|
||||
def on_worker_shutdown(sender=None, **kwargs): # type: ignore[no-untyped-def]
|
||||
worker_name = _normalize_worker_name(sender)
|
||||
if not worker_name:
|
||||
return
|
||||
mark_worker_offline(worker_name)
|
||||
@@ -4,7 +4,7 @@ Import all model modules during package initialization so SQLAlchemy can
|
||||
resolve string-based relationships regardless of route/service import order.
|
||||
"""
|
||||
|
||||
from . import atp_model, audit_log, auth_session, calendar_event, elevation, file_storage, hot_search, lightning_event, lightning_sample, line, line_tower, menu, model_registry, object_group, question_bank, rbac, requirement, system_param, todo, user
|
||||
from . import atp_model, audit_log, auth_session, calendar_event, elevation, file_storage, hot_search, lightning_event, lightning_sample, line, line_tower, menu, model_registry, object_group, question_bank, rbac, requirement, system_param, todo, user, worker_registry
|
||||
|
||||
__all__ = [
|
||||
"atp_model",
|
||||
@@ -27,4 +27,5 @@ __all__ = [
|
||||
"system_param",
|
||||
"todo",
|
||||
"user",
|
||||
"worker_registry",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from uuid import uuid4
|
||||
|
||||
from sqlalchemy import DateTime, Index, Integer, String
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from ..core.database import Base
|
||||
from .base import utcnow
|
||||
|
||||
|
||||
class WorkerRegistry(Base):
|
||||
__tablename__ = "worker_registry"
|
||||
__table_args__ = (
|
||||
Index("idx_worker_registry_worker", "worker_name"),
|
||||
Index("idx_worker_registry_status", "status"),
|
||||
Index("idx_worker_registry_last_seen", "last_seen_at"),
|
||||
)
|
||||
|
||||
id: Mapped[str] = mapped_column(
|
||||
String(32),
|
||||
primary_key=True,
|
||||
default=lambda: uuid4().hex,
|
||||
)
|
||||
worker_name: Mapped[str] = mapped_column(String(255), unique=True, nullable=False)
|
||||
status: Mapped[str] = mapped_column(String(32), default="online", index=True)
|
||||
queues_csv: Mapped[str | None] = mapped_column(String(2000))
|
||||
pid: Mapped[int | None] = mapped_column(Integer)
|
||||
heartbeat_count: Mapped[int] = mapped_column(Integer, default=0)
|
||||
first_seen_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
|
||||
last_seen_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, index=True)
|
||||
metadata_json: Mapped[str | None] = mapped_column(String(4000))
|
||||
create_date: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, index=True)
|
||||
create_user: Mapped[str | None] = mapped_column(String(64), index=True)
|
||||
update_date: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
default=utcnow,
|
||||
onupdate=utcnow,
|
||||
)
|
||||
update_user: Mapped[str | None] = mapped_column(String(64), index=True)
|
||||
@@ -0,0 +1,10 @@
|
||||
from fastapi import FastAPI
|
||||
|
||||
from .api.v1.scheduler import router as scheduler_router
|
||||
|
||||
app = FastAPI(
|
||||
title="fquiz-scheduler",
|
||||
version="0.1.0",
|
||||
)
|
||||
|
||||
app.include_router(scheduler_router, prefix="/api/v1")
|
||||
@@ -0,0 +1,66 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class FlowerWorkerItem(BaseModel):
|
||||
worker: str
|
||||
status: str
|
||||
queue_names: list[str] = Field(default_factory=list)
|
||||
registered_count: int = 0
|
||||
processed_count: int = 0
|
||||
concurrency: int = 0
|
||||
prefetch_count: int = 0
|
||||
active_count: int = 0
|
||||
reserved_count: int = 0
|
||||
scheduled_count: int = 0
|
||||
last_heartbeat_at: datetime | None = None
|
||||
|
||||
|
||||
class FlowerWorkersSummary(BaseModel):
|
||||
total: int = 0
|
||||
online: int = 0
|
||||
offline: int = 0
|
||||
|
||||
|
||||
class FlowerWorkersOverviewResponse(BaseModel):
|
||||
generated_at: datetime
|
||||
workers: list[FlowerWorkerItem] = Field(default_factory=list)
|
||||
summary: FlowerWorkersSummary = Field(default_factory=FlowerWorkersSummary)
|
||||
|
||||
|
||||
class FlowerTaskItem(BaseModel):
|
||||
task_id: str
|
||||
name: str
|
||||
state: str
|
||||
source: str
|
||||
worker: str
|
||||
queue_name: str | None = None
|
||||
args_text: str | None = None
|
||||
kwargs_text: str | None = None
|
||||
eta: datetime | None = None
|
||||
received_at: datetime | None = None
|
||||
started_at: datetime | None = None
|
||||
finished_at: datetime | None = None
|
||||
runtime_seconds: float | None = None
|
||||
result_text: str | None = None
|
||||
exception_text: str | None = None
|
||||
|
||||
|
||||
class FlowerWorkerTaskSummary(BaseModel):
|
||||
active: int = 0
|
||||
reserved: int = 0
|
||||
scheduled: int = 0
|
||||
recent: int = 0
|
||||
|
||||
|
||||
class FlowerWorkerTaskOverviewResponse(BaseModel):
|
||||
generated_at: datetime
|
||||
worker: str
|
||||
active_tasks: list[FlowerTaskItem] = Field(default_factory=list)
|
||||
reserved_tasks: list[FlowerTaskItem] = Field(default_factory=list)
|
||||
scheduled_tasks: list[FlowerTaskItem] = Field(default_factory=list)
|
||||
recent_tasks: list[FlowerTaskItem] = Field(default_factory=list)
|
||||
summary: FlowerWorkerTaskSummary = Field(default_factory=FlowerWorkerTaskSummary)
|
||||
@@ -0,0 +1,41 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class SchedulerEnqueueTaskRequest(BaseModel):
|
||||
task_name: str = Field(min_length=1, max_length=255, alias="taskName")
|
||||
queue_name: str | None = Field(default=None, max_length=255, alias="queueName")
|
||||
task_id: str | None = Field(default=None, max_length=255, alias="taskId")
|
||||
args: list[Any] = Field(default_factory=list)
|
||||
kwargs: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
model_config = {"populate_by_name": True}
|
||||
|
||||
|
||||
class SchedulerEnqueueTaskResponse(BaseModel):
|
||||
queued: bool = True
|
||||
task_id: str = Field(alias="taskId")
|
||||
queue_name: str = Field(alias="queueName")
|
||||
task_name: str = Field(alias="taskName")
|
||||
|
||||
model_config = {"populate_by_name": True}
|
||||
|
||||
|
||||
class SchedulerRevokeTaskRequest(BaseModel):
|
||||
task_id: str = Field(min_length=1, max_length=255, alias="taskId")
|
||||
terminate: bool = True
|
||||
signal: str = Field(default="SIGTERM", max_length=32)
|
||||
|
||||
model_config = {"populate_by_name": True}
|
||||
|
||||
|
||||
class SchedulerRevokeTaskResponse(BaseModel):
|
||||
revoked: bool = True
|
||||
task_id: str = Field(alias="taskId")
|
||||
terminate: bool
|
||||
signal: str
|
||||
|
||||
model_config = {"populate_by_name": True}
|
||||
@@ -403,7 +403,7 @@ def update_menu(db: Session, menu_id: int, payload: MenuUpdateRequest) -> MenuPu
|
||||
|
||||
def delete_menu(db: Session, menu_id: int) -> bool:
|
||||
menu = get_menu_by_id(db, menu_id)
|
||||
if not menu or menu.code in {"dashboard", "admin.users", "admin.roles", "admin.menus", "admin.system_params", "admin.power_lines", "admin.lightning_currents", "admin.lightning_distribution", "admin.task_monitor", "admin.atp_models", "admin.files", "admin.elevation", "admin.syslog", "admin.wine_runner"}:
|
||||
if not menu or menu.code in {"dashboard", "admin.users", "admin.roles", "admin.menus", "admin.system_params", "admin.power_lines", "admin.lightning_currents", "admin.lightning_distribution", "admin.workers", "admin.task_monitor", "admin.atp_models", "admin.files", "admin.elevation", "admin.syslog", "admin.wine_runner"}:
|
||||
return False
|
||||
child_exists = db.scalar(select(Menu.id).where(Menu.parent_id == menu_id))
|
||||
if child_exists is not None:
|
||||
|
||||
@@ -335,6 +335,7 @@ def create_apply_job(
|
||||
payload: ElevationApplyJobCreateRequest,
|
||||
*,
|
||||
actor: User,
|
||||
dispatch_mode: str = "celery_direct",
|
||||
) -> ElevationApplyJobCreateResponse:
|
||||
line = db.execute(select(Line).where(Line.id == payload.line_id)).scalar_one_or_none()
|
||||
if not line:
|
||||
@@ -379,9 +380,10 @@ def create_apply_job(
|
||||
if not saved:
|
||||
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="创建任务失败")
|
||||
|
||||
from ..tasks.elevation_tasks import apply_elevation_for_line_job
|
||||
|
||||
task = apply_elevation_for_line_job.delay(saved.id)
|
||||
task = _dispatch_elevation_apply_task(
|
||||
job_id=saved.id,
|
||||
dispatch_mode=dispatch_mode,
|
||||
)
|
||||
saved.task_id = task.id
|
||||
saved.update_user = actor.id
|
||||
saved.update_date = utcnow()
|
||||
@@ -398,6 +400,60 @@ def create_apply_job(
|
||||
return ElevationApplyJobCreateResponse(job=serialize_job(latest), queued=True)
|
||||
|
||||
|
||||
def _dispatch_elevation_apply_task(*, job_id: str, dispatch_mode: str):
|
||||
normalized_mode = (dispatch_mode or "").strip().lower()
|
||||
if normalized_mode == "scheduler_api":
|
||||
return _enqueue_via_scheduler_api(job_id)
|
||||
|
||||
from ..tasks.elevation_tasks import apply_elevation_for_line_job
|
||||
|
||||
return apply_elevation_for_line_job.delay(job_id)
|
||||
|
||||
|
||||
def _enqueue_via_scheduler_api(job_id: str):
|
||||
from ..core.config import get_settings
|
||||
|
||||
import httpx
|
||||
|
||||
settings = get_settings()
|
||||
scheduler_base_url = settings.resolved_scheduler_api_base_url
|
||||
path = "/api/v1/v1/tasks/enqueue"
|
||||
payload = {
|
||||
"taskName": "app.tasks.elevation_tasks.apply_elevation_for_line_job",
|
||||
"taskId": job_id,
|
||||
"queueName": settings.resolved_scheduler_default_queue,
|
||||
"args": [job_id],
|
||||
"kwargs": {},
|
||||
}
|
||||
headers = {"Content-Type": "application/json"}
|
||||
token = settings.resolved_scheduler_api_token
|
||||
if token:
|
||||
headers["x-scheduler-token"] = token
|
||||
try:
|
||||
with httpx.Client(timeout=15) as client:
|
||||
response = client.post(f"{scheduler_base_url}{path}", json=payload, headers=headers)
|
||||
if response.status_code >= 400:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_502_BAD_GATEWAY,
|
||||
detail=f"scheduler enqueue failed: {response.status_code} {response.text}",
|
||||
)
|
||||
data = response.json()
|
||||
task_id = str(data.get("taskId") or data.get("task_id") or job_id).strip() or job_id
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_502_BAD_GATEWAY,
|
||||
detail=f"scheduler request failed: {exc}",
|
||||
) from exc
|
||||
|
||||
class _TaskRef:
|
||||
def __init__(self, value: str) -> None:
|
||||
self.id = value
|
||||
|
||||
return _TaskRef(task_id)
|
||||
|
||||
|
||||
def execute_apply_job(job_id: str) -> None:
|
||||
db = SessionLocal()
|
||||
try:
|
||||
|
||||
@@ -0,0 +1,334 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from urllib.parse import quote_plus
|
||||
|
||||
import httpx
|
||||
from fastapi import HTTPException, status
|
||||
|
||||
from ..core.config import get_settings
|
||||
from ..models.base import utcnow
|
||||
from ..schemas.flower_monitor import (
|
||||
FlowerTaskItem,
|
||||
FlowerWorkerItem,
|
||||
FlowerWorkerTaskOverviewResponse,
|
||||
FlowerWorkerTaskSummary,
|
||||
FlowerWorkersOverviewResponse,
|
||||
FlowerWorkersSummary,
|
||||
)
|
||||
|
||||
|
||||
def build_workers_overview(*, force_refresh: bool) -> FlowerWorkersOverviewResponse:
|
||||
refresh = "true" if force_refresh else "false"
|
||||
workers_map = _call_flower_json(f"/api/workers?refresh={refresh}")
|
||||
status_map = _call_flower_json("/api/workers?status=true")
|
||||
if not isinstance(workers_map, dict):
|
||||
workers_map = {}
|
||||
if not isinstance(status_map, dict):
|
||||
status_map = {}
|
||||
|
||||
worker_names = sorted(set(workers_map.keys()) | set(status_map.keys()))
|
||||
workers: list[FlowerWorkerItem] = []
|
||||
for worker_name in worker_names:
|
||||
worker_raw = _as_record(workers_map.get(worker_name))
|
||||
is_online = bool(status_map.get(worker_name))
|
||||
workers.append(_normalize_worker(worker_name, worker_raw, is_online))
|
||||
|
||||
workers.sort(key=lambda item: (0 if item.status == "ONLINE" else 1, item.worker))
|
||||
summary = FlowerWorkersSummary(
|
||||
total=len(workers),
|
||||
online=sum(1 for item in workers if item.status == "ONLINE"),
|
||||
offline=sum(1 for item in workers if item.status != "ONLINE"),
|
||||
)
|
||||
return FlowerWorkersOverviewResponse(
|
||||
generated_at=utcnow(),
|
||||
workers=workers,
|
||||
summary=summary,
|
||||
)
|
||||
|
||||
|
||||
def build_worker_task_overview(
|
||||
*,
|
||||
worker: str,
|
||||
force_refresh: bool,
|
||||
recent_limit: int,
|
||||
) -> FlowerWorkerTaskOverviewResponse:
|
||||
normalized_worker = worker.strip()
|
||||
if not normalized_worker:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="worker is required")
|
||||
|
||||
refresh = "true" if force_refresh else "false"
|
||||
safe_recent_limit = max(1, min(200, int(recent_limit)))
|
||||
workers_map = _call_flower_json(
|
||||
f"/api/workers?refresh={refresh}&workername={quote_plus(normalized_worker)}"
|
||||
)
|
||||
tasks_map = _call_flower_json(
|
||||
f"/api/tasks?limit={safe_recent_limit}&workername={quote_plus(normalized_worker)}"
|
||||
)
|
||||
worker_raw = _pick_worker_raw(_as_record(workers_map), normalized_worker)
|
||||
active_tasks = _build_snapshot_tasks(normalized_worker, worker_raw, "ACTIVE")
|
||||
reserved_tasks = _build_snapshot_tasks(normalized_worker, worker_raw, "RESERVED")
|
||||
scheduled_tasks = _build_snapshot_tasks(normalized_worker, worker_raw, "SCHEDULED")
|
||||
recent_tasks = _build_recent_tasks(normalized_worker, _as_record(tasks_map))
|
||||
summary = FlowerWorkerTaskSummary(
|
||||
active=len(active_tasks),
|
||||
reserved=len(reserved_tasks),
|
||||
scheduled=len(scheduled_tasks),
|
||||
recent=len(recent_tasks),
|
||||
)
|
||||
return FlowerWorkerTaskOverviewResponse(
|
||||
generated_at=utcnow(),
|
||||
worker=normalized_worker,
|
||||
active_tasks=active_tasks,
|
||||
reserved_tasks=reserved_tasks,
|
||||
scheduled_tasks=scheduled_tasks,
|
||||
recent_tasks=recent_tasks,
|
||||
summary=summary,
|
||||
)
|
||||
|
||||
|
||||
def _call_flower_json(path: str) -> Any:
|
||||
settings = get_settings()
|
||||
url = f"{settings.resolved_flower_api_base_url}{path}"
|
||||
headers = {"Accept": "application/json"}
|
||||
basic_auth = settings.resolved_flower_basic_auth
|
||||
if basic_auth:
|
||||
token = base64.b64encode(basic_auth.encode("utf-8")).decode("ascii")
|
||||
headers["Authorization"] = f"Basic {token}"
|
||||
timeout = max(3, int(settings.flower_api_timeout_seconds))
|
||||
try:
|
||||
with httpx.Client(timeout=timeout) as client:
|
||||
response = client.get(url, headers=headers)
|
||||
except httpx.TimeoutException as exc:
|
||||
raise HTTPException(status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail=f"flower request timeout: {path}") from exc
|
||||
except httpx.HTTPError as exc:
|
||||
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=f"flower request failed: {path}: {exc}") from exc
|
||||
if response.status_code >= 400:
|
||||
detail = (response.text or "").strip() or response.reason_phrase or "flower error"
|
||||
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=f"flower error {response.status_code}: {detail}")
|
||||
try:
|
||||
return response.json()
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=f"flower returned non-json payload: {path}")
|
||||
|
||||
|
||||
def _normalize_worker(worker_name: str, worker_raw: dict[str, Any], is_online: bool) -> FlowerWorkerItem:
|
||||
stats = _as_record(worker_raw.get("stats"))
|
||||
return FlowerWorkerItem(
|
||||
worker=worker_name,
|
||||
status="ONLINE" if is_online else "OFFLINE",
|
||||
queue_names=_parse_active_queue_names(worker_raw.get("active_queues")),
|
||||
registered_count=len(_as_array(worker_raw.get("registered"))),
|
||||
processed_count=_parse_processed_count(stats.get("total")),
|
||||
concurrency=_safe_int(_as_record(stats.get("pool")).get("max-concurrency") or _as_record(stats.get("pool")).get("max_concurrency")),
|
||||
prefetch_count=_safe_int(stats.get("prefetch_count")),
|
||||
active_count=len(_as_array(worker_raw.get("active"))),
|
||||
reserved_count=len(_as_array(worker_raw.get("reserved"))),
|
||||
scheduled_count=len(_as_array(worker_raw.get("scheduled"))),
|
||||
last_heartbeat_at=_parse_datetime(worker_raw.get("timestamp")),
|
||||
)
|
||||
|
||||
|
||||
def _pick_worker_raw(workers_map: dict[str, Any], worker_name: str) -> dict[str, Any]:
|
||||
if worker_name in workers_map:
|
||||
return _as_record(workers_map.get(worker_name))
|
||||
entries = list(workers_map.items())
|
||||
if len(entries) == 1:
|
||||
return _as_record(entries[0][1])
|
||||
normalized = worker_name.strip().lower()
|
||||
for name, value in entries:
|
||||
if str(name).strip().lower() == normalized:
|
||||
return _as_record(value)
|
||||
return {}
|
||||
|
||||
|
||||
def _build_snapshot_tasks(worker_name: str, worker_raw: dict[str, Any], source: str) -> list[FlowerTaskItem]:
|
||||
key = source.lower()
|
||||
snapshot = _as_array(worker_raw.get(key))
|
||||
items: list[FlowerTaskItem] = []
|
||||
for index, raw in enumerate(snapshot, start=1):
|
||||
items.append(
|
||||
_normalize_task_item(
|
||||
raw,
|
||||
source=source,
|
||||
fallback_task_id=f"{worker_name}:{source}:{index}",
|
||||
default_worker=worker_name,
|
||||
)
|
||||
)
|
||||
return items
|
||||
|
||||
|
||||
def _build_recent_tasks(worker_name: str, tasks_map: dict[str, Any]) -> list[FlowerTaskItem]:
|
||||
items: list[FlowerTaskItem] = []
|
||||
for task_id, raw in tasks_map.items():
|
||||
item = _normalize_task_item(raw, source="RECENT", fallback_task_id=str(task_id), default_worker=worker_name)
|
||||
if item.worker.strip().lower() != worker_name.strip().lower():
|
||||
continue
|
||||
items.append(item)
|
||||
items.sort(
|
||||
key=lambda item: _sortable_timestamp(
|
||||
item.received_at or item.started_at or item.finished_at or item.eta
|
||||
),
|
||||
reverse=True,
|
||||
)
|
||||
return items
|
||||
|
||||
|
||||
def _normalize_task_item(
|
||||
raw_task: Any,
|
||||
*,
|
||||
source: str,
|
||||
fallback_task_id: str,
|
||||
default_worker: str,
|
||||
) -> FlowerTaskItem:
|
||||
raw = _as_record(raw_task)
|
||||
request = _as_record(raw.get("request"))
|
||||
task_data = dict(raw)
|
||||
if request:
|
||||
task_data.update(request)
|
||||
task_id = _safe_text(task_data.get("uuid") or task_data.get("id") or fallback_task_id) or fallback_task_id
|
||||
task_name = _safe_text(task_data.get("name") or task_data.get("type")) or "-"
|
||||
worker_name = _safe_text(task_data.get("worker") or task_data.get("hostname")) or default_worker
|
||||
state_raw = _safe_text(task_data.get("state"))
|
||||
state = state_raw.upper() if state_raw else ("UNKNOWN" if source == "RECENT" else source)
|
||||
queue_name = _extract_queue_name(task_data)
|
||||
return FlowerTaskItem(
|
||||
task_id=task_id,
|
||||
name=task_name,
|
||||
state=state,
|
||||
source=source,
|
||||
worker=worker_name,
|
||||
queue_name=queue_name,
|
||||
args_text=_stringify_payload(task_data.get("args")),
|
||||
kwargs_text=_stringify_payload(task_data.get("kwargs")),
|
||||
eta=_parse_datetime(task_data.get("eta")),
|
||||
received_at=_parse_datetime(task_data.get("received") or task_data.get("timestamp")),
|
||||
started_at=_parse_datetime(task_data.get("started") or task_data.get("time_start")),
|
||||
finished_at=_parse_datetime(task_data.get("succeeded") or task_data.get("failed") or task_data.get("revoked")),
|
||||
runtime_seconds=_safe_float(task_data.get("runtime")),
|
||||
result_text=_stringify_payload(task_data.get("result")),
|
||||
exception_text=_stringify_payload(task_data.get("exception")),
|
||||
)
|
||||
|
||||
|
||||
def _parse_active_queue_names(raw: Any) -> list[str]:
|
||||
values = []
|
||||
for item in _as_array(raw):
|
||||
name = _safe_text(_as_record(item).get("name"))
|
||||
if name:
|
||||
values.append(name)
|
||||
return sorted(set(values))
|
||||
|
||||
|
||||
def _parse_processed_count(raw_total: Any) -> int:
|
||||
total = _as_record(raw_total)
|
||||
count = 0
|
||||
for value in total.values():
|
||||
count += _safe_int(value)
|
||||
return count
|
||||
|
||||
|
||||
def _extract_queue_name(task_data: dict[str, Any]) -> str | None:
|
||||
delivery = _as_record(task_data.get("delivery_info"))
|
||||
routing_key = _safe_text(delivery.get("routing_key"))
|
||||
if routing_key:
|
||||
return routing_key
|
||||
exchange = _safe_text(task_data.get("exchange") or delivery.get("exchange"))
|
||||
return exchange
|
||||
|
||||
|
||||
def _parse_datetime(value: Any) -> datetime | None:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, datetime):
|
||||
if value.tzinfo is None:
|
||||
return value.replace(tzinfo=timezone.utc)
|
||||
return value.astimezone(timezone.utc)
|
||||
if isinstance(value, (int, float)):
|
||||
numeric = float(value)
|
||||
if numeric <= 0:
|
||||
return None
|
||||
if numeric < 1e11:
|
||||
numeric *= 1000
|
||||
try:
|
||||
return datetime.fromtimestamp(numeric / 1000, timezone.utc)
|
||||
except (OverflowError, OSError, ValueError):
|
||||
return None
|
||||
text = str(value).strip()
|
||||
if not text:
|
||||
return None
|
||||
if text.replace(".", "", 1).isdigit():
|
||||
try:
|
||||
return _parse_datetime(float(text))
|
||||
except ValueError:
|
||||
return None
|
||||
text = text.replace("Z", "+00:00")
|
||||
try:
|
||||
parsed = datetime.fromisoformat(text)
|
||||
except ValueError:
|
||||
return None
|
||||
if parsed.tzinfo is None:
|
||||
return parsed.replace(tzinfo=timezone.utc)
|
||||
return parsed.astimezone(timezone.utc)
|
||||
|
||||
|
||||
def _stringify_payload(value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, str):
|
||||
text = value.strip()
|
||||
else:
|
||||
try:
|
||||
text = json.dumps(value, ensure_ascii=False)
|
||||
except TypeError:
|
||||
text = str(value)
|
||||
text = text.strip()
|
||||
if not text:
|
||||
return None
|
||||
if len(text) > 1000:
|
||||
return text[:997] + "..."
|
||||
return text
|
||||
|
||||
|
||||
def _safe_text(value: Any) -> str:
|
||||
return str(value or "").strip()
|
||||
|
||||
|
||||
def _safe_int(value: Any) -> int:
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
|
||||
|
||||
def _safe_float(value: Any) -> float | None:
|
||||
try:
|
||||
parsed = float(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if parsed < 0:
|
||||
return None
|
||||
return parsed
|
||||
|
||||
|
||||
def _as_record(value: Any) -> dict[str, Any]:
|
||||
if isinstance(value, dict):
|
||||
return dict(value)
|
||||
return {}
|
||||
|
||||
|
||||
def _as_array(value: Any) -> list[Any]:
|
||||
if isinstance(value, list):
|
||||
return list(value)
|
||||
return []
|
||||
|
||||
|
||||
def _sortable_timestamp(value: datetime | None) -> float:
|
||||
if value is None:
|
||||
return 0.0
|
||||
if value.tzinfo is None:
|
||||
value = value.replace(tzinfo=timezone.utc)
|
||||
return value.timestamp()
|
||||
@@ -75,6 +75,7 @@ PROTECTED_MENU_CODES = {
|
||||
"admin.power_lines",
|
||||
"admin.lightning_currents",
|
||||
"admin.lightning_distribution",
|
||||
"admin.workers",
|
||||
"admin.task_monitor",
|
||||
"admin.atp_models",
|
||||
"admin.data_query",
|
||||
|
||||
@@ -91,6 +91,7 @@ MENU_CODE_PERMISSION_MAP: dict[str, set[str]] = {
|
||||
"admin.system_params": {"system_param.read", "system_param.manage"},
|
||||
"admin.files": {"file.read", "file.manage"},
|
||||
"admin.elevation": {"elevation.read", "elevation.manage"},
|
||||
"admin.workers": {"celery.read", "celery.manage"},
|
||||
"admin.task_monitor": {"celery.read", "celery.manage"},
|
||||
"admin.atp_models": {"atp.read", "atp.manage", "atp.run"},
|
||||
"admin.lightning_currents": {"lightning.read", "lightning.manage"},
|
||||
@@ -100,6 +101,17 @@ MENU_CODE_PERMISSION_MAP: dict[str, set[str]] = {
|
||||
}
|
||||
|
||||
SYNTHETIC_LEGACY_MENU_ROWS: list[dict[str, Any]] = [
|
||||
{
|
||||
"menu_id": "admin.workers",
|
||||
"menu_name": "admin.workers",
|
||||
"menu_label": "Worker监控",
|
||||
"menu_type": "MENU",
|
||||
"parent_id": None,
|
||||
"url": "/admin/workers",
|
||||
"menu_icon": "DeploymentUnitOutlined",
|
||||
"seq": 53,
|
||||
"state": "ENABLED",
|
||||
},
|
||||
{
|
||||
"menu_id": "admin.files",
|
||||
"menu_name": "admin.files",
|
||||
@@ -108,7 +120,7 @@ SYNTHETIC_LEGACY_MENU_ROWS: list[dict[str, Any]] = [
|
||||
"parent_id": None,
|
||||
"url": "/admin/files",
|
||||
"menu_icon": "FolderTree",
|
||||
"seq": 54,
|
||||
"seq": 56,
|
||||
"state": "ENABLED",
|
||||
},
|
||||
{
|
||||
@@ -119,7 +131,7 @@ SYNTHETIC_LEGACY_MENU_ROWS: list[dict[str, Any]] = [
|
||||
"parent_id": None,
|
||||
"url": "/admin/elevation",
|
||||
"menu_icon": "Database",
|
||||
"seq": 56,
|
||||
"seq": 57,
|
||||
"state": "ENABLED",
|
||||
},
|
||||
{
|
||||
@@ -130,7 +142,7 @@ SYNTHETIC_LEGACY_MENU_ROWS: list[dict[str, Any]] = [
|
||||
"parent_id": None,
|
||||
"url": "/admin/task-monitor",
|
||||
"menu_icon": "RadarChart",
|
||||
"seq": 53,
|
||||
"seq": 54,
|
||||
"state": "ENABLED",
|
||||
},
|
||||
{
|
||||
@@ -141,7 +153,7 @@ SYNTHETIC_LEGACY_MENU_ROWS: list[dict[str, Any]] = [
|
||||
"parent_id": None,
|
||||
"url": "/admin/power-lines/atp-viewer",
|
||||
"menu_icon": "Experiment",
|
||||
"seq": 54,
|
||||
"seq": 55,
|
||||
"state": "ENABLED",
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import HTTPException, status
|
||||
|
||||
from ..core.celery_app import celery_app
|
||||
from ..core.config import get_settings
|
||||
from ..schemas.scheduler import (
|
||||
SchedulerEnqueueTaskRequest,
|
||||
SchedulerEnqueueTaskResponse,
|
||||
SchedulerRevokeTaskRequest,
|
||||
SchedulerRevokeTaskResponse,
|
||||
)
|
||||
|
||||
|
||||
def enqueue_task(payload: SchedulerEnqueueTaskRequest) -> SchedulerEnqueueTaskResponse:
|
||||
settings = get_settings()
|
||||
task_name = payload.task_name.strip()
|
||||
if not task_name:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="taskName is required")
|
||||
|
||||
queue_name = (payload.queue_name or "").strip() or settings.resolved_scheduler_default_queue
|
||||
task_id = (payload.task_id or "").strip() or None
|
||||
|
||||
try:
|
||||
result = celery_app.send_task(
|
||||
task_name,
|
||||
args=list(payload.args),
|
||||
kwargs=dict(payload.kwargs),
|
||||
task_id=task_id,
|
||||
queue=queue_name,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"scheduler enqueue failed: {exc}") from exc
|
||||
|
||||
return SchedulerEnqueueTaskResponse(
|
||||
queued=True,
|
||||
taskId=result.id,
|
||||
queueName=queue_name,
|
||||
taskName=task_name,
|
||||
)
|
||||
|
||||
|
||||
def revoke_task(payload: SchedulerRevokeTaskRequest) -> SchedulerRevokeTaskResponse:
|
||||
task_id = payload.task_id.strip()
|
||||
if not task_id:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="taskId is required")
|
||||
|
||||
signal = (payload.signal or "").strip() or "SIGTERM"
|
||||
terminate = bool(payload.terminate)
|
||||
|
||||
try:
|
||||
celery_app.control.revoke(task_id, terminate=terminate, signal=signal)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"scheduler revoke failed: {exc}") from exc
|
||||
|
||||
return SchedulerRevokeTaskResponse(
|
||||
revoked=True,
|
||||
taskId=task_id,
|
||||
terminate=terminate,
|
||||
signal=signal,
|
||||
)
|
||||
@@ -186,6 +186,19 @@ DEFAULT_MENUS: list[dict[str, object]] = [
|
||||
"cacheable": False,
|
||||
"permission_code": "lightning.read",
|
||||
},
|
||||
{
|
||||
"code": "admin.workers",
|
||||
"name": "Worker监控",
|
||||
"path": "/admin/workers",
|
||||
"icon": "DeploymentUnitOutlined",
|
||||
"parent_code": None,
|
||||
"type": "menu",
|
||||
"sort_order": 53,
|
||||
"status": "enabled",
|
||||
"visible": True,
|
||||
"cacheable": False,
|
||||
"permission_code": "celery.read",
|
||||
},
|
||||
{
|
||||
"code": "admin.task_monitor",
|
||||
"name": "任务监控",
|
||||
@@ -193,7 +206,7 @@ DEFAULT_MENUS: list[dict[str, object]] = [
|
||||
"icon": "RadarChart",
|
||||
"parent_code": None,
|
||||
"type": "menu",
|
||||
"sort_order": 53,
|
||||
"sort_order": 54,
|
||||
"status": "enabled",
|
||||
"visible": True,
|
||||
"cacheable": False,
|
||||
@@ -206,7 +219,7 @@ DEFAULT_MENUS: list[dict[str, object]] = [
|
||||
"icon": "Experiment",
|
||||
"parent_code": None,
|
||||
"type": "menu",
|
||||
"sort_order": 54,
|
||||
"sort_order": 55,
|
||||
"status": "enabled",
|
||||
"visible": True,
|
||||
"cacheable": False,
|
||||
@@ -219,7 +232,7 @@ DEFAULT_MENUS: list[dict[str, object]] = [
|
||||
"icon": "FolderTree",
|
||||
"parent_code": None,
|
||||
"type": "menu",
|
||||
"sort_order": 55,
|
||||
"sort_order": 56,
|
||||
"status": "enabled",
|
||||
"visible": True,
|
||||
"cacheable": False,
|
||||
@@ -232,7 +245,7 @@ DEFAULT_MENUS: list[dict[str, object]] = [
|
||||
"icon": "Database",
|
||||
"parent_code": None,
|
||||
"type": "menu",
|
||||
"sort_order": 56,
|
||||
"sort_order": 57,
|
||||
"status": "enabled",
|
||||
"visible": True,
|
||||
"cacheable": False,
|
||||
@@ -245,7 +258,7 @@ DEFAULT_MENUS: list[dict[str, object]] = [
|
||||
"icon": "FileText",
|
||||
"parent_code": None,
|
||||
"type": "menu",
|
||||
"sort_order": 57,
|
||||
"sort_order": 58,
|
||||
"status": "enabled",
|
||||
"visible": True,
|
||||
"cacheable": False,
|
||||
@@ -267,7 +280,7 @@ DEFAULT_MENUS: list[dict[str, object]] = [
|
||||
]
|
||||
|
||||
ROLE_MENU_BINDINGS: dict[str, list[str]] = {
|
||||
"admin": ["dashboard", "admin.users", "admin.roles", "admin.menus", "admin.system_params", "admin.power_lines", "admin.lightning_currents", "admin.lightning_distribution", "admin.task_monitor", "admin.atp_models", "admin.files", "admin.elevation", "admin.syslog", "admin.wine_runner"],
|
||||
"admin": ["dashboard", "admin.users", "admin.roles", "admin.menus", "admin.system_params", "admin.power_lines", "admin.lightning_currents", "admin.lightning_distribution", "admin.workers", "admin.task_monitor", "admin.atp_models", "admin.files", "admin.elevation", "admin.syslog", "admin.wine_runner"],
|
||||
"user": ["dashboard"],
|
||||
}
|
||||
|
||||
|
||||
@@ -1,25 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from collections.abc import Iterable
|
||||
from datetime import datetime, timezone
|
||||
from urllib.parse import urlsplit, urlunsplit
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
import redis
|
||||
except Exception: # pragma: no cover - optional dependency in non-runtime environments
|
||||
redis = None
|
||||
|
||||
from ..core.celery_app import celery_app
|
||||
from ..core.config import get_settings
|
||||
from ..models.base import utcnow
|
||||
from ..schemas.task_monitor import (
|
||||
TaskMonitorBucketItem,
|
||||
TaskMonitorOverviewResponse,
|
||||
TaskMonitorQueueItem,
|
||||
TaskMonitorTaskItem,
|
||||
TaskMonitorWorkerItem,
|
||||
)
|
||||
from .flower_monitor_service import build_worker_task_overview, build_workers_overview
|
||||
|
||||
STATE_LABELS = {
|
||||
"PENDING": "待执行",
|
||||
@@ -45,193 +36,103 @@ STATE_PRIORITY = {
|
||||
|
||||
|
||||
def build_task_monitor_overview(*, task_limit: int, history_limit: int) -> TaskMonitorOverviewResponse:
|
||||
settings = get_settings()
|
||||
now = utcnow()
|
||||
overview = TaskMonitorOverviewResponse(
|
||||
generated_at=now,
|
||||
broker_url=_mask_url(settings.resolved_celery_broker_url),
|
||||
result_backend=_mask_url(settings.resolved_celery_result_backend),
|
||||
)
|
||||
workers_overview = build_workers_overview(force_refresh=False)
|
||||
task_items: list[TaskMonitorTaskItem] = []
|
||||
queue_runtime_counts: dict[str, dict[str, int]] = {}
|
||||
queue_consumer_counts: dict[str, int] = {}
|
||||
|
||||
inspector = celery_app.control.inspect(timeout=1.0)
|
||||
stats = _safe_inspect_call(inspector.stats)
|
||||
active = _safe_inspect_call(inspector.active)
|
||||
reserved = _safe_inspect_call(inspector.reserved)
|
||||
scheduled = _safe_inspect_call(inspector.scheduled)
|
||||
active_queues = _safe_inspect_call(inspector.active_queues)
|
||||
ping = _safe_inspect_call(inspector.ping)
|
||||
for worker in workers_overview.workers:
|
||||
queue_names = worker.queue_names
|
||||
for queue_name in queue_names:
|
||||
queue_consumer_counts[queue_name] = queue_consumer_counts.get(queue_name, 0) + 1
|
||||
queue_runtime_counts.setdefault(
|
||||
queue_name,
|
||||
{"active": 0, "reserved": 0, "scheduled": 0},
|
||||
)
|
||||
|
||||
worker_names = sorted(set(stats) | set(active) | set(reserved) | set(scheduled) | set(active_queues) | set(ping))
|
||||
overview.workers = [
|
||||
_build_worker_item(
|
||||
worker,
|
||||
stats=stats.get(worker) or {},
|
||||
active_tasks=active.get(worker) or [],
|
||||
reserved_tasks=reserved.get(worker) or [],
|
||||
scheduled_tasks=scheduled.get(worker) or [],
|
||||
queues=active_queues.get(worker) or [],
|
||||
online=worker in ping if ping else True,
|
||||
worker_tasks = build_worker_task_overview(
|
||||
worker=worker.worker,
|
||||
force_refresh=False,
|
||||
recent_limit=max(history_limit, 1),
|
||||
)
|
||||
for worker in worker_names
|
||||
]
|
||||
overview.workers_online = sum(1 for item in overview.workers if item.online)
|
||||
overview.worker_concurrency_total = sum(item.max_concurrency for item in overview.workers)
|
||||
task_items.extend(_convert_flower_tasks(worker_tasks.active_tasks, source_state="STARTED"))
|
||||
task_items.extend(_convert_flower_tasks(worker_tasks.reserved_tasks, source_state="RECEIVED"))
|
||||
task_items.extend(_convert_flower_tasks(worker_tasks.scheduled_tasks, source_state="SCHEDULED"))
|
||||
task_items.extend(_convert_flower_tasks(worker_tasks.recent_tasks, source_state=None))
|
||||
|
||||
runtime_tasks = [
|
||||
*_build_task_items(active, state="STARTED", now=now),
|
||||
*_build_task_items(reserved, state="RECEIVED", now=now),
|
||||
*_build_task_items(scheduled, state="SCHEDULED", now=now),
|
||||
]
|
||||
runtime_tasks_by_id = {item.task_id: item for item in runtime_tasks if item.task_id}
|
||||
for task in worker_tasks.active_tasks:
|
||||
if task.queue_name:
|
||||
queue_runtime_counts.setdefault(task.queue_name, {"active": 0, "reserved": 0, "scheduled": 0})
|
||||
queue_runtime_counts[task.queue_name]["active"] += 1
|
||||
for task in worker_tasks.reserved_tasks:
|
||||
if task.queue_name:
|
||||
queue_runtime_counts.setdefault(task.queue_name, {"active": 0, "reserved": 0, "scheduled": 0})
|
||||
queue_runtime_counts[task.queue_name]["reserved"] += 1
|
||||
for task in worker_tasks.scheduled_tasks:
|
||||
if task.queue_name:
|
||||
queue_runtime_counts.setdefault(task.queue_name, {"active": 0, "reserved": 0, "scheduled": 0})
|
||||
queue_runtime_counts[task.queue_name]["scheduled"] += 1
|
||||
|
||||
history_tasks = _load_history_task_items(settings.resolved_celery_result_backend, limit=history_limit)
|
||||
for item in history_tasks:
|
||||
if not item.task_id or item.task_id in runtime_tasks_by_id:
|
||||
runtime_tasks_by_id: dict[str, TaskMonitorTaskItem] = {}
|
||||
for item in task_items:
|
||||
if not item.task_id:
|
||||
continue
|
||||
runtime_tasks_by_id[item.task_id] = item
|
||||
existing = runtime_tasks_by_id.get(item.task_id)
|
||||
if existing is None:
|
||||
runtime_tasks_by_id[item.task_id] = item
|
||||
continue
|
||||
if _task_priority(item.state) < _task_priority(existing.state):
|
||||
runtime_tasks_by_id[item.task_id] = item
|
||||
|
||||
all_tasks = sorted(
|
||||
runtime_tasks_by_id.values(),
|
||||
key=lambda item: (
|
||||
STATE_PRIORITY.get(item.state, 99),
|
||||
_task_priority(item.state),
|
||||
-_task_sort_timestamp(item).timestamp(),
|
||||
item.task_id,
|
||||
),
|
||||
)
|
||||
overview.tasks = all_tasks[:task_limit]
|
||||
overview.task_state_buckets = _build_state_buckets(runtime_tasks_by_id.values())
|
||||
|
||||
queue_names = _collect_queue_names(active_queues, runtime_tasks_by_id.values())
|
||||
queue_pending_counts = _load_queue_pending_counts(settings.resolved_celery_broker_url, queue_names)
|
||||
overview.queues = _build_queue_items(
|
||||
active_queues=active_queues,
|
||||
tasks=runtime_tasks_by_id.values(),
|
||||
pending_counts=queue_pending_counts,
|
||||
)
|
||||
overview.queue_pending_total = sum(item.pending_count for item in overview.queues)
|
||||
|
||||
return overview
|
||||
|
||||
|
||||
def _safe_inspect_call(call):
|
||||
try:
|
||||
result = call()
|
||||
except Exception:
|
||||
return {}
|
||||
if not isinstance(result, dict):
|
||||
return {}
|
||||
return result
|
||||
|
||||
|
||||
def _build_worker_item(
|
||||
worker: str,
|
||||
*,
|
||||
stats: dict,
|
||||
active_tasks: list[dict],
|
||||
reserved_tasks: list[dict],
|
||||
scheduled_tasks: list[dict],
|
||||
queues: list[dict],
|
||||
online: bool,
|
||||
) -> TaskMonitorWorkerItem:
|
||||
pool = stats.get("pool") if isinstance(stats.get("pool"), dict) else {}
|
||||
max_concurrency = _to_int(
|
||||
pool.get("max-concurrency")
|
||||
or pool.get("max_concurrency")
|
||||
or len(pool.get("processes") or [])
|
||||
)
|
||||
total = stats.get("total") if isinstance(stats.get("total"), dict) else {}
|
||||
|
||||
return TaskMonitorWorkerItem(
|
||||
worker=worker,
|
||||
online=online,
|
||||
queue_names=sorted({_queue_name_from_queue(item) for item in queues if _queue_name_from_queue(item)}),
|
||||
max_concurrency=max_concurrency,
|
||||
prefetch_count=_to_int(stats.get("prefetch_count")),
|
||||
uptime_seconds=_to_int(stats.get("uptime")),
|
||||
processed_total=sum(_to_int(value) for value in total.values()),
|
||||
active_count=len(active_tasks),
|
||||
reserved_count=len(reserved_tasks),
|
||||
scheduled_count=len(scheduled_tasks),
|
||||
)
|
||||
|
||||
|
||||
def _build_task_items(tasks_by_worker: dict, *, state: str, now: datetime) -> list[TaskMonitorTaskItem]:
|
||||
items: list[TaskMonitorTaskItem] = []
|
||||
for worker, raw_tasks in tasks_by_worker.items():
|
||||
for raw_item in raw_tasks or []:
|
||||
task = raw_item.get("request") if state == "SCHEDULED" else raw_item
|
||||
if not isinstance(task, dict):
|
||||
continue
|
||||
task_id = str(task.get("id") or "").strip()
|
||||
if not task_id:
|
||||
continue
|
||||
|
||||
eta = _parse_datetime(raw_item.get("eta")) if state == "SCHEDULED" else _parse_datetime(task.get("eta"))
|
||||
started_at = _timestamp_to_datetime(task.get("time_start"))
|
||||
runtime_seconds = None
|
||||
if started_at and state == "STARTED":
|
||||
runtime_seconds = max(0.0, round((now - started_at).total_seconds(), 3))
|
||||
|
||||
items.append(
|
||||
TaskMonitorTaskItem(
|
||||
task_id=task_id,
|
||||
name=str(task.get("name") or task.get("task") or "-"),
|
||||
state=state,
|
||||
queue_name=_queue_name_from_task(task),
|
||||
worker=str(worker),
|
||||
retries=_to_int(task.get("retries")),
|
||||
eta=eta,
|
||||
started_at=started_at,
|
||||
runtime_seconds=runtime_seconds,
|
||||
)
|
||||
)
|
||||
return items
|
||||
|
||||
|
||||
def _build_queue_items(
|
||||
*,
|
||||
active_queues: dict,
|
||||
tasks: Iterable[TaskMonitorTaskItem],
|
||||
pending_counts: dict[str, int],
|
||||
) -> list[TaskMonitorQueueItem]:
|
||||
queue_names: set[str] = set()
|
||||
consumer_counts: dict[str, int] = {}
|
||||
active_counts: dict[str, int] = {}
|
||||
reserved_counts: dict[str, int] = {}
|
||||
scheduled_counts: dict[str, int] = {}
|
||||
|
||||
for queues in active_queues.values():
|
||||
for queue in queues or []:
|
||||
name = _queue_name_from_queue(queue)
|
||||
if not name:
|
||||
continue
|
||||
queue_names.add(name)
|
||||
consumer_counts[name] = consumer_counts.get(name, 0) + 1
|
||||
|
||||
for task in tasks:
|
||||
if not task.queue_name:
|
||||
continue
|
||||
queue_names.add(task.queue_name)
|
||||
if task.state == "STARTED":
|
||||
active_counts[task.queue_name] = active_counts.get(task.queue_name, 0) + 1
|
||||
elif task.state == "RECEIVED":
|
||||
reserved_counts[task.queue_name] = reserved_counts.get(task.queue_name, 0) + 1
|
||||
elif task.state == "SCHEDULED":
|
||||
scheduled_counts[task.queue_name] = scheduled_counts.get(task.queue_name, 0) + 1
|
||||
|
||||
return sorted(
|
||||
queues = sorted(
|
||||
[
|
||||
TaskMonitorQueueItem(
|
||||
name=name,
|
||||
pending_count=max(0, _to_int(pending_counts.get(name))),
|
||||
consumer_count=consumer_counts.get(name, 0),
|
||||
active_count=active_counts.get(name, 0),
|
||||
reserved_count=reserved_counts.get(name, 0),
|
||||
scheduled_count=scheduled_counts.get(name, 0),
|
||||
pending_count=0,
|
||||
consumer_count=queue_consumer_counts.get(name, 0),
|
||||
active_count=queue_runtime_counts.get(name, {}).get("active", 0),
|
||||
reserved_count=queue_runtime_counts.get(name, {}).get("reserved", 0),
|
||||
scheduled_count=queue_runtime_counts.get(name, {}).get("scheduled", 0),
|
||||
)
|
||||
for name in queue_names
|
||||
for name in sorted(set(queue_runtime_counts) | set(queue_consumer_counts))
|
||||
],
|
||||
key=lambda item: (-item.pending_count, item.name),
|
||||
key=lambda item: (-item.active_count - item.reserved_count - item.scheduled_count, item.name),
|
||||
)
|
||||
|
||||
return TaskMonitorOverviewResponse(
|
||||
generated_at=workers_overview.generated_at,
|
||||
broker_url="flower",
|
||||
result_backend="flower",
|
||||
workers_online=workers_overview.summary.online,
|
||||
worker_concurrency_total=sum(item.concurrency for item in workers_overview.workers),
|
||||
queue_pending_total=sum(item.pending_count for item in queues),
|
||||
task_state_buckets=_build_state_buckets(runtime_tasks_by_id.values()),
|
||||
workers=[
|
||||
{
|
||||
"worker": worker.worker,
|
||||
"online": worker.status == "ONLINE",
|
||||
"queue_names": worker.queue_names,
|
||||
"max_concurrency": worker.concurrency,
|
||||
"prefetch_count": worker.prefetch_count,
|
||||
"uptime_seconds": 0,
|
||||
"processed_total": worker.processed_count,
|
||||
"active_count": worker.active_count,
|
||||
"reserved_count": worker.reserved_count,
|
||||
"scheduled_count": worker.scheduled_count,
|
||||
}
|
||||
for worker in workers_overview.workers
|
||||
],
|
||||
queues=queues,
|
||||
tasks=all_tasks[:task_limit],
|
||||
)
|
||||
|
||||
|
||||
@@ -245,201 +146,35 @@ def _build_state_buckets(tasks: Iterable[TaskMonitorTaskItem]) -> list[TaskMonit
|
||||
]
|
||||
|
||||
|
||||
def _collect_queue_names(active_queues: dict, tasks: Iterable[TaskMonitorTaskItem]) -> list[str]:
|
||||
queue_names: set[str] = set()
|
||||
|
||||
for queues in active_queues.values():
|
||||
for queue in queues or []:
|
||||
name = _queue_name_from_queue(queue)
|
||||
if name:
|
||||
queue_names.add(name)
|
||||
|
||||
for task in tasks:
|
||||
if task.queue_name:
|
||||
queue_names.add(task.queue_name)
|
||||
|
||||
return sorted(queue_names)
|
||||
|
||||
|
||||
def _load_queue_pending_counts(broker_url: str, queue_names: list[str]) -> dict[str, int]:
|
||||
if not queue_names or not _is_redis_url(broker_url):
|
||||
return {name: 0 for name in queue_names}
|
||||
|
||||
client = _build_redis_client(broker_url)
|
||||
if client is None:
|
||||
return {name: 0 for name in queue_names}
|
||||
|
||||
counts: dict[str, int] = {}
|
||||
try:
|
||||
for queue_name in queue_names:
|
||||
counts[queue_name] = _to_int(client.llen(queue_name))
|
||||
except Exception:
|
||||
return {name: 0 for name in queue_names}
|
||||
return counts
|
||||
|
||||
|
||||
def _load_history_task_items(result_backend_url: str, *, limit: int) -> list[TaskMonitorTaskItem]:
|
||||
if limit <= 0 or not _is_redis_url(result_backend_url):
|
||||
return []
|
||||
|
||||
client = _build_redis_client(result_backend_url)
|
||||
if client is None:
|
||||
return []
|
||||
|
||||
scan_max = max(200, limit * 20)
|
||||
keys: list[str] = []
|
||||
cursor = 0
|
||||
|
||||
try:
|
||||
while True:
|
||||
cursor, batch = client.scan(cursor=cursor, match="celery-task-meta-*", count=200)
|
||||
keys.extend(str(key) for key in batch)
|
||||
if cursor == 0 or len(keys) >= scan_max:
|
||||
break
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
def _convert_flower_tasks(tasks: list[Any], *, source_state: str | None) -> list[TaskMonitorTaskItem]:
|
||||
items: list[TaskMonitorTaskItem] = []
|
||||
for key in keys[:scan_max]:
|
||||
try:
|
||||
payload_raw = client.get(key)
|
||||
except Exception:
|
||||
continue
|
||||
if not payload_raw:
|
||||
continue
|
||||
|
||||
try:
|
||||
payload = json.loads(payload_raw)
|
||||
except (TypeError, json.JSONDecodeError):
|
||||
continue
|
||||
if not isinstance(payload, dict):
|
||||
continue
|
||||
|
||||
state = str(payload.get("status") or "").strip().upper()
|
||||
if not state:
|
||||
continue
|
||||
|
||||
task_id = str(payload.get("task_id") or key.removeprefix("celery-task-meta-")).strip()
|
||||
if not task_id:
|
||||
continue
|
||||
|
||||
done_at = _parse_datetime(payload.get("date_done"))
|
||||
error = _build_error(payload.get("result"), payload.get("traceback")) if state in {"FAILURE", "RETRY", "REVOKED"} else None
|
||||
|
||||
for task in tasks:
|
||||
state = source_state or task.state
|
||||
items.append(
|
||||
TaskMonitorTaskItem(
|
||||
task_id=task_id,
|
||||
name=str(payload.get("name") or "-"),
|
||||
task_id=task.task_id,
|
||||
name=task.name,
|
||||
state=state,
|
||||
done_at=done_at,
|
||||
error=error,
|
||||
queue_name=task.queue_name,
|
||||
worker=task.worker,
|
||||
retries=0,
|
||||
eta=task.eta,
|
||||
started_at=task.started_at,
|
||||
done_at=task.finished_at,
|
||||
runtime_seconds=task.runtime_seconds,
|
||||
error=task.exception_text,
|
||||
)
|
||||
)
|
||||
|
||||
items.sort(key=lambda item: -_task_sort_timestamp(item).timestamp())
|
||||
return items[:limit]
|
||||
return items
|
||||
|
||||
|
||||
def _queue_name_from_queue(queue: dict) -> str:
|
||||
if not isinstance(queue, dict):
|
||||
return ""
|
||||
return str(queue.get("name") or queue.get("routing_key") or "").strip()
|
||||
|
||||
|
||||
def _queue_name_from_task(task: dict) -> str | None:
|
||||
delivery_info = task.get("delivery_info") if isinstance(task.get("delivery_info"), dict) else {}
|
||||
queue_name = delivery_info.get("routing_key") or task.get("queue")
|
||||
if not queue_name:
|
||||
return None
|
||||
return str(queue_name)
|
||||
|
||||
|
||||
def _is_redis_url(value: str) -> bool:
|
||||
scheme = urlsplit(value).scheme
|
||||
return scheme in {"redis", "rediss"}
|
||||
|
||||
|
||||
def _build_redis_client(redis_url: str):
|
||||
if redis is None:
|
||||
return None
|
||||
try:
|
||||
return redis.Redis.from_url(
|
||||
redis_url,
|
||||
decode_responses=True,
|
||||
socket_connect_timeout=1,
|
||||
socket_timeout=1,
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _parse_datetime(value: object) -> datetime | None:
|
||||
if not isinstance(value, str) or not value.strip():
|
||||
return None
|
||||
normalized = value.strip().replace("Z", "+00:00")
|
||||
try:
|
||||
parsed = datetime.fromisoformat(normalized)
|
||||
except ValueError:
|
||||
return None
|
||||
if parsed.tzinfo is None:
|
||||
return parsed.replace(tzinfo=timezone.utc)
|
||||
return parsed.astimezone(timezone.utc)
|
||||
|
||||
|
||||
def _timestamp_to_datetime(value: object) -> datetime | None:
|
||||
try:
|
||||
timestamp = float(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if timestamp <= 0:
|
||||
return None
|
||||
try:
|
||||
return datetime.fromtimestamp(timestamp, timezone.utc)
|
||||
except (OSError, OverflowError, ValueError):
|
||||
return None
|
||||
def _task_priority(state: str) -> int:
|
||||
normalized = (state or "").strip().upper()
|
||||
return STATE_PRIORITY.get(normalized, 99)
|
||||
|
||||
|
||||
def _task_sort_timestamp(item: TaskMonitorTaskItem) -> datetime:
|
||||
for candidate in [item.started_at, item.done_at, item.eta]:
|
||||
if candidate is None:
|
||||
continue
|
||||
if candidate.tzinfo is None:
|
||||
return candidate.replace(tzinfo=timezone.utc)
|
||||
return candidate.astimezone(timezone.utc)
|
||||
return datetime.fromtimestamp(0, timezone.utc)
|
||||
|
||||
|
||||
def _build_error(result, traceback_value) -> str | None:
|
||||
result_text = None
|
||||
if result is not None:
|
||||
result_text = str(result).strip()
|
||||
|
||||
traceback_text = None
|
||||
if isinstance(traceback_value, str):
|
||||
traceback_text = traceback_value.strip()
|
||||
|
||||
for candidate in [result_text, traceback_text]:
|
||||
if not candidate:
|
||||
continue
|
||||
if len(candidate) <= 400:
|
||||
if candidate is not None:
|
||||
return candidate
|
||||
return f"{candidate[:397]}..."
|
||||
return None
|
||||
|
||||
|
||||
def _to_int(value: object) -> int:
|
||||
try:
|
||||
return int(value or 0)
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
|
||||
|
||||
def _mask_url(value: str) -> str:
|
||||
parsed = urlsplit(value)
|
||||
if not parsed.password:
|
||||
return value
|
||||
username = parsed.username or ""
|
||||
hostname = parsed.hostname or ""
|
||||
port = f":{parsed.port}" if parsed.port else ""
|
||||
netloc = f"{username}:***@{hostname}{port}"
|
||||
return urlunsplit((parsed.scheme, netloc, parsed.path, parsed.query, parsed.fragment))
|
||||
return datetime.fromtimestamp(0, tz=timezone.utc)
|
||||
|
||||
@@ -0,0 +1,165 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import timedelta
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from ..core.database import SessionLocal
|
||||
from ..models.base import utcnow
|
||||
from ..models.worker_registry import WorkerRegistry
|
||||
|
||||
|
||||
def register_worker(
|
||||
*,
|
||||
worker_name: str,
|
||||
status: str,
|
||||
queues: list[str] | None = None,
|
||||
pid: int | None = None,
|
||||
heartbeat_increment: bool = False,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
normalized_worker = worker_name.strip()
|
||||
if not normalized_worker:
|
||||
return
|
||||
|
||||
now = utcnow()
|
||||
queues_csv = _normalize_queues(queues)
|
||||
metadata_text = _to_json_text(metadata)
|
||||
normalized_status = (status or "").strip().lower() or "online"
|
||||
normalized_pid = _coerce_pid(pid)
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
row = db.execute(
|
||||
select(WorkerRegistry).where(WorkerRegistry.worker_name == normalized_worker)
|
||||
).scalar_one_or_none()
|
||||
if row is None:
|
||||
row = WorkerRegistry(
|
||||
worker_name=normalized_worker,
|
||||
status=normalized_status,
|
||||
queues_csv=queues_csv,
|
||||
pid=normalized_pid,
|
||||
heartbeat_count=1 if heartbeat_increment else 0,
|
||||
first_seen_at=now,
|
||||
last_seen_at=now,
|
||||
metadata_json=metadata_text,
|
||||
create_date=now,
|
||||
update_date=now,
|
||||
)
|
||||
db.add(row)
|
||||
db.commit()
|
||||
return
|
||||
|
||||
row.status = normalized_status
|
||||
if queues_csv is not None:
|
||||
row.queues_csv = queues_csv
|
||||
if normalized_pid is not None:
|
||||
row.pid = normalized_pid
|
||||
if heartbeat_increment:
|
||||
row.heartbeat_count = int(row.heartbeat_count or 0) + 1
|
||||
if metadata_text is not None:
|
||||
row.metadata_json = metadata_text
|
||||
row.last_seen_at = now
|
||||
row.update_date = now
|
||||
db.commit()
|
||||
except Exception:
|
||||
db.rollback()
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
def mark_worker_offline(worker_name: str) -> None:
|
||||
normalized_worker = worker_name.strip()
|
||||
if not normalized_worker:
|
||||
return
|
||||
now = utcnow()
|
||||
db = SessionLocal()
|
||||
try:
|
||||
row = db.execute(
|
||||
select(WorkerRegistry).where(WorkerRegistry.worker_name == normalized_worker)
|
||||
).scalar_one_or_none()
|
||||
if row is None:
|
||||
row = WorkerRegistry(
|
||||
worker_name=normalized_worker,
|
||||
status="offline",
|
||||
heartbeat_count=0,
|
||||
first_seen_at=now,
|
||||
last_seen_at=now,
|
||||
create_date=now,
|
||||
update_date=now,
|
||||
)
|
||||
db.add(row)
|
||||
db.commit()
|
||||
return
|
||||
row.status = "offline"
|
||||
row.last_seen_at = now
|
||||
row.update_date = now
|
||||
db.commit()
|
||||
except Exception:
|
||||
db.rollback()
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
def sweep_offline_workers(*, ttl_seconds: int) -> int:
|
||||
safe_ttl = max(10, int(ttl_seconds))
|
||||
threshold = utcnow() - timedelta(seconds=safe_ttl)
|
||||
db = SessionLocal()
|
||||
changed = 0
|
||||
try:
|
||||
rows = db.execute(
|
||||
select(WorkerRegistry).where(
|
||||
WorkerRegistry.status == "online",
|
||||
WorkerRegistry.last_seen_at < threshold,
|
||||
)
|
||||
).scalars().all()
|
||||
if not rows:
|
||||
return 0
|
||||
now = utcnow()
|
||||
for row in rows:
|
||||
row.status = "offline"
|
||||
row.update_date = now
|
||||
changed += 1
|
||||
db.commit()
|
||||
return changed
|
||||
except Exception:
|
||||
db.rollback()
|
||||
return 0
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
def _normalize_queues(queues: list[str] | None) -> str | None:
|
||||
if queues is None:
|
||||
return None
|
||||
normalized = sorted({item.strip() for item in queues if isinstance(item, str) and item.strip()})
|
||||
if not normalized:
|
||||
return ""
|
||||
return ",".join(normalized)
|
||||
|
||||
|
||||
def _to_json_text(value: dict[str, Any] | None) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
text = json.dumps(value, ensure_ascii=False)
|
||||
except TypeError:
|
||||
text = json.dumps({"repr": repr(value)}, ensure_ascii=False)
|
||||
if len(text) > 3900:
|
||||
text = text[:3897] + "..."
|
||||
return text
|
||||
|
||||
|
||||
def _coerce_pid(value: int | None) -> int | None:
|
||||
if value is not None:
|
||||
try:
|
||||
pid = int(value)
|
||||
except (TypeError, ValueError):
|
||||
pid = None
|
||||
if pid and pid > 0:
|
||||
return pid
|
||||
fallback = os.getpid()
|
||||
return fallback if fallback > 0 else None
|
||||
@@ -0,0 +1,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from ..core.celery_app import celery_app
|
||||
from ..core.config import get_settings
|
||||
from ..services.worker_registry_service import sweep_offline_workers
|
||||
|
||||
|
||||
@celery_app.task(name="app.tasks.worker_registry_tasks.sweep_worker_registry_offline")
|
||||
def sweep_worker_registry_offline() -> dict[str, int]:
|
||||
settings = get_settings()
|
||||
updated_count = sweep_offline_workers(ttl_seconds=settings.worker_registry_ttl_seconds)
|
||||
return {
|
||||
"updated_count": int(updated_count),
|
||||
}
|
||||
@@ -125,6 +125,9 @@ services:
|
||||
CELERY_RESULT_BACKEND: ${CELERY_RESULT_BACKEND:-redis://redis:6379/1}
|
||||
CELERY_TIMEZONE: ${CELERY_TIMEZONE:-Asia/Shanghai}
|
||||
SCHEDULER_EXPIRE_INTERVAL_SECONDS: ${SCHEDULER_EXPIRE_INTERVAL_SECONDS:-60}
|
||||
SCHEDULER_API_BASE_URL: ${SCHEDULER_API_BASE_URL:-http://scheduler:19100}
|
||||
SCHEDULER_API_TOKEN: ${SCHEDULER_API_TOKEN:-}
|
||||
SCHEDULER_DEFAULT_QUEUE: ${SCHEDULER_DEFAULT_QUEUE:-default}
|
||||
WINE_BINARY_PATH: ${WINE_BINARY_PATH:-wine}
|
||||
WINE_ALLOWED_ROOT: ${WINE_ALLOWED_ROOT:-./data/wine}
|
||||
WINE_DEFAULT_TIMEOUT_SECONDS: ${WINE_DEFAULT_TIMEOUT_SECONDS:-300}
|
||||
@@ -165,6 +168,7 @@ services:
|
||||
- worker
|
||||
- --loglevel=${CELERY_LOG_LEVEL:-INFO}
|
||||
- --concurrency=${CELERY_WORKER_CONCURRENCY:-2}
|
||||
- --queues=${CELERY_WORKER_QUEUES:-default,celery}
|
||||
depends_on:
|
||||
api:
|
||||
condition: service_healthy
|
||||
@@ -185,6 +189,13 @@ services:
|
||||
CELERY_RESULT_BACKEND: ${CELERY_RESULT_BACKEND:-redis://redis:6379/1}
|
||||
CELERY_TIMEZONE: ${CELERY_TIMEZONE:-Asia/Shanghai}
|
||||
SCHEDULER_EXPIRE_INTERVAL_SECONDS: ${SCHEDULER_EXPIRE_INTERVAL_SECONDS:-60}
|
||||
SCHEDULER_API_BASE_URL: ${SCHEDULER_API_BASE_URL:-http://scheduler:19100}
|
||||
SCHEDULER_API_TOKEN: ${SCHEDULER_API_TOKEN:-}
|
||||
SCHEDULER_DEFAULT_QUEUE: ${SCHEDULER_DEFAULT_QUEUE:-default}
|
||||
FLOWER_API_BASE_URL: ${FLOWER_API_BASE_URL:-http://flower:5555}
|
||||
FLOWER_API_TIMEOUT_SECONDS: ${FLOWER_API_TIMEOUT_SECONDS:-10}
|
||||
FLOWER_BASIC_AUTH: ${FLOWER_BASIC_AUTH:-}
|
||||
WORKER_REGISTRY_TTL_SECONDS: ${WORKER_REGISTRY_TTL_SECONDS:-90}
|
||||
restart: unless-stopped
|
||||
|
||||
celery-beat:
|
||||
@@ -224,6 +235,73 @@ services:
|
||||
CELERY_RESULT_BACKEND: ${CELERY_RESULT_BACKEND:-redis://redis:6379/1}
|
||||
CELERY_TIMEZONE: ${CELERY_TIMEZONE:-Asia/Shanghai}
|
||||
SCHEDULER_EXPIRE_INTERVAL_SECONDS: ${SCHEDULER_EXPIRE_INTERVAL_SECONDS:-60}
|
||||
SCHEDULER_API_BASE_URL: ${SCHEDULER_API_BASE_URL:-http://scheduler:19100}
|
||||
SCHEDULER_API_TOKEN: ${SCHEDULER_API_TOKEN:-}
|
||||
SCHEDULER_DEFAULT_QUEUE: ${SCHEDULER_DEFAULT_QUEUE:-default}
|
||||
WORKER_REGISTRY_TTL_SECONDS: ${WORKER_REGISTRY_TTL_SECONDS:-90}
|
||||
restart: unless-stopped
|
||||
|
||||
scheduler:
|
||||
build:
|
||||
context: ./api
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
PYTHON_BASE_IMAGE: ${PYTHON_BASE_IMAGE:-docker.m.daocloud.io/library/python:3.11-slim}
|
||||
PIP_INDEX_URL: ${PIP_INDEX_URL:-https://pypi.org/simple}
|
||||
PIP_DEFAULT_TIMEOUT: ${PIP_DEFAULT_TIMEOUT:-300}
|
||||
PIP_RETRIES: ${PIP_RETRIES:-20}
|
||||
container_name: fquiz-scheduler
|
||||
command:
|
||||
- uvicorn
|
||||
- app.scheduler_main:app
|
||||
- --host
|
||||
- 0.0.0.0
|
||||
- --port
|
||||
- "19100"
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
api:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
CELERY_BROKER_URL: ${CELERY_BROKER_URL:-redis://redis:6379/0}
|
||||
CELERY_RESULT_BACKEND: ${CELERY_RESULT_BACKEND:-redis://redis:6379/1}
|
||||
CELERY_TIMEZONE: ${CELERY_TIMEZONE:-Asia/Shanghai}
|
||||
SCHEDULER_API_TOKEN: ${SCHEDULER_API_TOKEN:-}
|
||||
SCHEDULER_DEFAULT_QUEUE: ${SCHEDULER_DEFAULT_QUEUE:-default}
|
||||
ports:
|
||||
- "${SCHEDULER_PORT:-19100}:19100"
|
||||
restart: unless-stopped
|
||||
|
||||
flower:
|
||||
build:
|
||||
context: ./api
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
PYTHON_BASE_IMAGE: ${PYTHON_BASE_IMAGE:-docker.m.daocloud.io/library/python:3.11-slim}
|
||||
PIP_INDEX_URL: ${PIP_INDEX_URL:-https://pypi.org/simple}
|
||||
PIP_DEFAULT_TIMEOUT: ${PIP_DEFAULT_TIMEOUT:-300}
|
||||
PIP_RETRIES: ${PIP_RETRIES:-20}
|
||||
container_name: fquiz-flower
|
||||
command:
|
||||
- celery
|
||||
- -A
|
||||
- app.core.celery_app.celery_app
|
||||
- flower
|
||||
- --address=0.0.0.0
|
||||
- --port=5555
|
||||
- --persistent=False
|
||||
- --basic-auth=${FLOWER_BASIC_AUTH:-admin:admin}
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
CELERY_BROKER_URL: ${CELERY_BROKER_URL:-redis://redis:6379/0}
|
||||
CELERY_RESULT_BACKEND: ${CELERY_RESULT_BACKEND:-redis://redis:6379/1}
|
||||
CELERY_TIMEZONE: ${CELERY_TIMEZONE:-Asia/Shanghai}
|
||||
FLOWER_BASIC_AUTH: ${FLOWER_BASIC_AUTH:-admin:admin}
|
||||
ports:
|
||||
- "${FLOWER_PORT:-5555}:5555"
|
||||
restart: unless-stopped
|
||||
|
||||
web:
|
||||
|
||||
@@ -598,3 +598,152 @@
|
||||
- 风险与影响:
|
||||
- 影响范围仅前端全局 404 页面。
|
||||
- 文案当前为英文(对齐原闪现页面),若后续需要中英双语可再单独调整。
|
||||
|
||||
## Work Log - 迁移 Celery 与任务/Worker 双监控页面到 fquiz(2026-05-01)
|
||||
|
||||
- 背景:
|
||||
- 需求要求将 `modo-next` 的 Celery 调度与两类监控页(任务监控、Worker 监控)迁移到当前 `fquiz`。
|
||||
- 仓库已有 Celery 与 `/admin/task-monitor`,缺口是“独立 Worker 监控页 + 对应 API/菜单入口”。
|
||||
|
||||
- 本次改动(最小闭环):
|
||||
- 后端新增 Worker 监控模型与接口:
|
||||
- `api/app/schemas/worker_monitor.py`
|
||||
- 新增 `WorkerMonitorOverviewResponse`、`WorkerMonitorTaskOverviewResponse` 及相关 item/summary 模型。
|
||||
- `api/app/services/worker_monitor_service.py`
|
||||
- 基于 Celery inspect 提供 Worker 概览(状态/队列/并发/处理量)。
|
||||
- 提供单 Worker 任务快照(active/reserved/scheduled)与 recent 历史任务聚合。
|
||||
- 复用现有 `task_monitor_service` 的通用解析工具,保持行为一致性。
|
||||
- `api/app/api/v1/worker_monitor.py`
|
||||
- 新增路由:
|
||||
- `GET /api/v1/admin/workers/overview`
|
||||
- `GET /api/v1/admin/workers/tasks?worker=...&recent_limit=...`
|
||||
- 权限统一 `celery.read` / `celery.manage`。
|
||||
- `api/app/api/router.py`
|
||||
- 挂载 `worker_monitor` 路由。
|
||||
|
||||
- 前端新增独立 Worker 监控页:
|
||||
- `web/src/app/admin/workers/page.tsx`
|
||||
- 新增 Worker 列表页:支持名称/队列/在线状态筛选、自动刷新、手动刷新。
|
||||
- 新增 Worker 任务抽屉:查看 active/reserved/scheduled/recent 四类任务明细(含 args/kwargs/error)。
|
||||
- 数据源对齐新后端 API。
|
||||
|
||||
- 菜单与权限入口联动:
|
||||
- `api/app/services/seed_service.py`
|
||||
- 新增菜单 `admin.workers`(`/admin/workers`)。
|
||||
- admin 默认菜单绑定加入 `admin.workers`。
|
||||
- 调整后续菜单 `sort_order`,避免排序冲突。
|
||||
- `api/app/services/legacy_authz_service.py`
|
||||
- 增加 `admin.workers -> celery.read/celery.manage` 权限映射。
|
||||
- synthetic legacy menu rows 增加 `admin.workers`。
|
||||
- 同步顺序号,保持 legacy 菜单序一致。
|
||||
- `api/app/services/legacy_admin_rbac_service.py`
|
||||
- `PROTECTED_MENU_CODES` 加入 `admin.workers`,避免误删。
|
||||
- `api/app/services/admin_service.py`
|
||||
- 菜单删除保护集合加入 `admin.workers`。
|
||||
- `web/src/app/admin/menus/page.tsx`
|
||||
- 前端受保护菜单集合加入 `admin.workers`。
|
||||
- `web/src/app/admin/page.tsx`
|
||||
- 后台首页新增“Worker监控”卡片入口。
|
||||
|
||||
- 验证:
|
||||
- 后端语法检查:
|
||||
- `python3 -m py_compile api/app/schemas/worker_monitor.py api/app/services/worker_monitor_service.py api/app/api/v1/worker_monitor.py api/app/api/router.py api/app/services/seed_service.py api/app/services/legacy_authz_service.py api/app/services/admin_service.py` -> 通过。
|
||||
- 前端构建:
|
||||
- `rm -f web/.next/lock && npm run build:web` -> 通过。
|
||||
- 构建产物路由包含:`/admin/workers`、`/admin/task-monitor`。
|
||||
|
||||
- 风险与影响:
|
||||
- 影响范围:Celery 监控模块(新增 Worker 监控维度),权限仍复用 `celery.read/manage`。
|
||||
- `recent_tasks` 依赖 Celery Redis result backend 中任务元数据是否含 `worker/hostname` 字段;若上游未写入,recent 列表会偏少,但不影响 active/reserved/scheduled 实时快照。
|
||||
|
||||
## Work Log - 新增 Scheduler + Flower + Worker 自动注册并切换监控数据源(2026-05-01)
|
||||
|
||||
- 背景:
|
||||
- 新需求要求当前 `fquiz` 引入独立 `scheduler` 服务、`flower` 监控服务,监控页面改走 Flower,并提供 worker 自动注册机制。
|
||||
|
||||
- 本次改动(最小闭环):
|
||||
|
||||
- 后端新增 `scheduler` 调度 API:
|
||||
- `api/app/schemas/scheduler.py`
|
||||
- 定义任务入队/撤销请求响应模型。
|
||||
- `api/app/services/scheduler_service.py`
|
||||
- 封装 `celery_app.send_task` 与 `celery_app.control.revoke`。
|
||||
- `api/app/api/v1/scheduler.py`
|
||||
- 提供:
|
||||
- `GET /api/v1/scheduler/healthz`
|
||||
- `POST /api/v1/scheduler/v1/tasks/enqueue`
|
||||
- `POST /api/v1/scheduler/v1/tasks/revoke`
|
||||
- 支持 `x-scheduler-token`(环境变量配置后生效)。
|
||||
- `api/app/scheduler_main.py`
|
||||
- 独立 scheduler 进程入口(只挂 scheduler 路由)。
|
||||
- `api/app/api/router.py`
|
||||
- 主 API 路由挂载 scheduler 与 flower monitor 路由。
|
||||
|
||||
- 后端新增 Flower 监控代理层(页面不直连 Flower):
|
||||
- `api/app/schemas/flower_monitor.py`
|
||||
- 定义 worker 概览与任务快照模型。
|
||||
- `api/app/services/flower_monitor_service.py`
|
||||
- 代理调用 Flower API:
|
||||
- `/api/workers?refresh=...`
|
||||
- `/api/workers?status=true`
|
||||
- `/api/tasks?limit=...&workername=...`
|
||||
- 统一解析 worker/task 字段、状态、时间戳。
|
||||
- `api/app/api/v1/flower_monitor.py`
|
||||
- 提供:
|
||||
- `GET /api/v1/admin/flower/workers`
|
||||
- `GET /api/v1/admin/flower/worker-tasks`
|
||||
- 权限:`celery.read`/`celery.manage`。
|
||||
|
||||
- Worker 自动注册机制:
|
||||
- `api/app/models/worker_registry.py`
|
||||
- 新增 `worker_registry` 表模型。
|
||||
- `api/app/services/worker_registry_service.py`
|
||||
- worker 注册、离线标记、离线扫描。
|
||||
- `api/app/core/worker_signals.py`
|
||||
- 绑定 Celery 信号:`worker_ready` / `heartbeat_sent` / `worker_shutdown`。
|
||||
- `api/app/tasks/worker_registry_tasks.py`
|
||||
- 定时离线扫描任务。
|
||||
- `api/app/core/celery_app.py`
|
||||
- include 新任务模块,beat schedule 增加 `sweep_worker_registry_offline`。
|
||||
- 引入 worker signal 注册。
|
||||
- `api/app/core/database.py` + `api/app/models/__init__.py`
|
||||
- 把 `worker_registry` 纳入 ORM 初始化与建表。
|
||||
|
||||
- 监控页面切 Flower:
|
||||
- `web/src/app/admin/workers/page.tsx`
|
||||
- 改为请求 `/api/v1/admin/flower/workers` 与 `/api/v1/admin/flower/worker-tasks`。
|
||||
- 展示 Flower 维度字段(在线状态、并发、registered、processed、心跳、任务来源等)。
|
||||
- `web/src/app/admin/task-monitor/page.tsx`
|
||||
- 重构为 Flower 聚合视图:批量拉取每个 worker 任务快照并汇总状态分布/任务表。
|
||||
- `api/app/services/task_monitor_service.py`
|
||||
- 从原 inspect+redis 直读切换为基于 Flower 代理服务聚合,保持原接口契约。
|
||||
|
||||
- 调度链路接入 scheduler(可选):
|
||||
- `api/app/services/elevation_service.py`
|
||||
- `create_apply_job` 增加 `dispatch_mode`,支持 `scheduler_api`。
|
||||
- `api/app/api/v1/elevation.py` 新增 `dispatchMode` 查询参数透传。
|
||||
- 默认仍为 `celery_direct`,不破坏现有行为。
|
||||
|
||||
- 运行环境与编排配置:
|
||||
- `docker-compose.yml`
|
||||
- 新增 `scheduler` 服务(19100)。
|
||||
- 新增 `flower` 服务(5555,basic auth)。
|
||||
- `celery-worker` 增加 `CELERY_WORKER_QUEUES` 配置。
|
||||
- API/worker/beat 注入 `SCHEDULER_*`、`FLOWER_*`、`WORKER_REGISTRY_TTL_SECONDS`。
|
||||
- `.env.example`
|
||||
- 新增 `SCHEDULER_API_BASE_URL`、`SCHEDULER_API_TOKEN`、`SCHEDULER_DEFAULT_QUEUE`。
|
||||
- 新增 `FLOWER_API_BASE_URL`、`FLOWER_API_TIMEOUT_SECONDS`、`FLOWER_BASIC_AUTH`、`FLOWER_PORT`。
|
||||
- 新增 `WORKER_REGISTRY_TTL_SECONDS`、`CELERY_WORKER_QUEUES`、`SCHEDULER_PORT`。
|
||||
|
||||
- 验证:
|
||||
- 后端语法校验通过:
|
||||
- `python3 -m py_compile ...`(覆盖 scheduler/flower/worker_registry/celery/task-monitor 相关文件)
|
||||
- 前端构建通过:
|
||||
- `npm run build:web` -> 通过,页面路由包含 `/admin/workers`、`/admin/task-monitor`。
|
||||
- Compose 渲染校验通过:
|
||||
- `docker compose config` 渲染包含 `scheduler`、`flower` 服务以及关键 env/command 字段。
|
||||
|
||||
- 风险与影响:
|
||||
- Flower 代理链路新增了对 `FLOWER_BASIC_AUTH`、`FLOWER_API_BASE_URL` 的配置依赖,配置不一致会导致监控页 502。
|
||||
- `task-monitor` 改为按 worker 聚合调用,worker 数量多时会增加 API 聚合开销。
|
||||
- `dispatchMode=scheduler_api` 需确保 `scheduler` 服务可达;默认 `celery_direct` 兼容旧行为。
|
||||
|
||||
@@ -70,6 +70,7 @@ const PROTECTED_MENU_CODES = new Set([
|
||||
"admin.power_lines",
|
||||
"admin.lightning_currents",
|
||||
"admin.lightning_distribution",
|
||||
"admin.workers",
|
||||
"admin.data_query",
|
||||
"admin.hot_search",
|
||||
"admin.task_monitor",
|
||||
|
||||
@@ -6,6 +6,7 @@ import {
|
||||
AuditOutlined,
|
||||
CodeOutlined,
|
||||
DatabaseOutlined,
|
||||
DeploymentUnitOutlined,
|
||||
FileSearchOutlined,
|
||||
FolderOpenOutlined,
|
||||
GlobalOutlined,
|
||||
@@ -72,6 +73,14 @@ const CARDS: DashboardCard[] = [
|
||||
icon: <FolderOpenOutlined />,
|
||||
visible: (hasPermission) => hasPermission("file.read") || hasPermission("file.manage"),
|
||||
},
|
||||
{
|
||||
href: "/workers",
|
||||
title: "Worker监控",
|
||||
description: "查看 Worker 在线状态、并发、活跃队列和每个 Worker 的任务快照。",
|
||||
category: "协作",
|
||||
icon: <DeploymentUnitOutlined />,
|
||||
visible: (hasPermission) => hasPermission("celery.read") || hasPermission("celery.manage"),
|
||||
},
|
||||
{
|
||||
href: "/task-monitor",
|
||||
title: "任务监控",
|
||||
|
||||
@@ -11,8 +11,9 @@ import {
|
||||
Card,
|
||||
Col,
|
||||
Empty,
|
||||
InputNumber,
|
||||
Input,
|
||||
Row,
|
||||
Select,
|
||||
Space,
|
||||
Spin,
|
||||
Statistic,
|
||||
@@ -30,62 +31,67 @@ import { readApiError } from "@/lib/api";
|
||||
const { Text } = Typography;
|
||||
const AntCard = Card as unknown as ComponentType<CardProps>;
|
||||
|
||||
const DEFAULT_TASK_LIMIT = 100;
|
||||
const DEFAULT_HISTORY_LIMIT = 100;
|
||||
const DEFAULT_RECENT_LIMIT = 100;
|
||||
|
||||
type TaskMonitorBucketItem = {
|
||||
key: string;
|
||||
label: string;
|
||||
count: number;
|
||||
};
|
||||
|
||||
type TaskMonitorWorkerItem = {
|
||||
type FlowerWorkerItem = {
|
||||
worker: string;
|
||||
online: boolean;
|
||||
status: string;
|
||||
queue_names: string[];
|
||||
max_concurrency: number;
|
||||
registered_count: number;
|
||||
processed_count: number;
|
||||
concurrency: number;
|
||||
prefetch_count: number;
|
||||
uptime_seconds: number;
|
||||
processed_total: number;
|
||||
active_count: number;
|
||||
reserved_count: number;
|
||||
scheduled_count: number;
|
||||
last_heartbeat_at: string | null;
|
||||
};
|
||||
|
||||
type TaskMonitorQueueItem = {
|
||||
name: string;
|
||||
pending_count: number;
|
||||
consumer_count: number;
|
||||
active_count: number;
|
||||
reserved_count: number;
|
||||
scheduled_count: number;
|
||||
type FlowerWorkersOverviewResponse = {
|
||||
generated_at: string;
|
||||
workers: FlowerWorkerItem[];
|
||||
summary: {
|
||||
total: number;
|
||||
online: number;
|
||||
offline: number;
|
||||
};
|
||||
};
|
||||
|
||||
type TaskMonitorTaskItem = {
|
||||
type FlowerTaskItem = {
|
||||
task_id: string;
|
||||
name: string;
|
||||
state: string;
|
||||
source: string;
|
||||
worker: string;
|
||||
queue_name: string | null;
|
||||
worker: string | null;
|
||||
retries: number;
|
||||
args_text: string | null;
|
||||
kwargs_text: string | null;
|
||||
eta: string | null;
|
||||
received_at: string | null;
|
||||
started_at: string | null;
|
||||
done_at: string | null;
|
||||
finished_at: string | null;
|
||||
runtime_seconds: number | null;
|
||||
error: string | null;
|
||||
result_text: string | null;
|
||||
exception_text: string | null;
|
||||
};
|
||||
|
||||
type TaskMonitorOverviewResponse = {
|
||||
type FlowerWorkerTaskOverviewResponse = {
|
||||
generated_at: string;
|
||||
broker_url: string;
|
||||
result_backend: string;
|
||||
workers_online: number;
|
||||
worker_concurrency_total: number;
|
||||
queue_pending_total: number;
|
||||
task_state_buckets: TaskMonitorBucketItem[];
|
||||
workers: TaskMonitorWorkerItem[];
|
||||
queues: TaskMonitorQueueItem[];
|
||||
tasks: TaskMonitorTaskItem[];
|
||||
worker: string;
|
||||
active_tasks: FlowerTaskItem[];
|
||||
reserved_tasks: FlowerTaskItem[];
|
||||
scheduled_tasks: FlowerTaskItem[];
|
||||
recent_tasks: FlowerTaskItem[];
|
||||
summary: {
|
||||
active: number;
|
||||
reserved: number;
|
||||
scheduled: number;
|
||||
recent: number;
|
||||
};
|
||||
};
|
||||
|
||||
type TaskTableRow = FlowerTaskItem & {
|
||||
key: string;
|
||||
};
|
||||
|
||||
function formatDateTime(value: string | null | undefined): string {
|
||||
@@ -99,27 +105,6 @@ function formatDateTime(value: string | null | undefined): string {
|
||||
return parsed.format("YYYY-MM-DD HH:mm:ss");
|
||||
}
|
||||
|
||||
function formatDuration(seconds: number): string {
|
||||
const total = Math.max(0, Math.trunc(seconds));
|
||||
const hours = Math.floor(total / 3600);
|
||||
const minutes = Math.floor((total % 3600) / 60);
|
||||
const remain = total % 60;
|
||||
if (hours > 0) {
|
||||
return `${hours}h ${minutes}m ${remain}s`;
|
||||
}
|
||||
if (minutes > 0) {
|
||||
return `${minutes}m ${remain}s`;
|
||||
}
|
||||
return `${remain}s`;
|
||||
}
|
||||
|
||||
function normalizePositiveInt(value: number | null | undefined, fallback: number, min: number, max: number): number {
|
||||
if (typeof value !== "number" || Number.isNaN(value)) {
|
||||
return fallback;
|
||||
}
|
||||
return Math.min(max, Math.max(min, Math.trunc(value)));
|
||||
}
|
||||
|
||||
function renderTaskStateTag(state: string) {
|
||||
const normalized = (state || "").toUpperCase();
|
||||
const color =
|
||||
@@ -141,53 +126,96 @@ function renderTaskStateTag(state: string) {
|
||||
return <Tag color={color}>{normalized || "UNKNOWN"}</Tag>;
|
||||
}
|
||||
|
||||
function renderOnlineTag(online: boolean) {
|
||||
return online ? <Tag color="green">在线</Tag> : <Tag color="default">离线</Tag>;
|
||||
function renderWorkerStatusTag(status: string) {
|
||||
return (status || "").toUpperCase() === "ONLINE" ? <Tag color="green">在线</Tag> : <Tag color="default">离线</Tag>;
|
||||
}
|
||||
|
||||
function containsText(source: string | null | undefined, keyword: string): boolean {
|
||||
if (!keyword) {
|
||||
return true;
|
||||
}
|
||||
return (source || "").toLowerCase().includes(keyword.toLowerCase());
|
||||
}
|
||||
|
||||
function toTaskRows(workerName: string, source: string, tasks: FlowerTaskItem[]): TaskTableRow[] {
|
||||
return tasks.map((item, index) => ({
|
||||
...item,
|
||||
source: item.source || source,
|
||||
worker: item.worker || workerName,
|
||||
key: `${workerName}:${source}:${item.task_id}:${index + 1}`,
|
||||
}));
|
||||
}
|
||||
|
||||
function parseStatusFilter(value: string | undefined): "all" | "online" | "offline" {
|
||||
if (value === "online" || value === "offline") {
|
||||
return value;
|
||||
}
|
||||
return "all";
|
||||
}
|
||||
|
||||
export default function AdminTaskMonitorPage() {
|
||||
const { user, initializing, fetchWithAuth, hasPermission } = useAuth();
|
||||
const [taskLimit, setTaskLimit] = useState(DEFAULT_TASK_LIMIT);
|
||||
const [historyLimit, setHistoryLimit] = useState(DEFAULT_HISTORY_LIMIT);
|
||||
const [autoRefresh, setAutoRefresh] = useState(true);
|
||||
|
||||
const canRead = hasPermission("celery.read") || hasPermission("celery.manage");
|
||||
|
||||
const overviewPath = useMemo(() => {
|
||||
const params = new URLSearchParams();
|
||||
params.set("task_limit", String(taskLimit));
|
||||
params.set("history_limit", String(historyLimit));
|
||||
return `/api/v1/admin/task-monitor/overview?${params.toString()}`;
|
||||
}, [historyLimit, taskLimit]);
|
||||
const [autoRefresh, setAutoRefresh] = useState(true);
|
||||
const [workerKeyword, setWorkerKeyword] = useState("");
|
||||
const [queueKeyword, setQueueKeyword] = useState("");
|
||||
const [taskKeyword, setTaskKeyword] = useState("");
|
||||
const [statusFilter, setStatusFilter] = useState<"all" | "online" | "offline">("all");
|
||||
|
||||
const overviewQuery = useQuery({
|
||||
queryKey: ["task-monitor-overview", overviewPath],
|
||||
const workersOverviewQuery = useQuery({
|
||||
queryKey: ["flower-workers-overview"],
|
||||
enabled: Boolean(user) && canRead,
|
||||
queryFn: async () => {
|
||||
const response = await fetchWithAuth(overviewPath);
|
||||
const response = await fetchWithAuth("/api/v1/admin/flower/workers?forceRefresh=false");
|
||||
if (!response.ok) {
|
||||
throw new Error(await readApiError(response));
|
||||
}
|
||||
return (await response.json()) as TaskMonitorOverviewResponse;
|
||||
return (await response.json()) as FlowerWorkersOverviewResponse;
|
||||
},
|
||||
refetchInterval: autoRefresh ? 5_000 : false,
|
||||
staleTime: 15_000,
|
||||
});
|
||||
|
||||
const workerColumns = useMemo<TableColumnsType<TaskMonitorWorkerItem>>(
|
||||
const workerNames = useMemo(() => (workersOverviewQuery.data?.workers || []).map((item) => item.worker), [workersOverviewQuery.data?.workers]);
|
||||
|
||||
const allTasksQuery = useQuery({
|
||||
queryKey: ["flower-worker-tasks-batch", workerNames],
|
||||
enabled: Boolean(user) && canRead && workerNames.length > 0,
|
||||
queryFn: async () => {
|
||||
const settled = await Promise.all(
|
||||
workerNames.map(async (worker) => {
|
||||
const params = new URLSearchParams();
|
||||
params.set("worker", worker);
|
||||
params.set("recentLimit", String(DEFAULT_RECENT_LIMIT));
|
||||
params.set("forceRefresh", "false");
|
||||
const response = await fetchWithAuth(`/api/v1/admin/flower/worker-tasks?${params.toString()}`);
|
||||
if (!response.ok) {
|
||||
throw new Error(await readApiError(response));
|
||||
}
|
||||
return (await response.json()) as FlowerWorkerTaskOverviewResponse;
|
||||
}),
|
||||
);
|
||||
return settled;
|
||||
},
|
||||
refetchInterval: autoRefresh ? 5_000 : false,
|
||||
staleTime: 15_000,
|
||||
});
|
||||
|
||||
const workerColumns = useMemo<TableColumnsType<FlowerWorkerItem>>(
|
||||
() => [
|
||||
{
|
||||
title: "Worker",
|
||||
dataIndex: "worker",
|
||||
key: "worker",
|
||||
width: 260,
|
||||
width: 280,
|
||||
},
|
||||
{
|
||||
title: "状态",
|
||||
dataIndex: "online",
|
||||
key: "online",
|
||||
dataIndex: "status",
|
||||
key: "status",
|
||||
width: 90,
|
||||
render: (value: boolean) => renderOnlineTag(value),
|
||||
render: (value: string) => renderWorkerStatusTag(value),
|
||||
},
|
||||
{
|
||||
title: "队列",
|
||||
@@ -197,28 +225,27 @@ export default function AdminTaskMonitorPage() {
|
||||
},
|
||||
{
|
||||
title: "并发",
|
||||
dataIndex: "max_concurrency",
|
||||
key: "max_concurrency",
|
||||
width: 90,
|
||||
dataIndex: "concurrency",
|
||||
key: "concurrency",
|
||||
width: 80,
|
||||
},
|
||||
{
|
||||
title: "预取",
|
||||
title: "Prefetch",
|
||||
dataIndex: "prefetch_count",
|
||||
key: "prefetch_count",
|
||||
width: 90,
|
||||
},
|
||||
{
|
||||
title: "在线时长",
|
||||
dataIndex: "uptime_seconds",
|
||||
key: "uptime_seconds",
|
||||
width: 120,
|
||||
render: (value: number) => formatDuration(value),
|
||||
title: "已注册任务",
|
||||
dataIndex: "registered_count",
|
||||
key: "registered_count",
|
||||
width: 110,
|
||||
},
|
||||
{
|
||||
title: "累计处理",
|
||||
dataIndex: "processed_total",
|
||||
key: "processed_total",
|
||||
width: 120,
|
||||
dataIndex: "processed_count",
|
||||
key: "processed_count",
|
||||
width: 110,
|
||||
},
|
||||
{
|
||||
title: "Active/Reserved/Scheduled",
|
||||
@@ -226,65 +253,31 @@ export default function AdminTaskMonitorPage() {
|
||||
width: 190,
|
||||
render: (_: unknown, record) => `${record.active_count}/${record.reserved_count}/${record.scheduled_count}`,
|
||||
},
|
||||
],
|
||||
[],
|
||||
);
|
||||
|
||||
const queueColumns = useMemo<TableColumnsType<TaskMonitorQueueItem>>(
|
||||
() => [
|
||||
{
|
||||
title: "队列",
|
||||
dataIndex: "name",
|
||||
key: "name",
|
||||
},
|
||||
{
|
||||
title: "Pending",
|
||||
dataIndex: "pending_count",
|
||||
key: "pending_count",
|
||||
width: 110,
|
||||
},
|
||||
{
|
||||
title: "Consumer",
|
||||
dataIndex: "consumer_count",
|
||||
key: "consumer_count",
|
||||
width: 110,
|
||||
},
|
||||
{
|
||||
title: "Active",
|
||||
dataIndex: "active_count",
|
||||
key: "active_count",
|
||||
width: 100,
|
||||
},
|
||||
{
|
||||
title: "Reserved",
|
||||
dataIndex: "reserved_count",
|
||||
key: "reserved_count",
|
||||
width: 110,
|
||||
},
|
||||
{
|
||||
title: "Scheduled",
|
||||
dataIndex: "scheduled_count",
|
||||
key: "scheduled_count",
|
||||
width: 120,
|
||||
title: "最近心跳",
|
||||
dataIndex: "last_heartbeat_at",
|
||||
key: "last_heartbeat_at",
|
||||
width: 170,
|
||||
render: (value: string | null) => formatDateTime(value),
|
||||
},
|
||||
],
|
||||
[],
|
||||
);
|
||||
|
||||
const taskColumns = useMemo<TableColumnsType<TaskMonitorTaskItem>>(
|
||||
const taskColumns = useMemo<TableColumnsType<TaskTableRow>>(
|
||||
() => [
|
||||
{
|
||||
title: "Task ID",
|
||||
dataIndex: "task_id",
|
||||
key: "task_id",
|
||||
width: 280,
|
||||
width: 260,
|
||||
render: (value: string) => <Text copyable>{value}</Text>,
|
||||
},
|
||||
{
|
||||
title: "任务名",
|
||||
dataIndex: "name",
|
||||
key: "name",
|
||||
width: 260,
|
||||
width: 220,
|
||||
render: (value: string) => value || "-",
|
||||
},
|
||||
{
|
||||
@@ -294,11 +287,18 @@ export default function AdminTaskMonitorPage() {
|
||||
width: 110,
|
||||
render: (value: string) => renderTaskStateTag(value),
|
||||
},
|
||||
{
|
||||
title: "来源",
|
||||
dataIndex: "source",
|
||||
key: "source",
|
||||
width: 100,
|
||||
render: (value: string) => value || "-",
|
||||
},
|
||||
{
|
||||
title: "队列",
|
||||
dataIndex: "queue_name",
|
||||
key: "queue_name",
|
||||
width: 130,
|
||||
width: 120,
|
||||
render: (value: string | null) => value || "-",
|
||||
},
|
||||
{
|
||||
@@ -306,18 +306,12 @@ export default function AdminTaskMonitorPage() {
|
||||
dataIndex: "worker",
|
||||
key: "worker",
|
||||
width: 220,
|
||||
render: (value: string | null) => value || "-",
|
||||
render: (value: string) => value || "-",
|
||||
},
|
||||
{
|
||||
title: "重试",
|
||||
dataIndex: "retries",
|
||||
key: "retries",
|
||||
width: 80,
|
||||
},
|
||||
{
|
||||
title: "ETA",
|
||||
dataIndex: "eta",
|
||||
key: "eta",
|
||||
title: "接收",
|
||||
dataIndex: "received_at",
|
||||
key: "received_at",
|
||||
width: 170,
|
||||
render: (value: string | null) => formatDateTime(value),
|
||||
},
|
||||
@@ -330,8 +324,8 @@ export default function AdminTaskMonitorPage() {
|
||||
},
|
||||
{
|
||||
title: "完成",
|
||||
dataIndex: "done_at",
|
||||
key: "done_at",
|
||||
dataIndex: "finished_at",
|
||||
key: "finished_at",
|
||||
width: 170,
|
||||
render: (value: string | null) => formatDateTime(value),
|
||||
},
|
||||
@@ -340,13 +334,27 @@ export default function AdminTaskMonitorPage() {
|
||||
dataIndex: "runtime_seconds",
|
||||
key: "runtime_seconds",
|
||||
width: 110,
|
||||
render: (value: number | null) => (value === null ? "-" : `${value.toFixed(1)}s`),
|
||||
render: (value: number | null) => (value === null ? "-" : `${value.toFixed(3)}s`),
|
||||
},
|
||||
{
|
||||
title: "错误",
|
||||
dataIndex: "error",
|
||||
key: "error",
|
||||
width: 260,
|
||||
title: "Args",
|
||||
dataIndex: "args_text",
|
||||
key: "args_text",
|
||||
width: 220,
|
||||
render: (value: string | null) => (value ? <Text ellipsis={{ tooltip: value }}>{value}</Text> : "-"),
|
||||
},
|
||||
{
|
||||
title: "Kwargs",
|
||||
dataIndex: "kwargs_text",
|
||||
key: "kwargs_text",
|
||||
width: 220,
|
||||
render: (value: string | null) => (value ? <Text ellipsis={{ tooltip: value }}>{value}</Text> : "-"),
|
||||
},
|
||||
{
|
||||
title: "Exception",
|
||||
dataIndex: "exception_text",
|
||||
key: "exception_text",
|
||||
width: 220,
|
||||
render: (value: string | null) =>
|
||||
value ? (
|
||||
<Text type="danger" ellipsis={{ tooltip: value }}>
|
||||
@@ -360,7 +368,75 @@ export default function AdminTaskMonitorPage() {
|
||||
[],
|
||||
);
|
||||
|
||||
if (initializing || (overviewQuery.isLoading && !overviewQuery.data && canRead && Boolean(user))) {
|
||||
const filteredWorkers = useMemo(() => {
|
||||
const rows = workersOverviewQuery.data?.workers || [];
|
||||
return rows.filter((item) => {
|
||||
const online = (item.status || "").toUpperCase() === "ONLINE";
|
||||
if (statusFilter === "online" && !online) {
|
||||
return false;
|
||||
}
|
||||
if (statusFilter === "offline" && online) {
|
||||
return false;
|
||||
}
|
||||
if (!containsText(item.worker, workerKeyword.trim())) {
|
||||
return false;
|
||||
}
|
||||
if (queueKeyword.trim()) {
|
||||
const text = item.queue_names.join(", ");
|
||||
if (!containsText(text, queueKeyword.trim())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}, [workersOverviewQuery.data?.workers, statusFilter, workerKeyword, queueKeyword]);
|
||||
|
||||
const allTaskRows = useMemo(() => {
|
||||
const workerTaskOverviews = allTasksQuery.data || [];
|
||||
const rows: TaskTableRow[] = [];
|
||||
for (const overview of workerTaskOverviews) {
|
||||
rows.push(...toTaskRows(overview.worker, "ACTIVE", overview.active_tasks));
|
||||
rows.push(...toTaskRows(overview.worker, "RESERVED", overview.reserved_tasks));
|
||||
rows.push(...toTaskRows(overview.worker, "SCHEDULED", overview.scheduled_tasks));
|
||||
rows.push(...toTaskRows(overview.worker, "RECENT", overview.recent_tasks));
|
||||
}
|
||||
return rows;
|
||||
}, [allTasksQuery.data]);
|
||||
|
||||
const filteredTaskRows = useMemo(() => {
|
||||
const workerSet = new Set(filteredWorkers.map((item) => item.worker));
|
||||
const keyword = taskKeyword.trim();
|
||||
return allTaskRows.filter((item) => {
|
||||
if (!workerSet.has(item.worker)) {
|
||||
return false;
|
||||
}
|
||||
if (!keyword) {
|
||||
return true;
|
||||
}
|
||||
const haystack = [item.task_id, item.name, item.queue_name || "", item.worker, item.args_text || "", item.kwargs_text || ""]
|
||||
.join(" ")
|
||||
.toLowerCase();
|
||||
return haystack.includes(keyword.toLowerCase());
|
||||
});
|
||||
}, [allTaskRows, filteredWorkers, taskKeyword]);
|
||||
|
||||
const stateBuckets = useMemo(() => {
|
||||
const counts = new Map<string, number>();
|
||||
for (const row of filteredTaskRows) {
|
||||
const state = (row.state || "UNKNOWN").toUpperCase();
|
||||
counts.set(state, (counts.get(state) || 0) + 1);
|
||||
}
|
||||
return Array.from(counts.entries())
|
||||
.sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]))
|
||||
.map(([state, count]) => ({ state, count }));
|
||||
}, [filteredTaskRows]);
|
||||
|
||||
const queuePendingTotal = filteredWorkers.reduce(
|
||||
(sum, item) => sum + item.active_count + item.reserved_count + item.scheduled_count,
|
||||
0,
|
||||
);
|
||||
|
||||
if (initializing || (workersOverviewQuery.isLoading && !workersOverviewQuery.data && canRead && Boolean(user))) {
|
||||
return (
|
||||
<div className="py-10">
|
||||
<Spin tip="任务监控数据加载中..." />
|
||||
@@ -396,85 +472,111 @@ export default function AdminTaskMonitorPage() {
|
||||
);
|
||||
}
|
||||
|
||||
const overview = overviewQuery.data;
|
||||
const workersOverview = workersOverviewQuery.data;
|
||||
|
||||
return (
|
||||
<Space direction="vertical" size={16} style={{ width: "100%" }}>
|
||||
<AntCard>
|
||||
<Space size={16} wrap>
|
||||
<Space size={8}>
|
||||
<Text>任务列表上限</Text>
|
||||
<InputNumber
|
||||
min={1}
|
||||
max={500}
|
||||
value={taskLimit}
|
||||
onChange={(value) => setTaskLimit(normalizePositiveInt(value, DEFAULT_TASK_LIMIT, 1, 500))}
|
||||
/>
|
||||
</Space>
|
||||
<Space size={8}>
|
||||
<Text>历史任务扫描上限</Text>
|
||||
<InputNumber
|
||||
min={0}
|
||||
max={500}
|
||||
value={historyLimit}
|
||||
onChange={(value) => setHistoryLimit(normalizePositiveInt(value, DEFAULT_HISTORY_LIMIT, 0, 500))}
|
||||
/>
|
||||
</Space>
|
||||
<Input
|
||||
allowClear
|
||||
placeholder="按 Worker 名称筛选"
|
||||
value={workerKeyword}
|
||||
onChange={(event) => setWorkerKeyword(event.target.value)}
|
||||
style={{ width: 220 }}
|
||||
/>
|
||||
<Input
|
||||
allowClear
|
||||
placeholder="按队列名称筛选"
|
||||
value={queueKeyword}
|
||||
onChange={(event) => setQueueKeyword(event.target.value)}
|
||||
style={{ width: 220 }}
|
||||
/>
|
||||
<Input
|
||||
allowClear
|
||||
placeholder="按 Task ID/任务名筛选"
|
||||
value={taskKeyword}
|
||||
onChange={(event) => setTaskKeyword(event.target.value)}
|
||||
style={{ width: 240 }}
|
||||
/>
|
||||
<Select
|
||||
value={statusFilter}
|
||||
onChange={(value) => setStatusFilter(parseStatusFilter(value))}
|
||||
options={[
|
||||
{ label: "全部状态", value: "all" },
|
||||
{ label: "在线", value: "online" },
|
||||
{ label: "离线", value: "offline" },
|
||||
]}
|
||||
style={{ width: 150 }}
|
||||
/>
|
||||
<Space size={8}>
|
||||
<Text>自动刷新</Text>
|
||||
<Switch checked={autoRefresh} onChange={setAutoRefresh} />
|
||||
</Space>
|
||||
<Button onClick={() => void overviewQuery.refetch()} loading={overviewQuery.isFetching}>
|
||||
<Button
|
||||
onClick={() => {
|
||||
void workersOverviewQuery.refetch();
|
||||
void allTasksQuery.refetch();
|
||||
}}
|
||||
loading={workersOverviewQuery.isFetching || allTasksQuery.isFetching}
|
||||
>
|
||||
刷新监控数据
|
||||
</Button>
|
||||
<Text type="secondary">生成时间:{formatDateTime(overview?.generated_at)}</Text>
|
||||
<Text type="secondary">生成时间:{formatDateTime(workersOverview?.generated_at)}</Text>
|
||||
</Space>
|
||||
</AntCard>
|
||||
|
||||
{overviewQuery.error && (
|
||||
{workersOverviewQuery.error && (
|
||||
<Alert
|
||||
type="error"
|
||||
showIcon
|
||||
message={overviewQuery.error instanceof Error ? overviewQuery.error.message : "任务监控数据加载失败"}
|
||||
message={workersOverviewQuery.error instanceof Error ? workersOverviewQuery.error.message : "任务监控数据加载失败"}
|
||||
/>
|
||||
)}
|
||||
{allTasksQuery.error && (
|
||||
<Alert
|
||||
type="error"
|
||||
showIcon
|
||||
message={allTasksQuery.error instanceof Error ? allTasksQuery.error.message : "任务列表数据加载失败"}
|
||||
/>
|
||||
)}
|
||||
|
||||
{!overview && !overviewQuery.isFetching && (
|
||||
{!workersOverview && !workersOverviewQuery.isFetching && (
|
||||
<AntCard>
|
||||
<Empty description="暂无任务监控数据" />
|
||||
</AntCard>
|
||||
)}
|
||||
|
||||
{overview && (
|
||||
{workersOverview && (
|
||||
<>
|
||||
<Row gutter={[16, 16]}>
|
||||
<Col xs={24} md={6}>
|
||||
<AntCard>
|
||||
<Statistic title="在线 Worker" value={overview.workers_online} />
|
||||
<Statistic title="在线 Worker" value={workersOverview.summary.online} />
|
||||
</AntCard>
|
||||
</Col>
|
||||
<Col xs={24} md={6}>
|
||||
<AntCard>
|
||||
<Statistic title="总并发" value={overview.worker_concurrency_total} />
|
||||
<Statistic title="离线 Worker" value={workersOverview.summary.offline} />
|
||||
</AntCard>
|
||||
</Col>
|
||||
<Col xs={24} md={6}>
|
||||
<AntCard>
|
||||
<Statistic title="队列待处理" value={overview.queue_pending_total} />
|
||||
<Statistic title="队列待处理" value={queuePendingTotal} />
|
||||
</AntCard>
|
||||
</Col>
|
||||
<Col xs={24} md={6}>
|
||||
<AntCard>
|
||||
<Statistic title="采样任务数" value={overview.tasks.length} />
|
||||
<Statistic title="采样任务数" value={filteredTaskRows.length} />
|
||||
</AntCard>
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
<AntCard title="任务状态分布">
|
||||
<Space wrap>
|
||||
{overview.task_state_buckets.length > 0 ? (
|
||||
overview.task_state_buckets.map((item) => (
|
||||
<Tag key={`task-state-${item.key}`} color="geekblue">{`${item.label}: ${item.count}`}</Tag>
|
||||
{stateBuckets.length > 0 ? (
|
||||
stateBuckets.map((item) => (
|
||||
<Tag key={`task-state-${item.state}`} color="geekblue">{`${item.state}: ${item.count}`}</Tag>
|
||||
))
|
||||
) : (
|
||||
<Text type="secondary">暂无状态分布数据</Text>
|
||||
@@ -482,36 +584,25 @@ export default function AdminTaskMonitorPage() {
|
||||
</Space>
|
||||
</AntCard>
|
||||
|
||||
<AntCard title="Worker 概览" extra={<Text type="secondary">Broker: {overview.broker_url || "-"}</Text>}>
|
||||
<Table<TaskMonitorWorkerItem>
|
||||
<AntCard title="Worker 概览">
|
||||
<Table<FlowerWorkerItem>
|
||||
rowKey={(record) => record.worker}
|
||||
columns={workerColumns}
|
||||
dataSource={overview.workers}
|
||||
dataSource={filteredWorkers}
|
||||
pagination={false}
|
||||
locale={{ emptyText: "暂无 Worker 数据" }}
|
||||
scroll={{ x: 1200 }}
|
||||
/>
|
||||
</AntCard>
|
||||
|
||||
<AntCard title="Queue 概览" extra={<Text type="secondary">Result Backend: {overview.result_backend || "-"}</Text>}>
|
||||
<Table<TaskMonitorQueueItem>
|
||||
rowKey={(record) => record.name}
|
||||
columns={queueColumns}
|
||||
dataSource={overview.queues}
|
||||
pagination={false}
|
||||
locale={{ emptyText: "暂无 Queue 数据" }}
|
||||
scroll={{ x: 760 }}
|
||||
scroll={{ x: 1500 }}
|
||||
/>
|
||||
</AntCard>
|
||||
|
||||
<AntCard title="任务明细">
|
||||
<Table<TaskMonitorTaskItem>
|
||||
rowKey={(record) => record.task_id}
|
||||
<Table<TaskTableRow>
|
||||
rowKey={(record) => record.key}
|
||||
columns={taskColumns}
|
||||
dataSource={overview.tasks}
|
||||
pagination={false}
|
||||
dataSource={filteredTaskRows}
|
||||
pagination={{ pageSize: 50, showSizeChanger: true }}
|
||||
locale={{ emptyText: "暂无任务数据" }}
|
||||
scroll={{ x: 2200 }}
|
||||
scroll={{ x: 2600 }}
|
||||
/>
|
||||
</AntCard>
|
||||
</>
|
||||
|
||||
@@ -0,0 +1,627 @@
|
||||
"use client";
|
||||
|
||||
import Link from "next/link";
|
||||
import dayjs from "dayjs";
|
||||
import { useMemo, useState } from "react";
|
||||
import type { ComponentType } from "react";
|
||||
import { useQuery } from "@tanstack/react-query";
|
||||
import {
|
||||
Alert,
|
||||
Button,
|
||||
Card,
|
||||
Col,
|
||||
Drawer,
|
||||
Empty,
|
||||
Input,
|
||||
Row,
|
||||
Select,
|
||||
Space,
|
||||
Spin,
|
||||
Statistic,
|
||||
Switch,
|
||||
Table,
|
||||
Tag,
|
||||
Typography,
|
||||
type CardProps,
|
||||
type TableColumnsType,
|
||||
} from "antd";
|
||||
|
||||
import { useAuth } from "@/components/auth-provider";
|
||||
import { readApiError } from "@/lib/api";
|
||||
|
||||
const { Text } = Typography;
|
||||
const AntCard = Card as unknown as ComponentType<CardProps>;
|
||||
|
||||
const DEFAULT_RECENT_LIMIT = 100;
|
||||
|
||||
type WorkerMonitorWorkerItem = {
|
||||
worker: string;
|
||||
status: string;
|
||||
queue_names: string[];
|
||||
concurrency: number;
|
||||
prefetch_count: number;
|
||||
processed_count: number;
|
||||
active_count: number;
|
||||
reserved_count: number;
|
||||
scheduled_count: number;
|
||||
registered_count: number;
|
||||
last_heartbeat_at: string | null;
|
||||
};
|
||||
|
||||
type WorkerMonitorOverviewResponse = {
|
||||
generated_at: string;
|
||||
summary: {
|
||||
total: number;
|
||||
online: number;
|
||||
offline: number;
|
||||
};
|
||||
workers: WorkerMonitorWorkerItem[];
|
||||
};
|
||||
|
||||
type WorkerMonitorTaskItem = {
|
||||
task_id: string;
|
||||
name: string;
|
||||
state: string;
|
||||
source: string;
|
||||
queue_name: string | null;
|
||||
worker: string;
|
||||
args_text: string | null;
|
||||
kwargs_text: string | null;
|
||||
eta: string | null;
|
||||
received_at: string | null;
|
||||
started_at: string | null;
|
||||
finished_at: string | null;
|
||||
runtime_seconds: number | null;
|
||||
result_text: string | null;
|
||||
exception_text: string | null;
|
||||
};
|
||||
|
||||
type WorkerMonitorTaskOverviewResponse = {
|
||||
generated_at: string;
|
||||
worker: string;
|
||||
summary: {
|
||||
active: number;
|
||||
reserved: number;
|
||||
scheduled: number;
|
||||
recent: number;
|
||||
};
|
||||
active_tasks: WorkerMonitorTaskItem[];
|
||||
reserved_tasks: WorkerMonitorTaskItem[];
|
||||
scheduled_tasks: WorkerMonitorTaskItem[];
|
||||
recent_tasks: WorkerMonitorTaskItem[];
|
||||
};
|
||||
|
||||
function formatDateTime(value: string | null | undefined): string {
|
||||
if (!value) {
|
||||
return "-";
|
||||
}
|
||||
const parsed = dayjs(value);
|
||||
if (!parsed.isValid()) {
|
||||
return "-";
|
||||
}
|
||||
return parsed.format("YYYY-MM-DD HH:mm:ss");
|
||||
}
|
||||
|
||||
function renderTaskStateTag(state: string) {
|
||||
const normalized = (state || "").toUpperCase();
|
||||
const color =
|
||||
normalized === "STARTED"
|
||||
? "processing"
|
||||
: normalized === "RECEIVED"
|
||||
? "blue"
|
||||
: normalized === "SCHEDULED"
|
||||
? "purple"
|
||||
: normalized === "RETRY"
|
||||
? "orange"
|
||||
: normalized === "SUCCESS"
|
||||
? "green"
|
||||
: normalized === "FAILURE"
|
||||
? "red"
|
||||
: normalized === "REVOKED"
|
||||
? "default"
|
||||
: "geekblue";
|
||||
return <Tag color={color}>{normalized || "UNKNOWN"}</Tag>;
|
||||
}
|
||||
|
||||
function renderWorkerStatusTag(status: string) {
|
||||
return (status || "").toUpperCase() === "ONLINE" ? <Tag color="green">在线</Tag> : <Tag color="default">离线</Tag>;
|
||||
}
|
||||
|
||||
function containsText(source: string | null | undefined, keyword: string): boolean {
|
||||
if (!keyword) {
|
||||
return true;
|
||||
}
|
||||
return (source || "").toLowerCase().includes(keyword.toLowerCase());
|
||||
}
|
||||
|
||||
function parseStatusFilter(value: string | undefined): "all" | "online" | "offline" {
|
||||
if (value === "online" || value === "offline") {
|
||||
return value;
|
||||
}
|
||||
return "all";
|
||||
}
|
||||
|
||||
export default function AdminWorkersPage() {
|
||||
const { user, initializing, fetchWithAuth, hasPermission } = useAuth();
|
||||
const canRead = hasPermission("celery.read") || hasPermission("celery.manage");
|
||||
|
||||
const [autoRefresh, setAutoRefresh] = useState(true);
|
||||
const [workerKeyword, setWorkerKeyword] = useState("");
|
||||
const [queueKeyword, setQueueKeyword] = useState("");
|
||||
const [statusFilter, setStatusFilter] = useState<"all" | "online" | "offline">("all");
|
||||
const [selectedWorker, setSelectedWorker] = useState<string | null>(null);
|
||||
|
||||
const overviewQuery = useQuery({
|
||||
queryKey: ["worker-monitor-overview"],
|
||||
enabled: Boolean(user) && canRead,
|
||||
queryFn: async () => {
|
||||
const response = await fetchWithAuth("/api/v1/admin/flower/workers?forceRefresh=false");
|
||||
if (!response.ok) {
|
||||
throw new Error(await readApiError(response));
|
||||
}
|
||||
return (await response.json()) as WorkerMonitorOverviewResponse;
|
||||
},
|
||||
refetchInterval: autoRefresh ? 5_000 : false,
|
||||
staleTime: 15_000,
|
||||
});
|
||||
|
||||
const workerTasksPath = useMemo(() => {
|
||||
if (!selectedWorker) {
|
||||
return null;
|
||||
}
|
||||
const params = new URLSearchParams();
|
||||
params.set("worker", selectedWorker);
|
||||
params.set("recentLimit", String(DEFAULT_RECENT_LIMIT));
|
||||
params.set("forceRefresh", "false");
|
||||
return `/api/v1/admin/flower/worker-tasks?${params.toString()}`;
|
||||
}, [selectedWorker]);
|
||||
|
||||
const workerTasksQuery = useQuery({
|
||||
queryKey: ["worker-monitor-tasks", workerTasksPath],
|
||||
enabled: Boolean(user) && canRead && Boolean(workerTasksPath),
|
||||
queryFn: async () => {
|
||||
if (!workerTasksPath) {
|
||||
throw new Error("missing worker path");
|
||||
}
|
||||
const response = await fetchWithAuth(workerTasksPath);
|
||||
if (!response.ok) {
|
||||
throw new Error(await readApiError(response));
|
||||
}
|
||||
return (await response.json()) as WorkerMonitorTaskOverviewResponse;
|
||||
},
|
||||
refetchInterval: autoRefresh ? 5_000 : false,
|
||||
staleTime: 15_000,
|
||||
});
|
||||
|
||||
const workerColumns = useMemo<TableColumnsType<WorkerMonitorWorkerItem>>(
|
||||
() => [
|
||||
{
|
||||
title: "Worker",
|
||||
dataIndex: "worker",
|
||||
key: "worker",
|
||||
width: 280,
|
||||
},
|
||||
{
|
||||
title: "状态",
|
||||
dataIndex: "status",
|
||||
key: "status",
|
||||
width: 90,
|
||||
render: (value: string) => renderWorkerStatusTag(value),
|
||||
},
|
||||
{
|
||||
title: "队列",
|
||||
dataIndex: "queue_names",
|
||||
key: "queue_names",
|
||||
render: (value: string[]) => (value.length > 0 ? value.join(", ") : "-"),
|
||||
},
|
||||
{
|
||||
title: "并发",
|
||||
dataIndex: "concurrency",
|
||||
key: "concurrency",
|
||||
width: 80,
|
||||
},
|
||||
{
|
||||
title: "Prefetch",
|
||||
dataIndex: "prefetch_count",
|
||||
key: "prefetch_count",
|
||||
width: 90,
|
||||
},
|
||||
{
|
||||
title: "已注册任务",
|
||||
dataIndex: "registered_count",
|
||||
key: "registered_count",
|
||||
width: 110,
|
||||
},
|
||||
{
|
||||
title: "累计处理",
|
||||
dataIndex: "processed_count",
|
||||
key: "processed_count",
|
||||
width: 110,
|
||||
},
|
||||
{
|
||||
title: "最近心跳",
|
||||
dataIndex: "last_heartbeat_at",
|
||||
key: "last_heartbeat_at",
|
||||
width: 170,
|
||||
render: (value: string | null) => formatDateTime(value),
|
||||
},
|
||||
{
|
||||
title: "Active/Reserved/Scheduled",
|
||||
key: "runtime_counts",
|
||||
width: 190,
|
||||
render: (_: unknown, record) => `${record.active_count}/${record.reserved_count}/${record.scheduled_count}`,
|
||||
},
|
||||
{
|
||||
title: "操作",
|
||||
key: "action",
|
||||
width: 120,
|
||||
fixed: "right",
|
||||
render: (_: unknown, record) => (
|
||||
<Button size="small" onClick={() => setSelectedWorker(record.worker)}>
|
||||
查看任务
|
||||
</Button>
|
||||
),
|
||||
},
|
||||
],
|
||||
[],
|
||||
);
|
||||
|
||||
const taskColumns = useMemo<TableColumnsType<WorkerMonitorTaskItem>>(
|
||||
() => [
|
||||
{
|
||||
title: "Task ID",
|
||||
dataIndex: "task_id",
|
||||
key: "task_id",
|
||||
width: 260,
|
||||
render: (value: string) => <Text copyable>{value}</Text>,
|
||||
},
|
||||
{
|
||||
title: "任务名",
|
||||
dataIndex: "name",
|
||||
key: "name",
|
||||
width: 220,
|
||||
render: (value: string) => value || "-",
|
||||
},
|
||||
{
|
||||
title: "状态",
|
||||
dataIndex: "state",
|
||||
key: "state",
|
||||
width: 110,
|
||||
render: (value: string) => renderTaskStateTag(value),
|
||||
},
|
||||
{
|
||||
title: "来源",
|
||||
dataIndex: "source",
|
||||
key: "source",
|
||||
width: 90,
|
||||
render: (value: string) => value || "-",
|
||||
},
|
||||
{
|
||||
title: "队列",
|
||||
dataIndex: "queue_name",
|
||||
key: "queue_name",
|
||||
width: 120,
|
||||
render: (value: string | null) => value || "-",
|
||||
},
|
||||
{
|
||||
title: "Worker",
|
||||
dataIndex: "worker",
|
||||
key: "worker",
|
||||
width: 220,
|
||||
render: (value: string) => value || "-",
|
||||
},
|
||||
{
|
||||
title: "ETA",
|
||||
dataIndex: "eta",
|
||||
key: "eta",
|
||||
width: 170,
|
||||
render: (value: string | null) => formatDateTime(value),
|
||||
},
|
||||
{
|
||||
title: "接收",
|
||||
dataIndex: "received_at",
|
||||
key: "received_at",
|
||||
width: 170,
|
||||
render: (value: string | null) => formatDateTime(value),
|
||||
},
|
||||
{
|
||||
title: "开始",
|
||||
dataIndex: "started_at",
|
||||
key: "started_at",
|
||||
width: 170,
|
||||
render: (value: string | null) => formatDateTime(value),
|
||||
},
|
||||
{
|
||||
title: "完成",
|
||||
dataIndex: "finished_at",
|
||||
key: "finished_at",
|
||||
width: 170,
|
||||
render: (value: string | null) => formatDateTime(value),
|
||||
},
|
||||
{
|
||||
title: "运行时长",
|
||||
dataIndex: "runtime_seconds",
|
||||
key: "runtime_seconds",
|
||||
width: 110,
|
||||
render: (value: number | null) => (value === null ? "-" : `${value.toFixed(3)}s`),
|
||||
},
|
||||
{
|
||||
title: "Args",
|
||||
dataIndex: "args_text",
|
||||
key: "args_text",
|
||||
width: 220,
|
||||
render: (value: string | null) => (value ? <Text ellipsis={{ tooltip: value }}>{value}</Text> : "-"),
|
||||
},
|
||||
{
|
||||
title: "Kwargs",
|
||||
dataIndex: "kwargs_text",
|
||||
key: "kwargs_text",
|
||||
width: 220,
|
||||
render: (value: string | null) => (value ? <Text ellipsis={{ tooltip: value }}>{value}</Text> : "-"),
|
||||
},
|
||||
{
|
||||
title: "Result",
|
||||
dataIndex: "result_text",
|
||||
key: "result_text",
|
||||
width: 220,
|
||||
render: (value: string | null) =>
|
||||
value ? (
|
||||
<Text ellipsis={{ tooltip: value }}>
|
||||
{value}
|
||||
</Text>
|
||||
) : (
|
||||
"-"
|
||||
),
|
||||
},
|
||||
{
|
||||
title: "Exception",
|
||||
dataIndex: "exception_text",
|
||||
key: "exception_text",
|
||||
width: 220,
|
||||
render: (value: string | null) =>
|
||||
value ? (
|
||||
<Text type="danger" ellipsis={{ tooltip: value }}>
|
||||
{value}
|
||||
</Text>
|
||||
) : (
|
||||
"-"
|
||||
),
|
||||
},
|
||||
],
|
||||
[],
|
||||
);
|
||||
|
||||
const filteredWorkers = useMemo(() => {
|
||||
const rows = overviewQuery.data?.workers || [];
|
||||
return rows.filter((item) => {
|
||||
const online = (item.status || "").toUpperCase() === "ONLINE";
|
||||
if (statusFilter === "online" && !online) {
|
||||
return false;
|
||||
}
|
||||
if (statusFilter === "offline" && online) {
|
||||
return false;
|
||||
}
|
||||
if (!containsText(item.worker, workerKeyword.trim())) {
|
||||
return false;
|
||||
}
|
||||
if (queueKeyword.trim()) {
|
||||
const text = item.queue_names.join(", ");
|
||||
if (!containsText(text, queueKeyword.trim())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}, [overviewQuery.data?.workers, queueKeyword, statusFilter, workerKeyword]);
|
||||
|
||||
if (initializing || (overviewQuery.isLoading && !overviewQuery.data && canRead && Boolean(user))) {
|
||||
return (
|
||||
<div className="py-10">
|
||||
<Spin tip="Worker监控数据加载中..." />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (!user) {
|
||||
return (
|
||||
<main className="mx-auto flex min-h-screen w-full max-w-4xl flex-col justify-center gap-4 px-6 py-20">
|
||||
<p className="text-sm text-[var(--gray-11)]">请先登录后再访问Worker监控页面。</p>
|
||||
<Link
|
||||
href="/"
|
||||
className="inline-flex w-fit items-center justify-center rounded-md border border-[var(--gray-6)] bg-[var(--gray-a2)] px-4 py-2 text-sm font-medium text-[var(--gray-12)] transition hover:bg-[var(--gray-a3)]"
|
||||
>
|
||||
返回首页
|
||||
</Link>
|
||||
</main>
|
||||
);
|
||||
}
|
||||
|
||||
if (!canRead) {
|
||||
return (
|
||||
<main className="mx-auto flex min-h-screen w-full max-w-4xl flex-col justify-center gap-4 px-6 py-20">
|
||||
<p className="text-sm text-[var(--gray-11)]">你没有访问该页面的权限(需要 `celery.read` 或 `celery.manage`)。</p>
|
||||
<Link
|
||||
href="/"
|
||||
className="inline-flex w-fit items-center justify-center rounded-md border border-[var(--gray-6)] bg-[var(--gray-a2)] px-4 py-2 text-sm font-medium text-[var(--gray-12)] transition hover:bg-[var(--gray-a3)]"
|
||||
>
|
||||
返回首页
|
||||
</Link>
|
||||
</main>
|
||||
);
|
||||
}
|
||||
|
||||
const overview = overviewQuery.data;
|
||||
const taskOverview = workerTasksQuery.data;
|
||||
|
||||
return (
|
||||
<Space direction="vertical" size={16} style={{ width: "100%" }}>
|
||||
<AntCard>
|
||||
<Space size={16} wrap>
|
||||
<Input
|
||||
allowClear
|
||||
placeholder="按 Worker 名称筛选"
|
||||
value={workerKeyword}
|
||||
onChange={(event) => setWorkerKeyword(event.target.value)}
|
||||
style={{ width: 220 }}
|
||||
/>
|
||||
<Input
|
||||
allowClear
|
||||
placeholder="按队列名称筛选"
|
||||
value={queueKeyword}
|
||||
onChange={(event) => setQueueKeyword(event.target.value)}
|
||||
style={{ width: 220 }}
|
||||
/>
|
||||
<Select
|
||||
value={statusFilter}
|
||||
onChange={(value) => setStatusFilter(parseStatusFilter(value))}
|
||||
options={[
|
||||
{ label: "全部状态", value: "all" },
|
||||
{ label: "在线", value: "online" },
|
||||
{ label: "离线", value: "offline" },
|
||||
]}
|
||||
style={{ width: 150 }}
|
||||
/>
|
||||
<Space size={8}>
|
||||
<Text>自动刷新</Text>
|
||||
<Switch checked={autoRefresh} onChange={setAutoRefresh} />
|
||||
</Space>
|
||||
<Button onClick={() => void overviewQuery.refetch()} loading={overviewQuery.isFetching}>
|
||||
刷新Worker
|
||||
</Button>
|
||||
<Text type="secondary">生成时间:{formatDateTime(overview?.generated_at)}</Text>
|
||||
</Space>
|
||||
</AntCard>
|
||||
|
||||
{overviewQuery.error && (
|
||||
<Alert
|
||||
type="error"
|
||||
showIcon
|
||||
message={overviewQuery.error instanceof Error ? overviewQuery.error.message : "Worker监控数据加载失败"}
|
||||
/>
|
||||
)}
|
||||
|
||||
{!overview && !overviewQuery.isFetching && (
|
||||
<AntCard>
|
||||
<Empty description="暂无Worker监控数据" />
|
||||
</AntCard>
|
||||
)}
|
||||
|
||||
{overview && (
|
||||
<>
|
||||
<Row gutter={[16, 16]}>
|
||||
<Col xs={24} md={6}>
|
||||
<AntCard>
|
||||
<Statistic title="Worker总数" value={overview.summary.total} />
|
||||
</AntCard>
|
||||
</Col>
|
||||
<Col xs={24} md={6}>
|
||||
<AntCard>
|
||||
<Statistic title="在线Worker" value={overview.summary.online} />
|
||||
</AntCard>
|
||||
</Col>
|
||||
<Col xs={24} md={6}>
|
||||
<AntCard>
|
||||
<Statistic title="离线Worker" value={overview.summary.offline} />
|
||||
</AntCard>
|
||||
</Col>
|
||||
<Col xs={24} md={6}>
|
||||
<AntCard>
|
||||
<Statistic title="筛选结果" value={filteredWorkers.length} />
|
||||
</AntCard>
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
<AntCard title="Worker列表">
|
||||
<Table<WorkerMonitorWorkerItem>
|
||||
rowKey={(record) => record.worker}
|
||||
columns={workerColumns}
|
||||
dataSource={filteredWorkers}
|
||||
pagination={{ pageSize: 10, showSizeChanger: true }}
|
||||
locale={{ emptyText: "暂无Worker数据" }}
|
||||
scroll={{ x: 1600 }}
|
||||
/>
|
||||
</AntCard>
|
||||
</>
|
||||
)}
|
||||
|
||||
<Drawer
|
||||
title={`Worker任务明细 - ${selectedWorker || "-"}`}
|
||||
open={Boolean(selectedWorker)}
|
||||
width={1260}
|
||||
onClose={() => setSelectedWorker(null)}
|
||||
extra={
|
||||
<Space>
|
||||
<Button size="small" onClick={() => void workerTasksQuery.refetch()} loading={workerTasksQuery.isFetching}>
|
||||
刷新任务
|
||||
</Button>
|
||||
<Text type="secondary">{taskOverview ? `采集时间:${formatDateTime(taskOverview.generated_at)}` : "-"}</Text>
|
||||
</Space>
|
||||
}
|
||||
>
|
||||
{workerTasksQuery.isLoading && !taskOverview ? <Spin tip="任务数据加载中..." /> : null}
|
||||
{workerTasksQuery.error ? (
|
||||
<Alert
|
||||
type="error"
|
||||
showIcon
|
||||
message={workerTasksQuery.error instanceof Error ? workerTasksQuery.error.message : "任务数据加载失败"}
|
||||
/>
|
||||
) : null}
|
||||
{!workerTasksQuery.isLoading && !taskOverview ? <Empty description="暂无任务数据" /> : null}
|
||||
{taskOverview ? (
|
||||
<Space direction="vertical" size={16} style={{ width: "100%" }}>
|
||||
<Space size={8} wrap>
|
||||
<Tag color="processing">运行中: {taskOverview.summary.active}</Tag>
|
||||
<Tag color="blue">保留中: {taskOverview.summary.reserved}</Tag>
|
||||
<Tag color="purple">已调度: {taskOverview.summary.scheduled}</Tag>
|
||||
<Tag color="geekblue">最近任务: {taskOverview.summary.recent}</Tag>
|
||||
</Space>
|
||||
|
||||
<AntCard title="运行中任务">
|
||||
<Table<WorkerMonitorTaskItem>
|
||||
rowKey={(record) => `active-${record.task_id}`}
|
||||
columns={taskColumns}
|
||||
dataSource={taskOverview.active_tasks}
|
||||
pagination={false}
|
||||
locale={{ emptyText: "暂无运行中任务" }}
|
||||
scroll={{ x: 2200 }}
|
||||
/>
|
||||
</AntCard>
|
||||
|
||||
<AntCard title="保留任务">
|
||||
<Table<WorkerMonitorTaskItem>
|
||||
rowKey={(record) => `reserved-${record.task_id}`}
|
||||
columns={taskColumns}
|
||||
dataSource={taskOverview.reserved_tasks}
|
||||
pagination={false}
|
||||
locale={{ emptyText: "暂无保留任务" }}
|
||||
scroll={{ x: 2200 }}
|
||||
/>
|
||||
</AntCard>
|
||||
|
||||
<AntCard title="定时任务">
|
||||
<Table<WorkerMonitorTaskItem>
|
||||
rowKey={(record) => `scheduled-${record.task_id}`}
|
||||
columns={taskColumns}
|
||||
dataSource={taskOverview.scheduled_tasks}
|
||||
pagination={false}
|
||||
locale={{ emptyText: "暂无定时任务" }}
|
||||
scroll={{ x: 2200 }}
|
||||
/>
|
||||
</AntCard>
|
||||
|
||||
<AntCard title="最近完成任务">
|
||||
<Table<WorkerMonitorTaskItem>
|
||||
rowKey={(record) => `recent-${record.task_id}`}
|
||||
columns={taskColumns}
|
||||
dataSource={taskOverview.recent_tasks}
|
||||
pagination={false}
|
||||
locale={{ emptyText: "暂无最近任务" }}
|
||||
scroll={{ x: 2200 }}
|
||||
/>
|
||||
</AntCard>
|
||||
</Space>
|
||||
) : null}
|
||||
</Drawer>
|
||||
</Space>
|
||||
);
|
||||
}
|
||||
Reference in New Issue
Block a user