Files
fquiz/api/app/services/flower_monitor_service.py
T

335 lines
12 KiB
Python

from __future__ import annotations
import base64
import json
from datetime import datetime, timezone
from typing import Any
from urllib.parse import quote_plus
import httpx
from fastapi import HTTPException, status
from ..core.config import get_settings
from ..models.base import utcnow
from ..schemas.flower_monitor import (
FlowerTaskItem,
FlowerWorkerItem,
FlowerWorkerTaskOverviewResponse,
FlowerWorkerTaskSummary,
FlowerWorkersOverviewResponse,
FlowerWorkersSummary,
)
def build_workers_overview(*, force_refresh: bool) -> FlowerWorkersOverviewResponse:
    """Assemble the list of workers reported by Flower together with counts.

    Two Flower calls are made: ``/api/workers?refresh=...`` for per-worker
    details and ``/api/workers?status=true`` for the liveness flags. The two
    payloads are merged by worker name.

    Args:
        force_refresh: Forwarded to Flower as ``refresh=true`` / ``refresh=false``.

    Returns:
        A response whose workers are ordered ONLINE first, then by name, along
        with total / online / offline counts.

    Raises:
        HTTPException: Propagated from ``_call_flower_json``.
    """
    refresh_flag = "true" if force_refresh else "false"
    details_payload = _call_flower_json(f"/api/workers?refresh={refresh_flag}")
    liveness_payload = _call_flower_json("/api/workers?status=true")

    # Anything that is not a JSON object is treated as "no data".
    details = details_payload if isinstance(details_payload, dict) else {}
    liveness = liveness_payload if isinstance(liveness_payload, dict) else {}

    # A worker may appear in only one of the payloads, so use the union of names.
    names = sorted({*details, *liveness})
    workers: list[FlowerWorkerItem] = [
        _normalize_worker(name, _as_record(details.get(name)), bool(liveness.get(name)))
        for name in names
    ]
    workers.sort(key=lambda entry: (entry.status != "ONLINE", entry.worker))

    online_total = sum(1 for entry in workers if entry.status == "ONLINE")
    return FlowerWorkersOverviewResponse(
        generated_at=utcnow(),
        workers=workers,
        summary=FlowerWorkersSummary(
            total=len(workers),
            online=online_total,
            offline=len(workers) - online_total,
        ),
    )
def build_worker_task_overview(
    *,
    worker: str,
    force_refresh: bool,
    recent_limit: int,
) -> FlowerWorkerTaskOverviewResponse:
    """Collect the active, reserved, scheduled and recent tasks of one worker.

    Args:
        worker: Worker name; surrounding whitespace is stripped.
        force_refresh: Forwarded to Flower as ``refresh=true`` / ``refresh=false``.
        recent_limit: Requested number of recent tasks, clamped to 1..200.

    Returns:
        The four task lists for the worker plus a summary holding their lengths.

    Raises:
        HTTPException: 400 when ``worker`` is blank; otherwise whatever
            ``_call_flower_json`` raises.
    """
    worker_name = worker.strip()
    if worker_name == "":
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="worker is required")

    refresh_flag = "true" if force_refresh else "false"
    capped_limit = min(200, int(recent_limit))
    limit = max(1, capped_limit)
    encoded_worker = quote_plus(worker_name)

    workers_payload = _call_flower_json(
        f"/api/workers?refresh={refresh_flag}&workername={encoded_worker}"
    )
    tasks_payload = _call_flower_json(
        f"/api/tasks?limit={limit}&workername={encoded_worker}"
    )

    worker_raw = _pick_worker_raw(_as_record(workers_payload), worker_name)
    # The worker snapshot keeps each task list under the lower-cased source name.
    snapshots = {
        source: _build_snapshot_tasks(worker_name, worker_raw, source)
        for source in ("ACTIVE", "RESERVED", "SCHEDULED")
    }
    recent = _build_recent_tasks(worker_name, _as_record(tasks_payload))

    return FlowerWorkerTaskOverviewResponse(
        generated_at=utcnow(),
        worker=worker_name,
        active_tasks=snapshots["ACTIVE"],
        reserved_tasks=snapshots["RESERVED"],
        scheduled_tasks=snapshots["SCHEDULED"],
        recent_tasks=recent,
        summary=FlowerWorkerTaskSummary(
            active=len(snapshots["ACTIVE"]),
            reserved=len(snapshots["RESERVED"]),
            scheduled=len(snapshots["SCHEDULED"]),
            recent=len(recent),
        ),
    )
def _call_flower_json(path: str) -> Any:
    """GET ``path`` from the Flower API and return the decoded JSON body.

    The URL is ``settings.resolved_flower_api_base_url`` followed by ``path``.
    When ``settings.resolved_flower_basic_auth`` is truthy it is base64-encoded
    and sent as an ``Authorization: Basic`` header.

    Args:
        path: Path plus query string appended verbatim to the base URL.

    Returns:
        Whatever ``response.json()`` yields.

    Raises:
        HTTPException: 504 on an httpx timeout; 502 on any other httpx error,
            on an HTTP status >= 400, or when the body is not valid JSON.
    """
    settings = get_settings()
    url = f"{settings.resolved_flower_api_base_url}{path}"
    headers = {"Accept": "application/json"}
    basic_auth = settings.resolved_flower_basic_auth
    if basic_auth:
        # NOTE(review): presumably a "user:password" string — confirm against settings.
        token = base64.b64encode(basic_auth.encode("utf-8")).decode("ascii")
        headers["Authorization"] = f"Basic {token}"
    # The configured timeout is never allowed to drop below 3.
    timeout = max(3, int(settings.flower_api_timeout_seconds))
    try:
        with httpx.Client(timeout=timeout) as client:
            response = client.get(url, headers=headers)
    except httpx.TimeoutException as exc:
        # Must precede the generic handler below so timeouts map to 504 rather than 502.
        raise HTTPException(
            status_code=status.HTTP_504_GATEWAY_TIMEOUT,
            detail=f"flower request timeout: {path}",
        ) from exc
    except httpx.HTTPError as exc:
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"flower request failed: {path}: {exc}",
        ) from exc
    if response.status_code >= 400:
        detail = (response.text or "").strip() or response.reason_phrase or "flower error"
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"flower error {response.status_code}: {detail}",
        )
    try:
        return response.json()
    except ValueError as exc:
        # Chain the decode error explicitly, matching the handlers above.
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"flower returned non-json payload: {path}",
        ) from exc
def _normalize_worker(worker_name: str, worker_raw: dict[str, Any], is_online: bool) -> FlowerWorkerItem:
    """Convert one raw worker record into a ``FlowerWorkerItem``.

    Args:
        worker_name: Name stored on the resulting item.
        worker_raw: Raw worker record; missing or malformed sections count as empty.
        is_online: Selects the ``"ONLINE"`` / ``"OFFLINE"`` status string.

    Returns:
        The item with queue names, counters and the parsed heartbeat timestamp.
    """
    stats = _as_record(worker_raw.get("stats"))
    pool = _as_record(stats.get("pool"))
    # Both the hyphenated and the underscored key spellings are accepted.
    max_concurrency = pool.get("max-concurrency") or pool.get("max_concurrency")

    def count_of(key: str) -> int:
        """Return the length of the list stored under ``key`` (0 if not a list)."""
        return len(_as_array(worker_raw.get(key)))

    return FlowerWorkerItem(
        worker=worker_name,
        status="ONLINE" if is_online else "OFFLINE",
        queue_names=_parse_active_queue_names(worker_raw.get("active_queues")),
        registered_count=count_of("registered"),
        processed_count=_parse_processed_count(stats.get("total")),
        concurrency=_safe_int(max_concurrency),
        prefetch_count=_safe_int(stats.get("prefetch_count")),
        active_count=count_of("active"),
        reserved_count=count_of("reserved"),
        scheduled_count=count_of("scheduled"),
        last_heartbeat_at=_parse_datetime(worker_raw.get("timestamp")),
    )
def _pick_worker_raw(workers_map: dict[str, Any], worker_name: str) -> dict[str, Any]:
    """Select the record for ``worker_name`` out of a workers payload.

    Lookup order: exact key match; the sole entry when the payload holds
    exactly one worker; a case-insensitive, whitespace-trimmed key match.

    Returns:
        A copy of the chosen record, or ``{}`` when nothing matches or the
        chosen value is not a dict.
    """
    if worker_name in workers_map:
        return _as_record(workers_map[worker_name])

    if len(workers_map) == 1:
        # A single-entry payload is accepted even if its key differs from the request.
        (sole_value,) = workers_map.values()
        return _as_record(sole_value)

    wanted = worker_name.strip().lower()
    loose_match = next(
        (value for key, value in workers_map.items() if str(key).strip().lower() == wanted),
        None,
    )
    return _as_record(loose_match)
def _build_snapshot_tasks(worker_name: str, worker_raw: dict[str, Any], source: str) -> list[FlowerTaskItem]:
    """Normalise the tasks listed under ``source.lower()`` in a worker record.

    Args:
        worker_name: Default worker for each task and prefix of the fallback id.
        worker_raw: Raw worker record holding the snapshot lists.
        source: Source label; its lower-cased form is the key read from ``worker_raw``.

    Returns:
        One ``FlowerTaskItem`` per entry, in the order the entries appear.
    """
    entries = _as_array(worker_raw.get(source.lower()))
    return [
        _normalize_task_item(
            entry,
            source=source,
            # Positional fallback id (1-based) for entries that carry no id of their own.
            fallback_task_id=f"{worker_name}:{source}:{position}",
            default_worker=worker_name,
        )
        for position, entry in enumerate(entries, start=1)
    ]
def _build_recent_tasks(worker_name: str, tasks_map: dict[str, Any]) -> list[FlowerTaskItem]:
    """Normalise a tasks payload and keep only the tasks of ``worker_name``.

    Worker names are compared case-insensitively after trimming whitespace.

    Returns:
        Matching tasks, newest first. The sort timestamp is the first truthy
        value among received, started, finished and eta.
    """
    wanted = worker_name.strip().lower()
    candidates = (
        _normalize_task_item(
            raw,
            source="RECENT",
            fallback_task_id=str(task_id),
            default_worker=worker_name,
        )
        for task_id, raw in tasks_map.items()
    )
    matching = [task for task in candidates if task.worker.strip().lower() == wanted]

    def recency(task: FlowerTaskItem) -> float:
        """Return the sortable timestamp used to order ``task``."""
        stamp = task.received_at or task.started_at or task.finished_at or task.eta
        return _sortable_timestamp(stamp)

    return sorted(matching, key=recency, reverse=True)
def _normalize_task_item(
    raw_task: Any,
    *,
    source: str,
    fallback_task_id: str,
    default_worker: str,
) -> FlowerTaskItem:
    """Convert one raw task record into a ``FlowerTaskItem``.

    Args:
        raw_task: Raw task record; anything that is not a dict counts as empty.
        source: Label stored on the item; also the default state for
            non-``"RECENT"`` sources.
        fallback_task_id: Id used when the record has neither ``uuid`` nor ``id``.
        default_worker: Worker used when the record has neither ``worker``
            nor ``hostname``.

    Returns:
        The normalised task item.
    """
    outer = _as_record(raw_task)
    # Keys nested under "request" override same-named keys of the outer record.
    merged: dict[str, Any] = {**outer, **_as_record(outer.get("request"))}

    def first_truthy(*keys: str) -> Any:
        """Behave like ``merged.get(k1) or merged.get(k2) or ...``."""
        found = None
        for key in keys:
            found = merged.get(key)
            if found:
                return found
        return found

    state_text = _safe_text(merged.get("state"))
    if state_text:
        state = state_text.upper()
    elif source == "RECENT":
        state = "UNKNOWN"
    else:
        state = source

    return FlowerTaskItem(
        task_id=_safe_text(first_truthy("uuid", "id") or fallback_task_id) or fallback_task_id,
        name=_safe_text(first_truthy("name", "type")) or "-",
        state=state,
        source=source,
        worker=_safe_text(first_truthy("worker", "hostname")) or default_worker,
        queue_name=_extract_queue_name(merged),
        args_text=_stringify_payload(merged.get("args")),
        kwargs_text=_stringify_payload(merged.get("kwargs")),
        eta=_parse_datetime(merged.get("eta")),
        received_at=_parse_datetime(first_truthy("received", "timestamp")),
        started_at=_parse_datetime(first_truthy("started", "time_start")),
        finished_at=_parse_datetime(first_truthy("succeeded", "failed", "revoked")),
        runtime_seconds=_safe_float(merged.get("runtime")),
        result_text=_stringify_payload(merged.get("result")),
        exception_text=_stringify_payload(merged.get("exception")),
    )
def _parse_active_queue_names(raw: Any) -> list[str]:
    """Return the sorted, de-duplicated, non-empty ``name`` values of a queue list.

    Anything that is not a list yields ``[]``; entries that are not dicts or
    have a blank name are skipped.
    """
    names = {_safe_text(_as_record(entry).get("name")) for entry in _as_array(raw)}
    names.discard("")  # blank names are not reported
    return sorted(names)
def _parse_processed_count(raw_total: Any) -> int:
    """Sum the values of ``raw_total``, converting each with ``_safe_int``.

    A ``raw_total`` that is not a dict yields 0.
    """
    return sum(_safe_int(amount) for amount in _as_record(raw_total).values())
def _extract_queue_name(task_data: dict[str, Any]) -> str | None:
    """Derive a queue name for a task from its routing information.

    Preference order: ``delivery_info.routing_key``, then the task-level
    ``exchange``, then ``delivery_info.exchange``.

    Returns:
        The first non-blank candidate, or ``None`` when none is available.
    """
    delivery = _as_record(task_data.get("delivery_info"))
    routing_key = _safe_text(delivery.get("routing_key"))
    if routing_key:
        return routing_key
    exchange = _safe_text(task_data.get("exchange") or delivery.get("exchange"))
    # _safe_text yields "" for missing values; honour the Optional return type instead.
    return exchange or None
def _parse_datetime(value: Any) -> datetime | None:
    """Best-effort conversion of ``value`` to a timezone-aware UTC datetime.

    Accepted inputs:
      * ``datetime`` — naive values are labelled UTC, aware ones converted to UTC;
      * positive ``int`` / ``float`` — below 1e11 read as epoch seconds,
        otherwise as epoch milliseconds;
      * strings — an unsigned decimal number (handled as above) or an ISO-8601
        text, where ``Z`` is rewritten to ``+00:00`` and a missing offset
        means UTC.

    Returns:
        The UTC datetime, or ``None`` for ``None``, booleans, non-positive or
        out-of-range numbers, blank text and unparsable text.
    """
    if value is None:
        return None
    if isinstance(value, datetime):
        if value.tzinfo is None:
            return value.replace(tzinfo=timezone.utc)
        return value.astimezone(timezone.utc)
    # bool is a subclass of int; without this guard True would be read as
    # one second after the epoch. Booleans fall through to the text path
    # below, where "True" / "False" fail to parse and yield None.
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        try:
            numeric = float(value)
        except OverflowError:
            # Integers too large for a float cannot be a valid timestamp.
            return None
        if numeric <= 0:
            return None
        if numeric < 1e11:
            # Small magnitudes are seconds; scale them to milliseconds.
            numeric *= 1000
        try:
            return datetime.fromtimestamp(numeric / 1000, timezone.utc)
        except (OverflowError, OSError, ValueError):
            return None
    text = str(value).strip()
    if not text:
        return None
    if text.replace(".", "", 1).isdigit():
        # Digits with at most one dot: treat as a numeric timestamp.
        try:
            return _parse_datetime(float(text))
        except ValueError:
            return None
    text = text.replace("Z", "+00:00")
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return None
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)
def _stringify_payload(value: Any) -> str | None:
    """Render ``value`` as short display text.

    Strings are used as-is; other values are JSON-encoded (non-ASCII kept),
    falling back to ``str(value)`` when JSON encoding is not possible. The
    text is stripped and capped at 1000 characters, with ``...`` replacing
    the tail when it is cut.

    Returns:
        The text, or ``None`` when ``value`` is ``None`` or the text is blank.
    """
    if value is None:
        return None
    if isinstance(value, str):
        text = value
    else:
        try:
            text = json.dumps(value, ensure_ascii=False)
        except (TypeError, ValueError):
            # TypeError: unsupported type; ValueError: circular reference.
            text = str(value)
    text = text.strip()
    if not text:
        return None
    if len(text) > 1000:
        # 997 characters plus the ellipsis keeps the result at exactly 1000.
        return text[:997] + "..."
    return text
def _safe_text(value: Any) -> str:
    """Return ``value`` as stripped text; every falsy value becomes ``""``."""
    if not value:
        return ""
    return str(value).strip()
def _safe_int(value: Any) -> int:
    """Convert ``value`` with ``int()``, returning 0 when conversion fails.

    Failure covers unsupported types, unparsable text, NaN and infinities.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError is what int() raises for +/-infinity.
        return 0
def _safe_float(value: Any) -> float | None:
    """Convert ``value`` to a finite, non-negative float.

    Returns:
        The float, or ``None`` when conversion fails or the result is
        negative, NaN or infinite.
    """
    import math  # local import keeps this block self-contained

    try:
        parsed = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: integer too large to be represented as a float.
        return None
    # NaN compares false against everything, so it needs an explicit check.
    if not math.isfinite(parsed) or parsed < 0:
        return None
    return parsed
def _as_record(value: Any) -> dict[str, Any]:
    """Return a shallow ``dict`` copy of ``value``, or ``{}`` if it is not a dict."""
    return dict(value) if isinstance(value, dict) else {}
def _as_array(value: Any) -> list[Any]:
    """Return a shallow ``list`` copy of ``value``, or ``[]`` if it is not a list."""
    return list(value) if isinstance(value, list) else []
def _sortable_timestamp(value: datetime | None) -> float:
    """Return a POSIX timestamp usable as a sort key.

    ``None`` maps to ``0.0``; naive datetimes are interpreted as UTC.
    """
    if value is None:
        return 0.0
    aware = value if value.tzinfo is not None else value.replace(tzinfo=timezone.utc)
    return aware.timestamp()