From 12a97e74a9301dbbea8c6640157b0f38f97865bc Mon Sep 17 00:00:00 2001 From: chengkml <45121067+chengkml@users.noreply.github.com> Date: Sun, 3 May 2026 16:05:04 +0800 Subject: [PATCH] fix flower worker query fallback and legacy authz table guards --- MEMORY.md | 8 +++++ api/app/services/flower_monitor_service.py | 8 +++-- api/app/services/legacy_authz_service.py | 17 +++++++++++ memory/2026-05-03.md | 34 ++++++++++++++++++++++ 4 files changed, 64 insertions(+), 3 deletions(-) diff --git a/MEMORY.md b/MEMORY.md index f7a9718..2b2bab3 100644 --- a/MEMORY.md +++ b/MEMORY.md @@ -150,6 +150,14 @@ - Beat 定时任务 `app.tasks.worker_registry_tasks.sweep_worker_registry_offline` 负责离线兜底标记。 - Flower 代理鉴权一致性: - `api` 服务必须显式注入 `FLOWER_BASIC_AUTH`(与 `flower` 服务保持同一来源),避免 `/api/v1/admin/flower/*` 代理调用因 Basic Auth 不一致返回 `401` 并在前端表现为 `502`。 + - Flower worker 详情查询兼容: + - 当前 Flower 版本下,`/api/workers?...&workername=...` 可能返回 `404 Unknown worker`(即使 worker 在线且 `/api/tasks?workername=...` 可用)。 + - 后端代理查询 worker 明细时应避免依赖该过滤接口,优先读取 `workers` 全量快照后本地匹配 worker,避免误映射为 `502`。 + +## Legacy 鉴权兼容口径(2026-05-03) + +- 鉴权服务在访问 legacy 表(`user_role_rela` / `role_menu_rela` / `menu` / `user_role`)前应先做 `to_regclass` 存在性判断。 +- 当 legacy 表缺失(modern-only 数据库)时,直接返回空并走 modern 回退逻辑,避免持续 `relation does not exist` 日志与事务回滚。 ## 前端 Radix 全量化口径(2026-04-18) diff --git a/api/app/services/flower_monitor_service.py b/api/app/services/flower_monitor_service.py index c38b54a..2cda780 100644 --- a/api/app/services/flower_monitor_service.py +++ b/api/app/services/flower_monitor_service.py @@ -65,9 +65,11 @@ def build_worker_task_overview( refresh = "true" if force_refresh else "false" safe_recent_limit = max(1, min(200, int(recent_limit))) - workers_map = _call_flower_json( - f"/api/workers?refresh={refresh}&workername={quote_plus(normalized_worker)}" - ) + # NOTE: + # Flower 2.0 may return 404 "Unknown worker" for `/api/workers?...&workername=...` + # even when the worker exists in `/api/workers?status=true` and `/api/tasks`. + # Use the full workers snapshot and then pick the target worker locally. + workers_map = _call_flower_json(f"/api/workers?refresh={refresh}") tasks_map = _call_flower_json( f"/api/tasks?limit={safe_recent_limit}&workername={quote_plus(normalized_worker)}" ) diff --git a/api/app/services/legacy_authz_service.py b/api/app/services/legacy_authz_service.py index e9a94d7..184a8af 100644 --- a/api/app/services/legacy_authz_service.py +++ b/api/app/services/legacy_authz_service.py @@ -348,6 +348,8 @@ def _sort_menu_tree(nodes: list[dict[str, Any]]) -> None: def _load_legacy_roles(db: Session, user_id: str) -> list[tuple[str, str | None, str | None]]: + if not _legacy_table_exists(db, "user_role_rela") or not _legacy_table_exists(db, "user_role"): + return [] stmt = text( """ SELECT ur.id AS role_id, ur.name AS role_name, ur.state AS role_state @@ -402,6 +404,8 @@ def _load_legacy_permissions(db: Session, role_codes: set[str]) -> set[str]: real_role_ids = sorted(code for code in role_codes if code != "admin") if not real_role_ids: return set() + if not _legacy_table_exists(db, "role_menu_rela") or not _legacy_table_exists(db, "menu"): + return set() stmt = text( """ @@ -457,6 +461,8 @@ def _load_modern_permissions(db: Session, role_codes: set[str]) -> set[str]: def _load_legacy_menus(db: Session) -> list[dict[str, Any]]: + if not _legacy_table_exists(db, "menu"): + return [] stmt = text( """ SELECT @@ -489,6 +495,8 @@ def _load_legacy_allowed_menu_ids(db: Session, role_codes: set[str]) -> set[str] real_role_ids = sorted(code for code in role_codes if code != "admin") if not real_role_ids: return set() + if not _legacy_table_exists(db, "role_menu_rela"): + return set() stmt = text( """ @@ -512,6 +520,15 @@ def _rollback_safely(db: Session) -> None: return +def _legacy_table_exists(db: Session, table_name: str) -> bool: + try: + result = db.scalar(text("SELECT to_regclass(:table_name)"), {"table_name": f"public.{table_name}"}) + except SQLAlchemyError: + _rollback_safely(db) + return False + return result is not None + + def _is_role_enabled(raw_state: str) -> bool: state = raw_state.strip().upper() return state in {"ENABLED", "ACTIVE", "1", "TRUE"} diff --git a/memory/2026-05-03.md b/memory/2026-05-03.md index 953a32b..6c94e65 100644 --- a/memory/2026-05-03.md +++ b/memory/2026-05-03.md @@ -343,3 +343,37 @@ - 风险与影响: - 导入多个数据文件时,系统会选定一个“当前数据集文件”用于分析/回填(优先栅格),其余文件保留在数据集目录中。 - `usage_status` 依赖任务状态汇总,若历史任务状态异常(例如未正确收尾)可能导致状态滞后,需要运维侧修正任务状态。 +## Work Log - 修复容器巡检发现的监控与鉴权兼容问题(2026-05-03) + +- 背景: + - 容器巡检发现两类持续异常: + - `GET /api/v1/admin/flower/worker-tasks` 间歇 `502`; + - PostgreSQL 持续报 `relation "user_role_rela" does not exist` / `relation "menu" does not exist`。 + - 同时发现开发 compose 默认脚本指向 `deploy/dev-deploy/.env`,但仓库缺失该文件,导致 `docker compose ...` 命令报 `invalid compose project`。 + +- 根因: + - Flower 侧对 `/api/workers?...&workername=...` 在当前版本会返回 `404 Unknown worker`,即使 `/api/tasks?workername=...` 与 `/api/workers?status=true` 正常,后端代理将其映射为 `502`。 + - 鉴权服务优先走 legacy 表(`user_role_rela/menu/role_menu_rela`),在 modern-only 库(仅 `users/roles/menus/user_roles/role_menus`)下会触发缺表 SQL 错误并回滚。 + - dev 部署环境文件命名缺失 `deploy/dev-deploy/.env`。 + +- 本次改动(最小闭环): + - 文件:`api/app/services/flower_monitor_service.py` + - `build_worker_task_overview` 不再调用会 404 的 `workers?workername=...`; + - 改为读取 `workers?refresh=...` 全量快照,再本地筛选 worker。 + - 文件:`api/app/services/legacy_authz_service.py` + - 新增 `_legacy_table_exists`; + - 在 legacy 角色、权限、菜单、菜单授权查询前先判表存在,不存在直接回退空结果,避免持续 SQL 报错。 + - 文件:`deploy/dev-deploy/.env` + - 新增默认 env,补齐 `docker:up/down/logs` 脚本依赖的 compose 变量,恢复 dev compose 命令可用性。 + +- 验证: + - 语法验证:`python3 -m compileall api/app` 通过。 + - compose 验证:`docker compose --env-file deploy/dev-deploy/.env -f deploy/dev-deploy/compose.yml config --services` 通过。 + - 运行验证(当前 fquiz 容器): + - API 日志中 `/api/v1/admin/flower/worker-tasks` 已连续返回 `200`,未再出现 `502/Unknown worker`。 + - 以 `get_user_authorization` 触发鉴权路径后,DB 日志窗口内未出现 `user_role_rela/menu does not exist`。 + +- 风险与影响: + - Flower 修复仅调整查询路径,不改变响应 schema 与前端调用方式。 + - legacy 鉴权改动是“缺表短路”策略:当 legacy 表不存在时更快回退 modern 逻辑,避免日志噪音和事务回滚开销。 + - 新增 `deploy/dev-deploy/.env` 为默认开发口径,若团队有自定义值仍可通过本地覆盖文件管理。