diff --git a/MEMORY.md b/MEMORY.md index 24b07c5..85e62b8 100644 --- a/MEMORY.md +++ b/MEMORY.md @@ -236,6 +236,13 @@ - 线路渲染继续复用 `power_line_tower.altitude_m` 作为高度来源;回填后在 `raw_extra_json.elevation` 写入溯源信息(dataset/sample_distance/sampled_at)。 - 一期采样策略为 CSV 点集最近邻(非栅格插值),用于先打通管理与回填闭环;默认推荐回填模式 `fill_null_only`,避免覆盖人工高程。 - 高频通知 topic 为 `admin.elevation`,线路联动通知沿用 `admin.power-lines`。 +- 高程数据文件格式已扩展为:`csv/img/tif/tiff`。 + - `csv`:继续使用点集最近邻采样。 + - `img/tif/tiff`:使用栅格像元采样(按杆塔经纬度取值,必要时自动做 CRS 转换)。 +- 栅格实现口径: + - 运行依赖 `rasterio`(随 `api` 依赖安装)。 + - 分析阶段 `sample_count` 使用 `width * height`(超大栅格上限截断为 `2_147_483_647`),`bbox` 直接取源栅格 `bounds`。 + - 非 WGS84 CRS 会返回告警说明,提醒边界坐标单位可能不是经纬度。 - 用户名列口径:历史环境存在 `users.username` 与 `users.user_name` 双形态;运行时通过 `USER_USERNAME_COLUMN`(`username`/`user_name`)与目标库对齐,避免启动阶段关系预加载触发 `UndefinedColumn`。 - 密码列口径:历史环境存在 `users.password` 与 `users.password_hash` 双形态;运行时通过 `USER_PASSWORD_COLUMN`(`password`/`password_hash`)与目标库对齐,避免启动阶段关系预加载触发 `UndefinedColumn`。 diff --git a/api/app/services/elevation_service.py b/api/app/services/elevation_service.py index b566d41..8e4d831 100644 --- a/api/app/services/elevation_service.py +++ b/api/app/services/elevation_service.py @@ -4,6 +4,8 @@ import asyncio import csv import io from dataclasses import dataclass +from pathlib import Path +from tempfile import NamedTemporaryFile from typing import Any from fastapi import HTTPException, status @@ -35,6 +37,14 @@ ELEVATION_TOPIC = "admin.elevation" POWER_LINES_TOPIC = "admin.power-lines" CSV_ENCODINGS = ("utf-8-sig", "utf-8", "gbk", "latin-1") NEAREST_MATCH_MAX_DISTANCE_M = 2000.0 +ELEVATION_FILE_EXT_FORMAT_MAP = { + ".csv": "csv", + ".img": "img", + ".tif": "tif", + ".tiff": "tiff", +} +RASTER_FILE_FORMATS = {"img", "tif", "tiff"} +MAX_SAMPLE_COUNT_INT = 2_147_483_647 @dataclass @@ -44,6 +54,26 @@ class ElevationSamplePoint: altitude_m: float +@dataclass +class _OpenedRasterDataset: + rasterio: Any + dataset: Any + temp_path: str + + def __enter__(self) -> "_OpenedRasterDataset": + return self + + def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool: + try: + self.dataset.close() + finally: + try: + Path(self.temp_path).unlink(missing_ok=True) + except Exception: + pass + return False + + def serialize_dataset(item: ElevationDataset) -> ElevationDatasetSummary: return ElevationDatasetSummary( id=item.id, @@ -157,16 +187,18 @@ def create_dataset( if get_dataset_by_code(db, payload.code): return None - _ensure_dataset_file_exists(db, mount_code=payload.mount_code, file_path=payload.file_path) + normalized_file_path = payload.file_path.strip() + file_format = _detect_file_format(normalized_file_path) + _ensure_dataset_file_exists(db, mount_code=payload.mount_code, file_path=normalized_file_path) now = utcnow() item = ElevationDataset( code=payload.code.strip(), name=payload.name.strip(), source=_normalize_str(payload.source), - file_format="csv", + file_format=file_format, mount_code=payload.mount_code.strip(), - file_path=payload.file_path.strip(), + file_path=normalized_file_path, resolution_m=payload.resolution_m, status="active", notes=_normalize_str(payload.notes), @@ -236,8 +268,7 @@ def analyze_dataset( if item.status != "active": raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="高程数据集未启用") - sample_points, warnings = _load_dataset_points(db, item) - stats = _compute_dataset_stats(sample_points) + stats, warnings = _analyze_dataset_content(db, item) item.sample_count = stats["sample_count"] item.bbox_min_lon = stats["bbox_min_lon"] @@ -399,18 +430,30 @@ def execute_apply_job(job_id: str) -> None: ) return - points, warnings = _load_dataset_points(db, dataset) - if warnings: - warning_note = "; ".join(warnings[:5]) + file_format = _resolve_dataset_file_format(dataset) + if file_format == "csv": + points, warnings = _load_dataset_points(db, dataset) + stats = _apply_points_to_line_towers( + db, + line_id=line.id, + dataset=dataset, + mode=job.mode, + points=points, + ) + elif file_format in RASTER_FILE_FORMATS: + stats, warnings = _apply_raster_to_line_towers( + db, + line_id=line.id, + dataset=dataset, + mode=job.mode, + ) else: - warning_note = None - stats = _apply_points_to_line_towers( - db, - line_id=line.id, - dataset=dataset, - mode=job.mode, - points=points, - ) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"不支持的高程文件格式: {file_format}", + ) + + warning_note = "; ".join(warnings[:5]) if warnings else None job.updated_tower_count = stats["updated_tower_count"] job.skipped_tower_count = stats["skipped_tower_count"] job.missing_geo_count = stats["missing_geo_count"] @@ -512,6 +555,22 @@ def _compute_dataset_stats(points: list[ElevationSamplePoint]) -> dict[str, floa } +def _analyze_dataset_content( + db: Session, + dataset: ElevationDataset, +) -> tuple[dict[str, float | int], list[str]]: + file_format = _resolve_dataset_file_format(dataset) + if file_format == "csv": + points, warnings = _load_dataset_points(db, dataset) + return _compute_dataset_stats(points), warnings + if file_format in RASTER_FILE_FORMATS: + return _compute_raster_stats(db, dataset) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"不支持的高程文件格式: {file_format}", + ) + + def _apply_points_to_line_towers( db: Session, *, @@ -560,6 +619,7 @@ def _apply_points_to_line_towers( "dataset_code": dataset.code, "sample_method": "nearest", "sample_distance_m": round(distance_m, 3), + "sample_distance_source": "computed", "sampled_at": utcnow().isoformat(), } tower.raw_extra_json = raw_extra @@ -624,6 +684,120 @@ def _haversine_distance_m( return 2 * r * math.asin(min(1.0, math.sqrt(h))) +def _detect_file_format(file_path: str) -> str: + extension = Path(file_path).suffix.lower() + file_format = ELEVATION_FILE_EXT_FORMAT_MAP.get(extension) + if not file_format: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"不支持的高程文件类型: {extension or 'unknown'},仅支持 .csv/.img/.tif/.tiff", + ) + return file_format + + +def _resolve_dataset_file_format(dataset: ElevationDataset) -> str: + declared = (dataset.file_format or "").strip().lower() + detected = _detect_file_format(dataset.file_path) + if declared and declared in ELEVATION_FILE_EXT_FORMAT_MAP.values(): + if declared == detected: + return declared + if declared in {"img", "tif", "tiff"} and detected in RASTER_FILE_FORMATS: + return detected + return detected + + +def _open_raster_dataset( + db: Session, + dataset: ElevationDataset, +) -> _OpenedRasterDataset: + file_format = _resolve_dataset_file_format(dataset) + if file_format not in RASTER_FILE_FORMATS: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"当前文件不是栅格高程文件: {dataset.file_path}", + ) + + try: + import rasterio + except ImportError as exc: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="服务未安装 rasterio,暂不支持 IMG/TIF 高程文件", + ) from exc + + mount = _require_mount(db, dataset.mount_code) + driver = _build_driver_or_400(mount) + try: + read_result = driver.read_file(dataset.file_path) + except StoragePathNotFoundError as exc: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"高程数据文件不存在: {dataset.file_path}") from exc + except StorageInvalidPathError as exc: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc + + suffix = Path(dataset.file_path).suffix.lower() or ".img" + temp_path = "" + try: + with NamedTemporaryFile(delete=False, suffix=suffix) as tmp: + tmp.write(read_result.content) + temp_path = tmp.name + opened = rasterio.open(temp_path) + return _OpenedRasterDataset( + rasterio=rasterio, + dataset=opened, + temp_path=temp_path, + ) + except Exception as exc: + if temp_path: + try: + Path(temp_path).unlink(missing_ok=True) + except Exception: + pass + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"高程栅格文件解析失败: {dataset.file_path}", + ) from exc + + +def _append_non_wgs84_bounds_warning(*, rasterio: Any, src: Any) -> str | None: + src_crs = src.crs + if src_crs is None: + return "栅格缺少 CRS 定义,默认按 WGS84 经度/纬度采样" + try: + src_crs_obj = rasterio.crs.CRS.from_user_input(src_crs) + except Exception: + return "栅格 CRS 无法识别,默认按 WGS84 经度/纬度采样,建议先校验源数据" + if src_crs_obj.to_string() in {"EPSG:4326", "OGC:CRS84"}: + return None + if bool(getattr(src_crs_obj, "is_geographic", False)): + return None + return ( + f"栅格 CRS 为 {src_crs_obj.to_string()},数据集边界框基于该投影坐标," + "回填时会自动从 WGS84 坐标转换后采样" + ) + + +def _is_masked_value(value: Any) -> bool: + try: + import numpy as np + except ImportError: + return False + return bool(np.ma.is_masked(value)) + + +def _almost_equal(a: float, b: float) -> bool: + return abs(a - b) <= 1e-6 + + +def _is_finite_number(value: float) -> bool: + import math + + return math.isfinite(value) + + +def _is_point_within_bounds(*, x: float, y: float, left: float, right: float, bottom: float, top: float) -> bool: + return left <= x <= right and bottom <= y <= top + + def _pick_float(row: dict[str, Any], keys: list[str]) -> float | None: for key in keys: value = row.get(key) @@ -642,6 +816,151 @@ def _decode_csv_bytes(content: bytes) -> str: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="CSV 编码不受支持") +def _compute_raster_stats( + db: Session, + dataset: ElevationDataset, +) -> tuple[dict[str, float | int], list[str]]: + warnings: list[str] = [] + with _open_raster_dataset(db, dataset) as opened: + rasterio = opened.rasterio + src = opened.dataset + bounds = src.bounds + warning_text = _append_non_wgs84_bounds_warning(rasterio=rasterio, src=src) + if warning_text: + warnings.append(warning_text) + + width = int(src.width or 0) + height = int(src.height or 0) + sample_count = width * height + if sample_count > MAX_SAMPLE_COUNT_INT: + sample_count = MAX_SAMPLE_COUNT_INT + + return ( + { + "sample_count": sample_count, + "bbox_min_lon": float(bounds.left), + "bbox_max_lon": float(bounds.right), + "bbox_min_lat": float(bounds.bottom), + "bbox_max_lat": float(bounds.top), + }, + warnings, + ) + + +def _apply_raster_to_line_towers( + db: Session, + *, + line_id: str, + dataset: ElevationDataset, + mode: str, +) -> tuple[dict[str, int], list[str]]: + towers = db.execute( + select(LineTower) + .where(LineTower.line_id == line_id) + .order_by(LineTower.seq_no.asc(), LineTower.id.asc()) + ).scalars().all() + + updated_tower_count = 0 + skipped_tower_count = 0 + missing_geo_count = 0 + unmatched_count = 0 + warnings: list[str] = [] + + with _open_raster_dataset(db, dataset) as opened: + rasterio = opened.rasterio + src = opened.dataset + warning_text = _append_non_wgs84_bounds_warning(rasterio=rasterio, src=src) + if warning_text: + warnings.append(warning_text) + src_crs = src.crs + band_nodata = src.nodatavals[0] if src.nodatavals else None + + for tower in towers: + if tower.longitude is None or tower.latitude is None: + missing_geo_count += 1 + continue + if mode == "fill_null_only" and tower.altitude_m is not None: + skipped_tower_count += 1 + continue + + lon = float(tower.longitude) + lat = float(tower.latitude) + transformed_lon = lon + transformed_lat = lat + + if src_crs and str(src_crs) not in {"EPSG:4326", "OGC:CRS84"}: + try: + xs, ys = rasterio.warp.transform( + "EPSG:4326", + src_crs, + [lon], + [lat], + ) + transformed_lon = float(xs[0]) + transformed_lat = float(ys[0]) + except Exception: + unmatched_count += 1 + continue + + if not _is_point_within_bounds( + x=transformed_lon, + y=transformed_lat, + left=float(src.bounds.left), + right=float(src.bounds.right), + bottom=float(src.bounds.bottom), + top=float(src.bounds.top), + ): + unmatched_count += 1 + continue + + try: + sampled = next(src.sample([(transformed_lon, transformed_lat)], masked=True), None) + except Exception: + sampled = None + + if sampled is None or len(sampled) == 0: + unmatched_count += 1 + continue + + value = sampled[0] + if _is_masked_value(value): + unmatched_count += 1 + continue + if band_nodata is not None and _almost_equal(float(value), float(band_nodata)): + unmatched_count += 1 + continue + + altitude = float(value) + if not _is_finite_number(altitude): + unmatched_count += 1 + continue + + tower.altitude_m = round(altitude, 3) + raw_extra = dict(tower.raw_extra_json or {}) + raw_extra["elevation"] = { + "dataset_id": dataset.id, + "dataset_code": dataset.code, + "sample_method": "raster_pixel", + "sample_distance_m": 0.0, + "sample_distance_source": "pixel_lookup", + "sampled_at": utcnow().isoformat(), + } + tower.raw_extra_json = raw_extra + tower.update_date = utcnow() + updated_tower_count += 1 + + db.commit() + return ( + { + "updated_tower_count": updated_tower_count, + "skipped_tower_count": skipped_tower_count, + "missing_geo_count": missing_geo_count, + "unmatched_count": unmatched_count, + }, + warnings, + ) + + def _normalize_str(value: Any) -> str | None: if value is None: return None diff --git a/api/pyproject.toml b/api/pyproject.toml index 865c657..76bd6be 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -15,4 +15,5 @@ dependencies = [ "python-multipart>=0.0.20,<1.0.0", "boto3>=1.40.0,<2.0.0", "celery[redis]>=5.5.0,<6.0.0", + "rasterio>=1.4.0,<2.0.0", ] diff --git a/api/requirements.txt b/api/requirements.txt index 665dc61..8a30421 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -15,3 +15,4 @@ python-multipart==0.0.20 boto3==1.40.59 httpx==0.28.1 celery[redis]==5.5.3 +rasterio==1.4.3 diff --git a/memory/2026-05-01.md b/memory/2026-05-01.md index dc51286..0e7b137 100644 --- a/memory/2026-05-01.md +++ b/memory/2026-05-01.md @@ -385,3 +385,37 @@ - 当前实现使用 CSV 点集“最近邻采样”,适合先跑通管理与回填流程;不是严格栅格插值方案。 - 未引入 GDAL/rasterio,部署更稳,但精度依赖 CSV 样本密度与坐标质量。 - 回填默认允许 `overwrite_all`,存在覆盖人工高程风险;前端默认展示“仅填空(推荐)”。 + +## Work Log - 高程管理支持 IMG/TIF 导入与回填(2026-05-01) + +- 背景: + - 用户上传的高程数据为 `.img`(栅格),现有实现仅支持 CSV 点集,无法直接用于高程回填任务。 + +- 本次改动(最小闭环): + - 后端服务能力扩展(文件格式识别 + 栅格采样): + - 文件:`api/app/services/elevation_service.py` + - 数据集创建时按扩展名自动识别并落库 `file_format`(支持 `.csv/.img/.tif/.tiff`)。 + - 新增“按格式分流”执行: + - `csv`:沿用现有最近邻点集采样逻辑。 + - `img/tif/tiff`:新增栅格像元采样逻辑,按杆塔经纬度写回 `power_line_tower.altitude_m`。 + - 新增栅格分析逻辑: + - 从栅格读取 `width/height/bounds` 回写 `sample_count/bbox`。 + - 对非 WGS84 CRS 增加告警,并在回填时自动执行坐标转换(WGS84 -> 栅格 CRS)。 + - 回填溯源扩展: + - `raw_extra_json.elevation.sample_method` 对栅格标记为 `raster_pixel`。 + - 增加 `sample_distance_source` 字段(CSV 为 `computed`,栅格为 `pixel_lookup`)。 + - 依赖补齐: + - 文件:`api/requirements.txt`,新增 `rasterio==1.4.3`。 + - 文件:`api/pyproject.toml`,新增 `rasterio>=1.4.0,<2.0.0`。 + - 前端文案更新: + - 文件:`web/src/app/admin/elevation/page.tsx` + - 页面提示更新为支持 `CSV/IMG/TIF/TIFF`,空态与示例路径同步为栅格可用口径。 + +- 验证: + - `python3 -m compileall api/app` -> 通过。 + - `npm run build:web` -> 通过。 + - 构建产物包含路由 `/admin/elevation`。 + +- 风险与影响: + - `.img/.tif` 回填依赖 `rasterio`(及底层 GDAL 运行时),部署环境需确保镜像能成功安装该依赖。 + - 栅格 bbox 直接来自源栅格 CRS;非经纬度坐标系场景会返回告警,便于识别与后续治理。 diff --git a/web/src/app/admin/elevation/page.tsx b/web/src/app/admin/elevation/page.tsx index 3717b59..64e1e60 100644 --- a/web/src/app/admin/elevation/page.tsx +++ b/web/src/app/admin/elevation/page.tsx @@ -451,8 +451,15 @@ export default function AdminElevationPage() { )} > + {datasets.length === 0 ? ( - + ) : ( rowKey={(row) => row.id} @@ -510,7 +517,7 @@ export default function AdminElevationPage() { - + @@ -519,7 +526,7 @@ export default function AdminElevationPage() { - +