feat: support img/tif elevation dataset apply workflow

This commit is contained in:
chengkai3
2026-05-01 18:09:18 +08:00
parent 98fb191e34
commit 850a2e7bc3
6 changed files with 388 additions and 19 deletions
+7
View File
@@ -236,6 +236,13 @@
- 线路渲染继续复用 `power_line_tower.altitude_m` 作为高度来源;回填后在 `raw_extra_json.elevation` 写入溯源信息(dataset/sample_distance/sampled_at)。
- 一期采样策略为 CSV 点集最近邻(非栅格插值),用于先打通管理与回填闭环;默认推荐回填模式 `fill_null_only`,避免覆盖人工高程。
- 高频通知 topic 为 `admin.elevation`,线路联动通知沿用 `admin.power-lines`
- 高程数据文件格式已扩展为:`csv/img/tif/tiff`
- `csv`:继续使用点集最近邻采样。
- `img/tif/tiff`:使用栅格像元采样(按杆塔经纬度取值,必要时自动做 CRS 转换)。
- 栅格实现口径:
- 运行依赖 `rasterio`(随 `api` 依赖安装)。
- 分析阶段 `sample_count` 使用 `width * height`(超大栅格上限截断为 `2_147_483_647`),`bbox` 直接取源栅格 `bounds`
- 非 WGS84 CRS 会返回告警说明,提醒边界坐标单位可能不是经纬度。
- 用户名列口径:历史环境存在 `users.username``users.user_name` 双形态;运行时通过 `USER_USERNAME_COLUMN``username`/`user_name`)与目标库对齐,避免启动阶段关系预加载触发 `UndefinedColumn`
- 密码列口径:历史环境存在 `users.password``users.password_hash` 双形态;运行时通过 `USER_PASSWORD_COLUMN``password`/`password_hash`)与目标库对齐,避免启动阶段关系预加载触发 `UndefinedColumn`
+335 -16
View File
@@ -4,6 +4,8 @@ import asyncio
import csv
import io
from dataclasses import dataclass
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Any
from fastapi import HTTPException, status
@@ -35,6 +37,14 @@ ELEVATION_TOPIC = "admin.elevation"
POWER_LINES_TOPIC = "admin.power-lines"
CSV_ENCODINGS = ("utf-8-sig", "utf-8", "gbk", "latin-1")
NEAREST_MATCH_MAX_DISTANCE_M = 2000.0
ELEVATION_FILE_EXT_FORMAT_MAP = {
".csv": "csv",
".img": "img",
".tif": "tif",
".tiff": "tiff",
}
RASTER_FILE_FORMATS = {"img", "tif", "tiff"}
MAX_SAMPLE_COUNT_INT = 2_147_483_647
@dataclass
@@ -44,6 +54,26 @@ class ElevationSamplePoint:
altitude_m: float
@dataclass
class _OpenedRasterDataset:
rasterio: Any
dataset: Any
temp_path: str
def __enter__(self) -> "_OpenedRasterDataset":
return self
def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
try:
self.dataset.close()
finally:
try:
Path(self.temp_path).unlink(missing_ok=True)
except Exception:
pass
return False
def serialize_dataset(item: ElevationDataset) -> ElevationDatasetSummary:
return ElevationDatasetSummary(
id=item.id,
@@ -157,16 +187,18 @@ def create_dataset(
if get_dataset_by_code(db, payload.code):
return None
_ensure_dataset_file_exists(db, mount_code=payload.mount_code, file_path=payload.file_path)
normalized_file_path = payload.file_path.strip()
file_format = _detect_file_format(normalized_file_path)
_ensure_dataset_file_exists(db, mount_code=payload.mount_code, file_path=normalized_file_path)
now = utcnow()
item = ElevationDataset(
code=payload.code.strip(),
name=payload.name.strip(),
source=_normalize_str(payload.source),
file_format="csv",
file_format=file_format,
mount_code=payload.mount_code.strip(),
file_path=payload.file_path.strip(),
file_path=normalized_file_path,
resolution_m=payload.resolution_m,
status="active",
notes=_normalize_str(payload.notes),
@@ -236,8 +268,7 @@ def analyze_dataset(
if item.status != "active":
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="高程数据集未启用")
sample_points, warnings = _load_dataset_points(db, item)
stats = _compute_dataset_stats(sample_points)
stats, warnings = _analyze_dataset_content(db, item)
item.sample_count = stats["sample_count"]
item.bbox_min_lon = stats["bbox_min_lon"]
@@ -399,18 +430,30 @@ def execute_apply_job(job_id: str) -> None:
)
return
points, warnings = _load_dataset_points(db, dataset)
if warnings:
warning_note = "; ".join(warnings[:5])
file_format = _resolve_dataset_file_format(dataset)
if file_format == "csv":
points, warnings = _load_dataset_points(db, dataset)
stats = _apply_points_to_line_towers(
db,
line_id=line.id,
dataset=dataset,
mode=job.mode,
points=points,
)
elif file_format in RASTER_FILE_FORMATS:
stats, warnings = _apply_raster_to_line_towers(
db,
line_id=line.id,
dataset=dataset,
mode=job.mode,
)
else:
warning_note = None
stats = _apply_points_to_line_towers(
db,
line_id=line.id,
dataset=dataset,
mode=job.mode,
points=points,
)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"不支持的高程文件格式: {file_format}",
)
warning_note = "; ".join(warnings[:5]) if warnings else None
job.updated_tower_count = stats["updated_tower_count"]
job.skipped_tower_count = stats["skipped_tower_count"]
job.missing_geo_count = stats["missing_geo_count"]
@@ -512,6 +555,22 @@ def _compute_dataset_stats(points: list[ElevationSamplePoint]) -> dict[str, floa
}
def _analyze_dataset_content(
db: Session,
dataset: ElevationDataset,
) -> tuple[dict[str, float | int], list[str]]:
file_format = _resolve_dataset_file_format(dataset)
if file_format == "csv":
points, warnings = _load_dataset_points(db, dataset)
return _compute_dataset_stats(points), warnings
if file_format in RASTER_FILE_FORMATS:
return _compute_raster_stats(db, dataset)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"不支持的高程文件格式: {file_format}",
)
def _apply_points_to_line_towers(
db: Session,
*,
@@ -560,6 +619,7 @@ def _apply_points_to_line_towers(
"dataset_code": dataset.code,
"sample_method": "nearest",
"sample_distance_m": round(distance_m, 3),
"sample_distance_source": "computed",
"sampled_at": utcnow().isoformat(),
}
tower.raw_extra_json = raw_extra
@@ -624,6 +684,120 @@ def _haversine_distance_m(
return 2 * r * math.asin(min(1.0, math.sqrt(h)))
def _detect_file_format(file_path: str) -> str:
extension = Path(file_path).suffix.lower()
file_format = ELEVATION_FILE_EXT_FORMAT_MAP.get(extension)
if not file_format:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"不支持的高程文件类型: {extension or 'unknown'},仅支持 .csv/.img/.tif/.tiff",
)
return file_format
def _resolve_dataset_file_format(dataset: ElevationDataset) -> str:
declared = (dataset.file_format or "").strip().lower()
detected = _detect_file_format(dataset.file_path)
if declared and declared in ELEVATION_FILE_EXT_FORMAT_MAP.values():
if declared == detected:
return declared
if declared in {"img", "tif", "tiff"} and detected in RASTER_FILE_FORMATS:
return detected
return detected
def _open_raster_dataset(
db: Session,
dataset: ElevationDataset,
) -> _OpenedRasterDataset:
file_format = _resolve_dataset_file_format(dataset)
if file_format not in RASTER_FILE_FORMATS:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"当前文件不是栅格高程文件: {dataset.file_path}",
)
try:
import rasterio
except ImportError as exc:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="服务未安装 rasterio,暂不支持 IMG/TIF 高程文件",
) from exc
mount = _require_mount(db, dataset.mount_code)
driver = _build_driver_or_400(mount)
try:
read_result = driver.read_file(dataset.file_path)
except StoragePathNotFoundError as exc:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"高程数据文件不存在: {dataset.file_path}") from exc
except StorageInvalidPathError as exc:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
suffix = Path(dataset.file_path).suffix.lower() or ".img"
temp_path = ""
try:
with NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
tmp.write(read_result.content)
temp_path = tmp.name
opened = rasterio.open(temp_path)
return _OpenedRasterDataset(
rasterio=rasterio,
dataset=opened,
temp_path=temp_path,
)
except Exception as exc:
if temp_path:
try:
Path(temp_path).unlink(missing_ok=True)
except Exception:
pass
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"高程栅格文件解析失败: {dataset.file_path}",
) from exc
def _append_non_wgs84_bounds_warning(*, rasterio: Any, src: Any) -> str | None:
src_crs = src.crs
if src_crs is None:
return "栅格缺少 CRS 定义,默认按 WGS84 经度/纬度采样"
try:
src_crs_obj = rasterio.crs.CRS.from_user_input(src_crs)
except Exception:
return "栅格 CRS 无法识别,默认按 WGS84 经度/纬度采样,建议先校验源数据"
if src_crs_obj.to_string() in {"EPSG:4326", "OGC:CRS84"}:
return None
if bool(getattr(src_crs_obj, "is_geographic", False)):
return None
return (
f"栅格 CRS 为 {src_crs_obj.to_string()},数据集边界框基于该投影坐标,"
"回填时会自动从 WGS84 坐标转换后采样"
)
def _is_masked_value(value: Any) -> bool:
try:
import numpy as np
except ImportError:
return False
return bool(np.ma.is_masked(value))
def _almost_equal(a: float, b: float) -> bool:
return abs(a - b) <= 1e-6
def _is_finite_number(value: float) -> bool:
import math
return math.isfinite(value)
def _is_point_within_bounds(*, x: float, y: float, left: float, right: float, bottom: float, top: float) -> bool:
return left <= x <= right and bottom <= y <= top
def _pick_float(row: dict[str, Any], keys: list[str]) -> float | None:
for key in keys:
value = row.get(key)
@@ -642,6 +816,151 @@ def _decode_csv_bytes(content: bytes) -> str:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="CSV 编码不受支持")
def _compute_raster_stats(
db: Session,
dataset: ElevationDataset,
) -> tuple[dict[str, float | int], list[str]]:
warnings: list[str] = []
with _open_raster_dataset(db, dataset) as opened:
rasterio = opened.rasterio
src = opened.dataset
bounds = src.bounds
warning_text = _append_non_wgs84_bounds_warning(rasterio=rasterio, src=src)
if warning_text:
warnings.append(warning_text)
width = int(src.width or 0)
height = int(src.height or 0)
sample_count = width * height
if sample_count > MAX_SAMPLE_COUNT_INT:
sample_count = MAX_SAMPLE_COUNT_INT
return (
{
"sample_count": sample_count,
"bbox_min_lon": float(bounds.left),
"bbox_max_lon": float(bounds.right),
"bbox_min_lat": float(bounds.bottom),
"bbox_max_lat": float(bounds.top),
},
warnings,
)
def _apply_raster_to_line_towers(
db: Session,
*,
line_id: str,
dataset: ElevationDataset,
mode: str,
) -> tuple[dict[str, int], list[str]]:
towers = db.execute(
select(LineTower)
.where(LineTower.line_id == line_id)
.order_by(LineTower.seq_no.asc(), LineTower.id.asc())
).scalars().all()
updated_tower_count = 0
skipped_tower_count = 0
missing_geo_count = 0
unmatched_count = 0
warnings: list[str] = []
with _open_raster_dataset(db, dataset) as opened:
rasterio = opened.rasterio
src = opened.dataset
warning_text = _append_non_wgs84_bounds_warning(rasterio=rasterio, src=src)
if warning_text:
warnings.append(warning_text)
src_crs = src.crs
band_nodata = src.nodatavals[0] if src.nodatavals else None
for tower in towers:
if tower.longitude is None or tower.latitude is None:
missing_geo_count += 1
continue
if mode == "fill_null_only" and tower.altitude_m is not None:
skipped_tower_count += 1
continue
lon = float(tower.longitude)
lat = float(tower.latitude)
transformed_lon = lon
transformed_lat = lat
if src_crs and str(src_crs) not in {"EPSG:4326", "OGC:CRS84"}:
try:
xs, ys = rasterio.warp.transform(
"EPSG:4326",
src_crs,
[lon],
[lat],
)
transformed_lon = float(xs[0])
transformed_lat = float(ys[0])
except Exception:
unmatched_count += 1
continue
if not _is_point_within_bounds(
x=transformed_lon,
y=transformed_lat,
left=float(src.bounds.left),
right=float(src.bounds.right),
bottom=float(src.bounds.bottom),
top=float(src.bounds.top),
):
unmatched_count += 1
continue
try:
sampled = next(src.sample([(transformed_lon, transformed_lat)], masked=True), None)
except Exception:
sampled = None
if sampled is None or len(sampled) == 0:
unmatched_count += 1
continue
value = sampled[0]
if _is_masked_value(value):
unmatched_count += 1
continue
if band_nodata is not None and _almost_equal(float(value), float(band_nodata)):
unmatched_count += 1
continue
altitude = float(value)
if not _is_finite_number(altitude):
unmatched_count += 1
continue
tower.altitude_m = round(altitude, 3)
raw_extra = dict(tower.raw_extra_json or {})
raw_extra["elevation"] = {
"dataset_id": dataset.id,
"dataset_code": dataset.code,
"sample_method": "raster_pixel",
"sample_distance_m": 0.0,
"sample_distance_source": "pixel_lookup",
"sampled_at": utcnow().isoformat(),
}
tower.raw_extra_json = raw_extra
tower.update_date = utcnow()
updated_tower_count += 1
db.commit()
return (
{
"updated_tower_count": updated_tower_count,
"skipped_tower_count": skipped_tower_count,
"missing_geo_count": missing_geo_count,
"unmatched_count": unmatched_count,
},
warnings,
)
def _normalize_str(value: Any) -> str | None:
if value is None:
return None
+1
View File
@@ -15,4 +15,5 @@ dependencies = [
"python-multipart>=0.0.20,<1.0.0",
"boto3>=1.40.0,<2.0.0",
"celery[redis]>=5.5.0,<6.0.0",
"rasterio>=1.4.0,<2.0.0",
]
+1
View File
@@ -15,3 +15,4 @@ python-multipart==0.0.20
boto3==1.40.59
httpx==0.28.1
celery[redis]==5.5.3
rasterio==1.4.3
+34
View File
@@ -385,3 +385,37 @@
- 当前实现使用 CSV 点集“最近邻采样”,适合先跑通管理与回填流程;不是严格栅格插值方案。
- 未引入 GDAL/rasterio,部署更稳,但精度依赖 CSV 样本密度与坐标质量。
- 回填默认允许 `overwrite_all`,存在覆盖人工高程风险;前端默认展示“仅填空(推荐)”。
## Work Log - 高程管理支持 IMG/TIF 导入与回填(2026-05-01
- 背景:
- 用户上传的高程数据为 `.img`(栅格),现有实现仅支持 CSV 点集,无法直接用于高程回填任务。
- 本次改动(最小闭环):
- 后端服务能力扩展(文件格式识别 + 栅格采样):
- 文件:`api/app/services/elevation_service.py`
- 数据集创建时按扩展名自动识别并落库 `file_format`(支持 `.csv/.img/.tif/.tiff`)。
- 新增“按格式分流”执行:
- `csv`:沿用现有最近邻点集采样逻辑。
- `img/tif/tiff`:新增栅格像元采样逻辑,按杆塔经纬度写回 `power_line_tower.altitude_m`
- 新增栅格分析逻辑:
- 从栅格读取 `width/height/bounds` 回写 `sample_count/bbox`
- 对非 WGS84 CRS 增加告警,并在回填时自动执行坐标转换(WGS84 -> 栅格 CRS)。
- 回填溯源扩展:
- `raw_extra_json.elevation.sample_method` 对栅格标记为 `raster_pixel`
- 增加 `sample_distance_source` 字段(CSV 为 `computed`,栅格为 `pixel_lookup`)。
- 依赖补齐:
- 文件:`api/requirements.txt`,新增 `rasterio==1.4.3`
- 文件:`api/pyproject.toml`,新增 `rasterio>=1.4.0,<2.0.0`
- 前端文案更新:
- 文件:`web/src/app/admin/elevation/page.tsx`
- 页面提示更新为支持 `CSV/IMG/TIF/TIFF`,空态与示例路径同步为栅格可用口径。
- 验证:
- `python3 -m compileall api/app` -> 通过。
- `npm run build:web` -> 通过。
- 构建产物包含路由 `/admin/elevation`
- 风险与影响:
- `.img/.tif` 回填依赖 `rasterio`(及底层 GDAL 运行时),部署环境需确保镜像能成功安装该依赖。
- 栅格 bbox 直接来自源栅格 CRS;非经纬度坐标系场景会返回告警,便于识别与后续治理。
+10 -3
View File
@@ -451,8 +451,15 @@ export default function AdminElevationPage() {
</Space>
)}
>
<Alert
type="info"
showIcon
message="支持文件格式:CSV(点集)/ IMG / TIF / TIFF(栅格)"
description="CSV 需包含经度、纬度、高程列;IMG/TIF 会按塔杆经纬度直接采样。"
className="mb-4"
/>
{datasets.length === 0 ? (
<Empty description="暂无高程数据集,请先上传 CSV 并创建数据集。" />
<Empty description="暂无高程数据集,请先上传 CSV/IMG/TIF 并创建数据集。" />
) : (
<Table<ElevationDatasetSummary>
rowKey={(row) => row.id}
@@ -510,7 +517,7 @@ export default function AdminElevationPage() {
<Input placeholder="dem_china_90m_v1" />
</Form.Item>
<Form.Item name="name" label="名称" rules={[{ required: true, message: "请输入名称" }]}>
<Input placeholder="中国90米DEMCSV" />
<Input placeholder="中国90米DEMIMG" />
</Form.Item>
<Form.Item name="source" label="来源">
<Input placeholder="中科院地理空间数据云" />
@@ -519,7 +526,7 @@ export default function AdminElevationPage() {
<Input placeholder="main" />
</Form.Item>
<Form.Item name="file_path" label="文件路径" rules={[{ required: true, message: "请输入文件路径" }]}>
<Input placeholder="/elevation/datasets/china90m.csv" />
<Input placeholder="/elevation/datasets/china90m.img" />
</Form.Item>
<Form.Item name="resolution_m" label="分辨率(米)">
<InputNumber className="w-full" min={1} max={10000} />