[fix]:[FL-52][ATP文本转换与预览导入ATP后中文乱码]

Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
chengkai3
2026-06-08 23:39:37 +08:00
parent c661591b7c
commit 8dc443b2b7
4 changed files with 174 additions and 1 deletions
+24
View File
@@ -92,3 +92,27 @@
- 风险与关注点: - 风险与关注点:
- 当前本地环境仍不具备完整后端依赖,无法直接回归所有 FastAPI/SQLAlchemy 相关测试;本次验证聚焦在 Wine 探测逻辑和语法层面。 - 当前本地环境仍不具备完整后端依赖,无法直接回归所有 FastAPI/SQLAlchemy 相关测试;本次验证聚焦在 Wine 探测逻辑和语法层面。
## Work Log - 修复 ATP 文本上传中文乱码(2026-06-08)
- 背景:
- `ATP文本转换与预览` 页面上传 `.atp/.txt` 文件时直接使用 `file.text()`,浏览器会按 UTF-8 解码。
- 现场 ATP 文件常见为 Windows 导出的 GBK/GB18030 文本,导致导入后编辑区和转换预览中的中文注释/名称乱码。
- 本次处理:
- `web/src/lib/text-file.ts`
- 新增前端文本解码工具,优先识别 BOM、兼容无 BOM 的 UTF-16,并在 UTF-8 严格解码失败时回退到 `GB18030`。
- `web/src/app/admin/power-lines/atp-viewer/page.tsx`
- 上传 ATP 文本时改为基于 `arrayBuffer + TextDecoder` 自动判定编码,不再固定走 UTF-8。
- `web/src/lib/text-file.test.js`
- 补充最小测试,覆盖 `UTF-8`、`GB18030(兼容 GBK)`、无 BOM `UTF-16LE` 三类输入。
- 验证:
- 基线:`npm_config_cache=/tmp/npm-cache npm --workspace web exec tsc --noEmit` 通过;`npm_config_cache=/tmp/npm-cache npm --workspace web exec eslint src/app/admin/power-lines/atp-viewer/page.tsx` 通过。
- 修改后:
- `npm_config_cache=/tmp/npm-cache npm --workspace web exec tsc --noEmit`
- `npm_config_cache=/tmp/npm-cache npm --workspace web exec eslint src/app/admin/power-lines/atp-viewer/page.tsx src/lib/text-file.ts src/lib/text-file.test.js`
- `node --test web/src/lib/text-file.test.js`
- 风险与关注点:
- 已经以错误编码写入数据库的历史 ATP 文本不会被自动修复;本次修复只覆盖后续上传与预览入口。
@@ -30,6 +30,7 @@ import { useTopicSubscription } from "@/hooks/use-topic-subscription";
import { readApiError } from "@/lib/api"; import { readApiError } from "@/lib/api";
import { parseAtpTextToGraphJson, stringifyAtpGraphJson } from "@/lib/atp/parse-atp-text"; import { parseAtpTextToGraphJson, stringifyAtpGraphJson } from "@/lib/atp/parse-atp-text";
import { ATP_SAMPLE_TEXT } from "@/lib/atp/sample"; import { ATP_SAMPLE_TEXT } from "@/lib/atp/sample";
import { readTextFile } from "@/lib/text-file";
import type { AtpGraphJson } from "@/lib/atp/types"; import type { AtpGraphJson } from "@/lib/atp/types";
import type { import type {
AtpEngineStatusResponse, AtpEngineStatusResponse,
@@ -611,7 +612,7 @@ export default function PowerLinesAtpViewerPage() {
const handleFileSelected = async (file: File) => { const handleFileSelected = async (file: File) => {
try { try {
const content = await file.text(); const { text: content } = await readTextFile(file);
setSourceText(content); setSourceText(content);
versionForm.setFieldValue("atp_text", content); versionForm.setFieldValue("atp_text", content);
setParseError(""); setParseError("");
+28
View File
@@ -0,0 +1,28 @@
import assert from "node:assert/strict";
import test from "node:test";
import { decodeTextBytes } from "./text-file.ts";
// UTF-8 input must round-trip unchanged and be reported as "utf-8".
test("decodeTextBytes keeps utf-8 ATP text intact", () => {
  const expectedText = "中文ATP线路";
  const utf8Bytes = new TextEncoder().encode(expectedText);

  const { encoding, text } = decodeTextBytes(utf8Bytes);

  assert.equal(encoding, "utf-8");
  assert.equal(text, expectedText);
});
// Bytes that are not valid UTF-8 must be decoded through the gb18030 fallback.
test("decodeTextBytes falls back to gb18030 for gbk ATP text", () => {
  // GBK encoding of "中文ATP线路": two bytes per CJK character, one per ASCII letter.
  const gbkBytes = new Uint8Array([
    0xd6, 0xd0, // 中
    0xce, 0xc4, // 文
    0x41, 0x54, 0x50, // ATP
    0xcf, 0xdf, // 线
    0xc2, 0xb7, // 路
  ]);

  const { encoding, text } = decodeTextBytes(gbkBytes);

  assert.equal(encoding, "gb18030");
  assert.equal(text, "中文ATP线路");
});
// UTF-16LE input without a BOM must be recognised from its byte pattern.
test("decodeTextBytes detects utf-16le ATP text without bom", () => {
  const expectedText = "ATP线路";
  // Copy the Buffer into a plain Uint8Array so the decoder sees exactly the encoded bytes.
  const utf16Bytes = new Uint8Array(Buffer.from(expectedText, "utf16le"));

  const { encoding, text } = decodeTextBytes(utf16Bytes);

  assert.equal(encoding, "utf-16le");
  assert.equal(text, expectedText);
});
+120
View File
@@ -0,0 +1,120 @@
/** Encoding labels that the decoding helpers in this module accept and report. */
export type DecodedTextEncoding = "utf-8" | "utf-16le" | "utf-16be" | "gb18030";
/** Result of decoding a byte sequence: the decoded string plus the encoding that produced it. */
export type DecodedTextFile = {
// Decoded text content.
text: string;
// Encoding that was selected to produce `text`.
encoding: DecodedTextEncoding;
};
/** Byte-order mark that prefixes UTF-8 text (EF BB BF). */
const UTF8_BOM = new Uint8Array([0xef, 0xbb, 0xbf]);
/** Byte-order mark that prefixes little-endian UTF-16 text (FF FE). */
const UTF16_LE_BOM = new Uint8Array([0xff, 0xfe]);
/** Byte-order mark that prefixes big-endian UTF-16 text (FE FF). */
const UTF16_BE_BOM = new Uint8Array([0xfe, 0xff]);
/**
 * Reports whether `bytes` begins with the exact byte sequence `bom`.
 *
 * @param bytes - Raw bytes to inspect.
 * @param bom - Marker sequence expected at offset 0.
 * @returns `true` when every byte of `bom` matches the byte at the same
 *   offset in `bytes`; `false` when `bytes` is shorter than `bom` or any
 *   byte differs. An empty `bom` always matches.
 */
function startsWithBom(bytes: Uint8Array, bom: Uint8Array): boolean {
  const bomLength = bom.length;
  if (bomLength > bytes.length) {
    return false;
  }

  for (let offset = 0; offset < bomLength; offset += 1) {
    if (bytes[offset] !== bom[offset]) {
      return false;
    }
  }
  return true;
}
/**
 * Decodes `bytes` with a `TextDecoder` for `encoding`, converting any thrown
 * error into a `null` result instead of propagating it.
 *
 * @param bytes - Raw bytes to decode.
 * @param encoding - Encoding label handed to the `TextDecoder` constructor.
 * @param options - Optional decoder options (for example `{ fatal: true }`),
 *   passed through unchanged.
 * @returns The decoded string, or `null` when constructing the decoder or
 *   decoding the bytes throws.
 */
function tryDecode(
  bytes: Uint8Array,
  encoding: DecodedTextEncoding,
  options?: TextDecoderOptions,
): string | null {
  let decoded: string | null;
  try {
    const decoder = new TextDecoder(encoding, options);
    decoded = decoder.decode(bytes);
  } catch {
    // Both constructor failures and decode failures are reported as "no result".
    decoded = null;
  }
  return decoded;
}
/**
 * Guesses whether BOM-less `bytes` are UTF-16 by looking at where NUL bytes
 * fall within the first 512 bytes.
 *
 * ATP text usually starts with ASCII keywords, and in UTF-16 every ASCII
 * character carries one NUL byte, so NULs concentrated on alternating
 * offsets are a clear signature of BOM-less UTF-16.
 *
 * @param bytes - Raw file bytes that did not start with a recognised BOM.
 * @returns `"utf-16le"` when at least 30% of the sampled odd offsets are NUL
 *   and at most 5% of the even offsets are; `"utf-16be"` for the mirrored
 *   pattern; `null` otherwise, including any input shorter than 8 bytes.
 */
function detectUtf16WithoutBom(bytes: Uint8Array): DecodedTextEncoding | null {
  if (bytes.length < 8) {
    return null;
  }

  const sampleSize = Math.min(bytes.length, 512);
  let nulAtEven = 0;
  let nulAtOdd = 0;

  for (let offset = 0; offset < sampleSize; offset += 1) {
    if (bytes[offset] !== 0) {
      continue;
    }
    if (offset % 2 === 0) {
      nulAtEven += 1;
    } else {
      nulAtOdd += 1;
    }
  }

  // The sample holds at least 8 bytes, so both slot counts are non-zero.
  const evenSlots = Math.ceil(sampleSize / 2);
  const oddSlots = Math.floor(sampleSize / 2);
  const evenNulRatio = nulAtEven / evenSlots;
  const oddNulRatio = nulAtOdd / oddSlots;

  // Little-endian stores the low byte first, so ASCII leaves its NUL on odd offsets.
  const looksLittleEndian = oddNulRatio >= 0.3 && evenNulRatio <= 0.05;
  if (looksLittleEndian) {
    return "utf-16le";
  }

  // Big-endian stores the high byte first, so ASCII leaves its NUL on even offsets.
  const looksBigEndian = evenNulRatio >= 0.3 && oddNulRatio <= 0.05;
  return looksBigEndian ? "utf-16be" : null;
}
/**
 * Decodes raw file bytes to text, choosing the encoding in this order:
 *
 * 1. Empty input yields an empty string labelled `"utf-8"`.
 * 2. A leading UTF-8, UTF-16LE or UTF-16BE byte-order mark selects that encoding.
 * 3. A BOM-less UTF-16 byte pattern (see `detectUtf16WithoutBom`) selects UTF-16.
 * 4. Strict (`fatal`) UTF-8 decoding is attempted.
 * 5. Otherwise the bytes are decoded as GB18030.
 *
 * The GB18030 step runs without the `fatal` option, so the final error is
 * raised only when `tryDecode` returns `null`, i.e. when `TextDecoder` itself
 * throws for that label.
 *
 * @param bytes - Raw content of the text file.
 * @returns The decoded text together with the encoding that was used.
 * @throws Error when none of the decoding attempts produces a result.
 */
export function decodeTextBytes(bytes: Uint8Array): DecodedTextFile {
  if (bytes.length === 0) {
    return { text: "", encoding: "utf-8" };
  }

  // An explicit BOM is authoritative; candidates are checked in this fixed order.
  const bomCandidates: ReadonlyArray<readonly [Uint8Array, DecodedTextEncoding]> = [
    [UTF8_BOM, "utf-8"],
    [UTF16_LE_BOM, "utf-16le"],
    [UTF16_BE_BOM, "utf-16be"],
  ];
  for (const [bom, encoding] of bomCandidates) {
    if (startsWithBom(bytes, bom)) {
      const text = tryDecode(bytes, encoding) ?? "";
      return { text, encoding };
    }
  }

  // Without a BOM, check for the UTF-16 byte pattern before attempting UTF-8.
  const sniffedUtf16 = detectUtf16WithoutBom(bytes);
  if (sniffedUtf16 !== null) {
    const text = tryDecode(bytes, sniffedUtf16);
    if (text !== null) {
      return { text, encoding: sniffedUtf16 };
    }
  }

  // Strict mode makes invalid UTF-8 fail instead of yielding replacement characters.
  const strictUtf8 = tryDecode(bytes, "utf-8", { fatal: true });
  if (strictUtf8 !== null) {
    return { text: strictUtf8, encoding: "utf-8" };
  }

  const legacyText = tryDecode(bytes, "gb18030");
  if (legacyText === null) {
    throw new Error("无法识别文件编码,请将 ATP 文本另存为 UTF-8 或 GB18030 后重试。");
  }
  return { text: legacyText, encoding: "gb18030" };
}
/**
 * Reads the full content of `file` and decodes it with `decodeTextBytes`.
 *
 * @param file - Blob (for example an uploaded `File`) holding text content.
 * @returns A promise resolving to the decoded text and the encoding used.
 *   It rejects if reading the blob fails or `decodeTextBytes` throws.
 */
export async function readTextFile(file: Blob): Promise<DecodedTextFile> {
  const rawBuffer = await file.arrayBuffer();
  const bytes = new Uint8Array(rawBuffer);
  return decodeTextBytes(bytes);
}