[fix]:[FL-52][ATP文本转换与预览导入ATP后中文乱码]
Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -92,3 +92,27 @@
|
|||||||
|
|
||||||
- 风险与关注点:
|
- 风险与关注点:
|
||||||
- 当前本地环境仍不具备完整后端依赖,无法直接回归所有 FastAPI/SQLAlchemy 相关测试;本次验证聚焦在 Wine 探测逻辑和语法层面。
|
- 当前本地环境仍不具备完整后端依赖,无法直接回归所有 FastAPI/SQLAlchemy 相关测试;本次验证聚焦在 Wine 探测逻辑和语法层面。
|
||||||
|
|
||||||
|
## Work Log - 修复 ATP 文本上传中文乱码(2026-06-08)
|
||||||
|
|
||||||
|
- 背景:
|
||||||
|
- `ATP文本转换与预览` 页面上传 `.atp/.txt` 文件时直接使用 `file.text()`,浏览器会按 UTF-8 解码。
|
||||||
|
- 现场 ATP 文件常见为 Windows 导出的 GBK/GB18030 文本,导致导入后编辑区和转换预览中的中文注释/名称乱码。
|
||||||
|
|
||||||
|
- 本次处理:
|
||||||
|
- `web/src/lib/text-file.ts`
|
||||||
|
- 新增前端文本解码工具,优先识别 BOM、兼容无 BOM 的 UTF-16,并在 UTF-8 严格解码失败时回退到 `GB18030`。
|
||||||
|
- `web/src/app/admin/power-lines/atp-viewer/page.tsx`
|
||||||
|
- 上传 ATP 文本时改为基于 `arrayBuffer + TextDecoder` 自动判定编码,不再固定走 UTF-8。
|
||||||
|
- `web/src/lib/text-file.test.js`
|
||||||
|
- 补充最小测试,覆盖 `UTF-8`、`GB18030(兼容 GBK)`、无 BOM `UTF-16LE` 三类输入。
|
||||||
|
|
||||||
|
- 验证:
|
||||||
|
- 基线:`npm_config_cache=/tmp/npm-cache npm --workspace web exec tsc --noEmit` 通过;`npm_config_cache=/tmp/npm-cache npm --workspace web exec eslint src/app/admin/power-lines/atp-viewer/page.tsx` 通过。
|
||||||
|
- 修改后:
|
||||||
|
- `npm_config_cache=/tmp/npm-cache npm --workspace web exec tsc --noEmit`
|
||||||
|
- `npm_config_cache=/tmp/npm-cache npm --workspace web exec eslint src/app/admin/power-lines/atp-viewer/page.tsx src/lib/text-file.ts src/lib/text-file.test.js`
|
||||||
|
- `node --test web/src/lib/text-file.test.js`
|
||||||
|
|
||||||
|
- 风险与关注点:
|
||||||
|
- 已经以错误编码写入数据库的历史 ATP 文本不会被自动修复;本次修复只覆盖后续上传与预览入口。
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ import { useTopicSubscription } from "@/hooks/use-topic-subscription";
|
|||||||
import { readApiError } from "@/lib/api";
|
import { readApiError } from "@/lib/api";
|
||||||
import { parseAtpTextToGraphJson, stringifyAtpGraphJson } from "@/lib/atp/parse-atp-text";
|
import { parseAtpTextToGraphJson, stringifyAtpGraphJson } from "@/lib/atp/parse-atp-text";
|
||||||
import { ATP_SAMPLE_TEXT } from "@/lib/atp/sample";
|
import { ATP_SAMPLE_TEXT } from "@/lib/atp/sample";
|
||||||
|
import { readTextFile } from "@/lib/text-file";
|
||||||
import type { AtpGraphJson } from "@/lib/atp/types";
|
import type { AtpGraphJson } from "@/lib/atp/types";
|
||||||
import type {
|
import type {
|
||||||
AtpEngineStatusResponse,
|
AtpEngineStatusResponse,
|
||||||
@@ -611,7 +612,7 @@ export default function PowerLinesAtpViewerPage() {
|
|||||||
|
|
||||||
const handleFileSelected = async (file: File) => {
|
const handleFileSelected = async (file: File) => {
|
||||||
try {
|
try {
|
||||||
const content = await file.text();
|
const { text: content } = await readTextFile(file);
|
||||||
setSourceText(content);
|
setSourceText(content);
|
||||||
versionForm.setFieldValue("atp_text", content);
|
versionForm.setFieldValue("atp_text", content);
|
||||||
setParseError("");
|
setParseError("");
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
import assert from "node:assert/strict";
|
||||||
|
import test from "node:test";
|
||||||
|
|
||||||
|
import { decodeTextBytes } from "./text-file.ts";
|
||||||
|
|
||||||
|
// UTF-8 encoded input must come back unchanged and be reported as "utf-8".
test("decodeTextBytes keeps utf-8 ATP text intact", () => {
  const expectedText = "中文ATP线路";
  const result = decodeTextBytes(new TextEncoder().encode(expectedText));

  assert.equal(result.encoding, "utf-8");
  assert.equal(result.text, expectedText);
});
|
||||||
|
|
||||||
|
// Bytes that are not valid UTF-8 must be decoded through the gb18030 fallback.
test("decodeTextBytes falls back to gb18030 for gbk ATP text", () => {
  // Same byte values as the decimal original, written in hex; the test
  // expects them to decode to "中文ATP线路".
  const gbkBytes = new Uint8Array([
    0xd6, 0xd0, 0xce, 0xc4, 0x41, 0x54, 0x50, 0xcf, 0xdf, 0xc2, 0xb7,
  ]);
  const result = decodeTextBytes(gbkBytes);

  assert.equal(result.encoding, "gb18030");
  assert.equal(result.text, "中文ATP线路");
});
|
||||||
|
|
||||||
|
// UTF-16LE input without a BOM must be recognised from its zero-byte pattern.
test("decodeTextBytes detects utf-16le ATP text without bom", () => {
  const expectedText = "ATP线路";
  // Copy the Buffer contents into a plain Uint8Array before decoding.
  const utf16Bytes = new Uint8Array(Buffer.from(expectedText, "utf16le"));
  const result = decodeTextBytes(utf16Bytes);

  assert.equal(result.encoding, "utf-16le");
  assert.equal(result.text, expectedText);
});
|
||||||
@@ -0,0 +1,120 @@
|
|||||||
|
/** Encoding labels that this module can report for a decoded text file. */
export type DecodedTextEncoding = "utf-8" | "utf-16le" | "utf-16be" | "gb18030";
|
||||||
|
|
||||||
|
/**
 * Result of decoding a text file: the decoded string (`text`) together with
 * the encoding label (`encoding`) that was used to produce it.
 */
export type DecodedTextFile = {
|
||||||
|
text: string;
|
||||||
|
encoding: DecodedTextEncoding;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Byte-order-mark prefixes checked at the start of an uploaded file.
/** UTF-8 byte order mark. */
const UTF8_BOM = new Uint8Array([0xef, 0xbb, 0xbf]);
/** UTF-16 little-endian byte order mark. */
const UTF16_LE_BOM = new Uint8Array([0xff, 0xfe]);
/** UTF-16 big-endian byte order mark. */
const UTF16_BE_BOM = new Uint8Array([0xfe, 0xff]);
|
||||||
|
|
||||||
|
/**
 * Reports whether `bytes` begins with the exact byte sequence `bom`.
 *
 * @param bytes - Buffer to inspect.
 * @param bom - Prefix to look for.
 * @returns `true` when every byte of `bom` matches the byte at the same
 *   offset in `bytes`; `false` when `bytes` is shorter than `bom` or any
 *   byte differs. An empty `bom` always matches.
 */
function startsWithBom(bytes: Uint8Array, bom: Uint8Array): boolean {
  if (bom.length > bytes.length) {
    return false;
  }
  for (let offset = 0; offset < bom.length; offset += 1) {
    if (bytes[offset] !== bom[offset]) {
      return false;
    }
  }
  return true;
}
|
||||||
|
|
||||||
|
/**
 * Decodes `bytes` with a `TextDecoder` for `encoding`, converting any thrown
 * error into a `null` result.
 *
 * @param bytes - Raw bytes to decode.
 * @param encoding - Encoding label handed to the `TextDecoder` constructor.
 * @param options - Optional `TextDecoder` options, e.g. `{ fatal: true }`.
 * @returns The decoded string, or `null` if constructing the decoder or
 *   decoding the bytes threw.
 */
function tryDecode(
  bytes: Uint8Array,
  encoding: DecodedTextEncoding,
  options?: TextDecoderOptions,
): string | null {
  let decoded: string | null;
  try {
    const decoder = new TextDecoder(encoding, options);
    decoded = decoder.decode(bytes);
  } catch {
    // Any failure (constructor or decode) is reported to the caller as null.
    decoded = null;
  }
  return decoded;
}
|
||||||
|
|
||||||
|
/**
 * Guesses whether BOM-less `bytes` are UTF-16 by looking at where zero bytes
 * fall in the first 512 bytes (or fewer, if the input is shorter).
 *
 * ATP text usually starts with ASCII keywords, so UTF-16 input shows zero
 * bytes on alternating offsets: odd offsets for little-endian, even offsets
 * for big-endian.
 *
 * @param bytes - Raw file content without a byte order mark.
 * @returns `"utf-16le"` when at least 30% of the odd offsets and at most 5%
 *   of the even offsets hold a zero byte, `"utf-16be"` for the mirrored
 *   pattern, and `null` otherwise or when fewer than 8 bytes are available.
 */
function detectUtf16WithoutBom(bytes: Uint8Array): DecodedTextEncoding | null {
  if (bytes.length < 8) {
    return null;
  }

  const windowLength = Math.min(bytes.length, 512);

  let zerosAtEven = 0;
  let zerosAtOdd = 0;
  for (let offset = 0; offset < windowLength; offset += 1) {
    if (bytes[offset] !== 0) {
      continue;
    }
    if (offset % 2 === 0) {
      zerosAtEven += 1;
    } else {
      zerosAtOdd += 1;
    }
  }

  // The window holds at least 8 bytes, so both slot counts are non-zero and
  // the divisions below are always well defined.
  const oddSlots = Math.floor(windowLength / 2);
  const evenSlots = windowLength - oddSlots;
  const evenZeroShare = zerosAtEven / evenSlots;
  const oddZeroShare = zerosAtOdd / oddSlots;

  const looksLittleEndian = oddZeroShare >= 0.3 && evenZeroShare <= 0.05;
  if (looksLittleEndian) {
    return "utf-16le";
  }

  const looksBigEndian = evenZeroShare >= 0.3 && oddZeroShare <= 0.05;
  return looksBigEndian ? "utf-16be" : null;
}
|
||||||
|
|
||||||
|
/**
 * Decodes raw file bytes into text, choosing the encoding automatically.
 *
 * Detection order:
 * 1. Empty input is returned as an empty `"utf-8"` result.
 * 2. A leading UTF-8, UTF-16LE or UTF-16BE byte order mark selects that
 *    encoding.
 * 3. BOM-less UTF-16 is detected from its alternating zero-byte pattern.
 * 4. Strict (`fatal`) UTF-8 decoding is attempted.
 * 5. Otherwise the bytes are decoded as GB18030.
 *
 * @param bytes - Raw content of the uploaded file.
 * @returns The decoded text and the encoding label that produced it.
 * @throws Error when the chosen decoder is unavailable or fails, so the
 *   caller never receives an empty string for a file that could not be
 *   decoded.
 */
export function decodeTextBytes(bytes: Uint8Array): DecodedTextFile {
  const undecodableError = (): Error =>
    new Error("无法识别文件编码,请将 ATP 文本另存为 UTF-8 或 GB18030 后重试。");

  if (bytes.length === 0) {
    return { text: "", encoding: "utf-8" };
  }

  // Checked in this order; the first matching BOM decides the encoding.
  const bomEncodings: ReadonlyArray<readonly [Uint8Array, DecodedTextEncoding]> = [
    [UTF8_BOM, "utf-8"],
    [UTF16_LE_BOM, "utf-16le"],
    [UTF16_BE_BOM, "utf-16be"],
  ];
  for (const [bom, encoding] of bomEncodings) {
    if (!startsWithBom(bytes, bom)) {
      continue;
    }
    const text = tryDecode(bytes, encoding);
    if (text === null) {
      // A null here means the decoder could not be created or threw.
      // Surface that instead of reporting an empty file as a success.
      throw undecodableError();
    }
    return { text, encoding };
  }

  const utf16Encoding = detectUtf16WithoutBom(bytes);
  if (utf16Encoding) {
    const utf16Text = tryDecode(bytes, utf16Encoding);
    if (utf16Text !== null) {
      return { text: utf16Text, encoding: utf16Encoding };
    }
    // Fall through to the UTF-8 / GB18030 attempts if UTF-16 decoding failed.
  }

  // Strict mode makes invalid UTF-8 fail, which is the signal to fall back.
  const utf8Text = tryDecode(bytes, "utf-8", { fatal: true });
  if (utf8Text !== null) {
    return { text: utf8Text, encoding: "utf-8" };
  }

  const gb18030Text = tryDecode(bytes, "gb18030");
  if (gb18030Text !== null) {
    return { text: gb18030Text, encoding: "gb18030" };
  }

  throw undecodableError();
}
|
||||||
|
|
||||||
|
/**
 * Reads the whole `file` into memory and decodes it with `decodeTextBytes`.
 *
 * @param file - Blob (for example a `File` from an upload input) to read.
 * @returns The decoded text together with the encoding used.
 */
export async function readTextFile(file: Blob): Promise<DecodedTextFile> {
  const rawContent = await file.arrayBuffer();
  const bytes = new Uint8Array(rawContent);
  return decodeTextBytes(bytes);
}
|
||||||
Reference in New Issue
Block a user