mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-04 08:38:34 +00:00
fix(editor): fix import zip with cjk filename (#13458)
fix #12721

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **Bug Fixes**
  * Improved handling of non‑ASCII filenames when unzipping archives: the extractor now tries alternative encodings and validates results so filenames are preserved and displayed correctly after extraction. This change reduces corrupt or garbled names while keeping existing extraction behavior otherwise unchanged.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -65,6 +65,98 @@ export class Unzip {
|
||||
this.unzipped = fflate.unzipSync(new Uint8Array(await blob.arrayBuffer()));
|
||||
}
|
||||
|
||||
/**
 * Returns a repaired version of a zip entry name when re-decoding it yields
 * a different, acceptable string; otherwise returns the name untouched.
 *
 * Pure-ASCII names are returned as-is without attempting any re-decoding.
 * This method never throws: any error raised while repairing falls back to
 * the original name.
 *
 * @param fileName - Entry name as it appears in the unzipped archive.
 * @returns The re-decoded name, or `fileName` itself when no repair applies.
 */
private fixFileNameEncoding(fileName: string): string {
  try {
    // Scan UTF-16 code units for anything outside the 7-bit ASCII range.
    let hasNonAscii = false;
    for (let i = 0; i < fileName.length; i++) {
      if (fileName.charCodeAt(i) > 127) {
        hasNonAscii = true;
        break;
      }
    }
    if (!hasNonAscii) {
      return fileName;
    }

    // Only accept a candidate that is non-empty and actually changes the name.
    const candidate = this.tryDifferentEncodings(fileName);
    if (!candidate || candidate === fileName) {
      return fileName;
    }
    return candidate;
  } catch {
    // Best effort: keep the original name if anything goes wrong.
    return fileName;
  }
}
|
||||
|
||||
// try different encodings
|
||||
/**
 * Treats every UTF-16 code unit of `fileName` as one raw byte and tries to
 * decode the resulting byte sequence with each candidate encoding
 * (currently only UTF-8).
 *
 * The reinterpretation is only meaningful when the string is a "byte string",
 * i.e. every code unit is <= 0xFF. If any code unit is larger, the name
 * already contains real non-Latin characters and is left alone; storing such
 * a value in a `Uint8Array` would silently truncate it modulo 256 and produce
 * unrelated bytes.
 *
 * Decoding is strict (`fatal: true`): byte sequences that are not well-formed
 * in the candidate encoding are rejected instead of being turned into U+FFFD
 * replacement characters, so a legitimately Latin-1 name is not garbled.
 *
 * @param fileName - Entry name whose code units may actually be raw bytes.
 * @returns The decoded name when a candidate encoding decodes the bytes
 *   cleanly and the result passes `isValidDecodedString`; otherwise `null`.
 */
private tryDifferentEncodings(fileName: string): string | null {
  try {
    // convert string to bytes
    const bytes = new Uint8Array(fileName.length);
    for (let i = 0; i < fileName.length; i++) {
      const code = fileName.charCodeAt(i);
      if (code > 0xff) {
        // Not a byte string: nothing to reinterpret.
        return null;
      }
      bytes[i] = code;
    }

    // try different encodings
    // The macOS system zip tool creates archives with UTF-8 encoded filenames.
    // However, this implementation doesn't strictly adhere to the ZIP specification.
    // Simply forcing UTF-8 encoding when unzipping should resolve filename corruption issues.
    const encodings = ['utf-8'];

    for (const encoding of encodings) {
      try {
        // fatal: true makes decode() throw on malformed input instead of
        // substituting U+FFFD, so invalid candidates are skipped.
        const decoder = new TextDecoder(encoding, { fatal: true });
        const result = decoder.decode(bytes);

        // check if decoded result is valid
        if (result && this.isValidDecodedString(result)) {
          return result;
        }
      } catch {
        // bytes are not valid in this encoding (or the label is unsupported),
        // try next encoding
      }
    }
  } catch {
    // ignore conversion error
  }

  return null;
}
|
||||
|
||||
// check if decoded string is valid
|
||||
/**
 * Decides whether a decoded file name looks like a plausible result.
 *
 * A string is rejected when it contains:
 * - a control character in \x00-\x08, \x0B, \x0C, \x0E-\x1F or \x7F
 *   (tab \x09, line feed \x0A and carriage return \x0D are tolerated), or
 * - U+FFFD, the replacement character a lossy decoder emits for malformed
 *   input, which indicates the bytes were not valid in the chosen encoding.
 *
 * The empty string is considered valid.
 *
 * @param str - Candidate decoded string.
 * @returns `true` when none of the rejected code units occur in `str`.
 */
private isValidDecodedString(str: string): boolean {
  for (let i = 0; i < str.length; i++) {
    const code = str.charCodeAt(i);

    const isControl =
      code <= 0x08 || // \x00-\x08
      code === 0x0b ||
      code === 0x0c || // \x0B, \x0C
      (code >= 0x0e && code <= 0x1f) || // \x0E-\x1F
      code === 0x7f; // \x7F

    // U+FFFD marks a byte sequence the decoder could not interpret.
    if (isControl || code === 0xfffd) {
      return false;
    }
  }

  return true;
}
|
||||
|
||||
*[Symbol.iterator]() {
|
||||
const keys = Object.keys(this.unzipped ?? {});
|
||||
let index = 0;
|
||||
@@ -81,7 +173,10 @@ export class Unzip {
|
||||
const content = new File([this.unzipped![path]], fileName, {
|
||||
type: mime ?? '',
|
||||
}) as Blob;
|
||||
yield { path, content, index };
|
||||
|
||||
const fixedPath = this.fixFileNameEncoding(path);
|
||||
|
||||
yield { path: fixedPath, content, index };
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user