fix(editor): fix import zip with cjk filename (#13458)

fix #12721

## Summary by CodeRabbit

* **Bug Fixes**
  * Improved handling of non‑ASCII filenames when unzipping archives: the extractor now tries alternative encodings and validates results so filenames are preserved and displayed correctly after extraction. This change reduces corrupt or garbled names while keeping existing extraction behavior otherwise unchanged.
2026-02-04 08:38:34 +00:00 · 2025-08-12 10:51:23 +08:00
parent 65f679c4f0
commit ef99c376ec
1 changed files with 96 additions and 1 deletions
--- a/blocksuite/affine/widgets/linked-doc/src/transformers/utils.ts
+++ b/blocksuite/affine/widgets/linked-doc/src/transformers/utils.ts
@@ -65,6 +65,98 @@ export class Unzip {
    this.unzipped = fflate.unzipSync(new Uint8Array(await blob.arrayBuffer()));
  }

+  /**
+   * Best-effort repair of a mis-decoded entry name: a name with any code unit
+   * above 127 is re-decoded; otherwise, or on failure, it is returned as is.
+   */
+  private fixFileNameEncoding(fileName: string): string {
+    let resolved = fileName;
+    try {
+      const hasNonAscii = /[\u0080-\uffff]/.test(fileName);
+      const fixed = hasNonAscii ? this.tryDifferentEncodings(fileName) : null;
+      if (fixed && fixed !== fileName) resolved = fixed;
+    } catch {
+      // best effort only: keep the original name
+    }
+    return resolved;
+  }
+
+  /**
+   * Treats every code unit of `fileName` as one raw byte and decodes those
+   * bytes as UTF-8, undoing names (e.g. from the macOS system zip tool, which
+   * does not strictly follow the ZIP specification) read byte-per-character.
+   * @param fileName - entry name to repair
+   * @returns the strictly decoded name, or `null` if `fileName` is not such a
+   *   byte string or the decoded text fails validation
+   */
+  private tryDifferentEncodings(fileName: string): string | null {
+    const bytes = new Uint8Array(fileName.length);
+    for (let i = 0; i < fileName.length; i++) {
+      const code = fileName.charCodeAt(i);
+      // Not a raw byte: the name is already Unicode and must not be truncated.
+      if (code > 0xff) {
+        return null;
+      }
+      bytes[i] = code;
+    }
+
+    const encodings = ['utf-8'];
+    for (const encoding of encodings) {
+      try {
+        // `fatal` makes malformed input throw instead of yielding U+FFFD, so
+        // a name that is not really UTF-8 is skipped, not returned garbled.
+        const result = new TextDecoder(encoding, { fatal: true }).decode(bytes);
+        if (result && this.isValidDecodedString(result)) {
+          return result;
+        }
+      } catch {
+        // not valid in this encoding, try the next one
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Tells whether a decoded name is free of control characters.
+   *
+   * The name is rejected as soon as one of its UTF-16 code units is:
+   * - in 0x00-0x08,
+   * - 0x0b or 0x0c,
+   * - in 0x0e-0x1f,
+   * - 0x7f.
+   *
+   * Tab (0x09), line feed (0x0a) and carriage return (0x0d) are not on that
+   * list, so they do not cause a rejection. An empty string has nothing to
+   * reject and is therefore accepted.
+   *
+   * @param str - decoded text to inspect
+   * @returns `true` when none of the listed code units occurs in `str`
+   */
+  private isValidDecodedString(str: string): boolean {
+    for (let i = 0; i < str.length; i++) {
+      const code = str.charCodeAt(i);
+
+      // \x00-\x08
+      if (code <= 0x08) {
+        return false;
+      }
+      // \x0B, \x0C
+      if (code === 0x0b || code === 0x0c) {
+        return false;
+      }
+      // \x0E-\x1F
+      if (code >= 0x0e && code <= 0x1f) {
+        return false;
+      }
+      // \x7F
+      if (code === 0x7f) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
  *[Symbol.iterator]() {
    const keys = Object.keys(this.unzipped ?? {});
    let index = 0;
@@ -81,7 +173,10 @@ export class Unzip {
      const content = new File([this.unzipped![path]], fileName, {
        type: mime ?? '',
      }) as Blob;
-      yield { path, content, index };
+
+      const fixedPath = this.fixFileNameEncoding(path);
+
+      yield { path: fixedPath, content, index };
      index++;
    }
  }