fix(editor): should add HTTP protocol into link automatically (#11934)

Closes: [BS-3291](https://linear.app/affine-design/issue/BS-3291/工具栏展开时报错，链接无法点击打开)

## Summary by CodeRabbit

- **New Features**
  - URLs entered without a protocol (e.g., "github.com/...") are now automatically normalized to use "https://", ensuring links are secure and consistently formatted.
- **Bug Fixes**
  - Improved handling and validation of links to prevent issues with missing or invalid protocols in bookmarks and inline links.
  - Simplified URL validation logic by leveraging native URL parsing, removing complex regex and email-specific checks.
  - Streamlined toolbar link actions to operate only on valid normalized URLs.
  - Refined URL detection in markdown preprocessing to exclude lines containing spaces from being treated as URLs.
- **Tests**
  - Added tests to verify that links without a protocol are correctly normalized and displayed across different views.
  - Updated URL validation tests to better reflect valid and invalid URL formats, including IP addresses and domain variants.
- **Style**
  - Updated snapshots to reflect the use of "https://" in links.
2026-02-14 13:25:12 +00:00 · 2025-05-19 17:05:06 +00:00
parent 4d6a3731a3
commit fd838d4e2d
10 changed files with 160 additions and 120 deletions
--- a/blocksuite/affine/shared/src/tests/utils/url.unit.spec.ts
+++ b/blocksuite/affine/shared/src/tests/utils/url.unit.spec.ts
@@ -29,13 +29,13 @@ describe('isValidUrl: determining whether a URL is valid is very complicated', (
    expect(isValidUrl('www.example.com')).toEqual(true);
    expect(isValidUrl('example.co')).toEqual(true);
    expect(isValidUrl('example.cm')).toEqual(true);
-    expect(isValidUrl('1.1.1.1')).toEqual(true);
+    expect(isValidUrl('1.1.1.1')).toEqual(false);

    expect(isValidUrl('example.c')).toEqual(false);
  });

  test('special cases', () => {
-    expect(isValidUrl('example.com.')).toEqual(true);
+    expect(isValidUrl('example.com.')).toEqual(false);

    // I don't know why
    // private & local networks is excluded
@@ -44,8 +44,8 @@ describe('isValidUrl: determining whether a URL is valid is very complicated', (
    expect(isValidUrl('localhost')).toEqual(false);
    expect(isValidUrl('0.0.0.0')).toEqual(false);

-    expect(isValidUrl('128.0.0.1')).toEqual(true);
-    expect(isValidUrl('1.0.0.1')).toEqual(true);
+    expect(isValidUrl('128.0.0.1')).toEqual(false);
+    expect(isValidUrl('1.0.0.1')).toEqual(false);
  });

  test('email link is a valid URL', () => {
--- a/blocksuite/affine/shared/src/utils/url.ts
+++ b/blocksuite/affine/shared/src/utils/url.ts
@@ -1,75 +1,66 @@
-export const ALLOWED_SCHEMES = [
+// URL schemes that resolveURL treats as allowed; names per https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
+const ALLOWED_SCHEMES = new Set([
  'http',
  'https',
  'ftp',
  'sftp',
  'mailto',
  'tel',
-  // may need support other schemes
-];
-// I guess you don't want to use the regex base the RFC 5322 Official Standard
-// For more detail see https://stackoverflow.com/questions/201323/how-can-i-validate-an-email-address-using-a-regular-expression/1917982#1917982
-const MAIL_REGEX =
-  /^[a-zA-Z0-9.!#$%&’*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$/;
+]);

-// For more detail see https://stackoverflow.com/questions/8667070/javascript-regular-expression-to-validate-url
-const URL_REGEX = new RegExp(
-  '^' +
-    // protocol identifier (optional)
-    // short syntax // still required
-    '(?:(?:(?:https?|ftp):)?\\/\\/)' +
-    // user:pass BasicAuth (optional)
-    '(?:\\S+(?::\\S*)?@)?' +
-    '(?:' +
-    // IP address exclusion
-    // private & local networks
-    '(?!(?:10|127)(?:\\.\\d{1,3}){3})' +
-    '(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})' +
-    '(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})' +
-    // IP address dotted notation octets
-    // excludes loopback network 0.0.0.0
-    // excludes reserved space >= 224.0.0.0
-    // excludes network & broadcast addresses
-    // (first & last IP address of each class)
-    '(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])' +
-    '(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}' +
-    '(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))' +
-    '|' +
-    // host & domain names, may end with dot
-    // can be replaced by a shortest alternative
-    // (?![-_])(?:[-\\w\\u00a1-\\uffff]{0,63}[^-_]\\.)+
-    '(?:' +
-    '(?:' +
-    '[a-z0-9\\u00a1-\\uffff]' +
-    '[a-z0-9\\u00a1-\\uffff_-]{0,62}' +
-    ')?' +
-    '[a-z0-9\\u00a1-\\uffff]\\.' +
-    ')+' +
-    // TLD identifier name, may end with dot
-    // Addition: We limit the TLD to 2-6 characters, because it can cover most of the cases.
-    '(?:[a-z\\u00a1-\\uffff]{2,6}\\.?)' +
-    ')' +
-    // port number (optional)
-    '(?::\\d{2,5})?' +
-    // resource path (optional)
-    '(?:[/?#]\\S*)?' +
-    '$',
-  'i'
-);
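+// Dotted hostnames must end in a letters-only TLD of 2+ chars, so numeric IPs and TLDs containing digits or hyphens are rejected; see https://publicsuffix.org/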
+const TLD_REGEXP = /(?:\.[a-zA-Z]+)?(\.[a-zA-Z]{2,})$/;

-export function normalizeUrl(url: string) {
-  const includeScheme = ALLOWED_SCHEMES.find(scheme =>
-    url.startsWith(scheme + ':')
-  );
-  if (includeScheme) {
-    // Any link include schema is a valid url
-    return url;
+const toURL = (str: string) => {
+  try {
+    if (!URL.canParse(str)) return null;
+
+    return new URL(str);
+  } catch {
+    return null;
  }
-  const isEmail = MAIL_REGEX.test(url);
-  if (isEmail) {
-    return 'mailto:' + url;
+};
+
+function resolveURL(str: string) {
+  const url = toURL(str);
+  if (!url) return null;
+
+  const protocol = url.protocol.substring(0, url.protocol.length - 1);
+  const hostname = url.hostname;
+
+  let allowed = ALLOWED_SCHEMES.has(protocol);
+  if (allowed && hostname.includes('.')) {
+    allowed = TLD_REGEXP.test(hostname);
  }
-  return 'http://' + url;
+
+  return { url, allowed };
+}
+
+export function normalizeUrl(str: string) {
+  str = str.trim();
+
+  let url = toURL(str);
+
+  if (!url) {
+    const hasScheme = str.match(/^https?:\/\//);
+
+    if (!hasScheme) {
+      const dotIdx = str.indexOf('.');
+      if (dotIdx > 0 && dotIdx < str.length - 1) {
+        url = toURL(`https://${str}`);
+      }
+    }
+  }
+
+  // Parsed OK: return url.href, minus its trailing '/' when the trimmed input did not end with one
+  if (url) {
+    if (!str.endsWith('/') && url.href.endsWith('/')) {
+      return url.href.substring(0, url.href.length - 1);
+    }
+    return url.href;
+  }
+
+  return str;
 }

 /**
@@ -78,20 +69,23 @@ export function normalizeUrl(url: string) {
 * For more detail see https://www.ietf.org/rfc/rfc1738.txt
 */
 export function isValidUrl(str: string) {
-  if (!str) {
-    return false;
-  }
-  const url = normalizeUrl(str);
-  if (url === str) {
-    // Skip check if user input scheme manually
-    try {
-      new URL(url);
-    } catch {
-      return false;
+  str = str.trim();
+
+  let result = resolveURL(str);
+
+  if (result && !result.allowed) return false;
+
+  if (!result) {
+    const hasScheme = str.match(/^https?:\/\//);
+    if (!hasScheme) {
+      const dotIdx = str.indexOf('.');
+      if (dotIdx > 0 && dotIdx < str.length - 1) {
+        result = resolveURL(`https://${str}`);
+      }
    }
-    return true;
  }
-  return URL_REGEX.test(url);
+
+  return result?.allowed ?? false;
 }

 // https://en.wikipedia.org/wiki/Top-level_domain
@@ -119,10 +113,7 @@ const COMMON_TLDS = new Set([
 ]);

 function isCommonTLD(url: URL) {
-  const tld = url.hostname.split('.').pop();
-  if (!tld) {
-    return false;
-  }
+  const tld = url.hostname.split('.').pop() ?? '';
  return COMMON_TLDS.has(tld);
 }