mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-14 13:25:12 +00:00
fix(editor): should add HTTP protocol into link automatically (#11934)
Closes: [BS-3291](https://linear.app/affine-design/issue/BS-3291/工具栏展开时报错,链接无法点击打开)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- **New Features**
  - URLs entered without a protocol (e.g., "github.com/...") are now automatically normalized to use "https://", ensuring links are secure and consistently formatted.
- **Bug Fixes**
  - Improved handling and validation of links to prevent issues with missing or invalid protocols in bookmarks and inline links.
  - Simplified URL validation logic by leveraging native URL parsing, removing complex regex and email-specific checks.
  - Streamlined toolbar link actions to operate only on valid normalized URLs.
  - Refined URL detection in markdown preprocessing to exclude lines containing spaces from being treated as URLs.
- **Tests**
  - Added tests to verify that links without a protocol are correctly normalized and displayed across different views.
  - Updated URL validation tests to better reflect valid and invalid URL formats, including IP addresses and domain variants.
- **Style**
  - Updated snapshots to reflect the use of "https://" in links.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -29,13 +29,13 @@ describe('isValidUrl: determining whether a URL is valid is very complicated', (
|
||||
expect(isValidUrl('www.example.com')).toEqual(true);
|
||||
expect(isValidUrl('example.co')).toEqual(true);
|
||||
expect(isValidUrl('example.cm')).toEqual(true);
|
||||
expect(isValidUrl('1.1.1.1')).toEqual(true);
|
||||
expect(isValidUrl('1.1.1.1')).toEqual(false);
|
||||
|
||||
expect(isValidUrl('example.c')).toEqual(false);
|
||||
});
|
||||
|
||||
test('special cases', () => {
|
||||
expect(isValidUrl('example.com.')).toEqual(true);
|
||||
expect(isValidUrl('example.com.')).toEqual(false);
|
||||
|
||||
// I don't know why
|
||||
// private & local networks is excluded
|
||||
@@ -44,8 +44,8 @@ describe('isValidUrl: determining whether a URL is valid is very complicated', (
|
||||
expect(isValidUrl('localhost')).toEqual(false);
|
||||
expect(isValidUrl('0.0.0.0')).toEqual(false);
|
||||
|
||||
expect(isValidUrl('128.0.0.1')).toEqual(true);
|
||||
expect(isValidUrl('1.0.0.1')).toEqual(true);
|
||||
expect(isValidUrl('128.0.0.1')).toEqual(false);
|
||||
expect(isValidUrl('1.0.0.1')).toEqual(false);
|
||||
});
|
||||
|
||||
test('email link is a valid URL', () => {
|
||||
|
||||
@@ -1,75 +1,66 @@
|
||||
export const ALLOWED_SCHEMES = [
|
||||
// https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
|
||||
const ALLOWED_SCHEMES = new Set([
|
||||
'http',
|
||||
'https',
|
||||
'ftp',
|
||||
'sftp',
|
||||
'mailto',
|
||||
'tel',
|
||||
// may need support other schemes
|
||||
];
|
||||
// I guess you don't want to use the regex base the RFC 5322 Official Standard
|
||||
// For more detail see https://stackoverflow.com/questions/201323/how-can-i-validate-an-email-address-using-a-regular-expression/1917982#1917982
|
||||
const MAIL_REGEX =
|
||||
/^[a-zA-Z0-9.!#$%&’*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$/;
|
||||
]);
|
||||
|
||||
// For more detail see https://stackoverflow.com/questions/8667070/javascript-regular-expression-to-validate-url
|
||||
const URL_REGEX = new RegExp(
|
||||
'^' +
|
||||
// protocol identifier (optional)
|
||||
// short syntax // still required
|
||||
'(?:(?:(?:https?|ftp):)?\\/\\/)' +
|
||||
// user:pass BasicAuth (optional)
|
||||
'(?:\\S+(?::\\S*)?@)?' +
|
||||
'(?:' +
|
||||
// IP address exclusion
|
||||
// private & local networks
|
||||
'(?!(?:10|127)(?:\\.\\d{1,3}){3})' +
|
||||
'(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})' +
|
||||
'(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})' +
|
||||
// IP address dotted notation octets
|
||||
// excludes loopback network 0.0.0.0
|
||||
// excludes reserved space >= 224.0.0.0
|
||||
// excludes network & broadcast addresses
|
||||
// (first & last IP address of each class)
|
||||
'(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])' +
|
||||
'(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}' +
|
||||
'(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))' +
|
||||
'|' +
|
||||
// host & domain names, may end with dot
|
||||
// can be replaced by a shortest alternative
|
||||
// (?![-_])(?:[-\\w\\u00a1-\\uffff]{0,63}[^-_]\\.)+
|
||||
'(?:' +
|
||||
'(?:' +
|
||||
'[a-z0-9\\u00a1-\\uffff]' +
|
||||
'[a-z0-9\\u00a1-\\uffff_-]{0,62}' +
|
||||
')?' +
|
||||
'[a-z0-9\\u00a1-\\uffff]\\.' +
|
||||
')+' +
|
||||
// TLD identifier name, may end with dot
|
||||
// Addition: We limit the TLD to 2-6 characters, because it can cover most of the cases.
|
||||
'(?:[a-z\\u00a1-\\uffff]{2,6}\\.?)' +
|
||||
')' +
|
||||
// port number (optional)
|
||||
'(?::\\d{2,5})?' +
|
||||
// resource path (optional)
|
||||
'(?:[/?#]\\S*)?' +
|
||||
'$',
|
||||
'i'
|
||||
);
|
||||
// https://publicsuffix.org/
|
||||
const TLD_REGEXP = /(?:\.[a-zA-Z]+)?(\.[a-zA-Z]{2,})$/;
|
||||
|
||||
export function normalizeUrl(url: string) {
|
||||
const includeScheme = ALLOWED_SCHEMES.find(scheme =>
|
||||
url.startsWith(scheme + ':')
|
||||
);
|
||||
if (includeScheme) {
|
||||
// Any link include schema is a valid url
|
||||
return url;
|
||||
const toURL = (str: string) => {
|
||||
try {
|
||||
if (!URL.canParse(str)) return null;
|
||||
|
||||
return new URL(str);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
const isEmail = MAIL_REGEX.test(url);
|
||||
if (isEmail) {
|
||||
return 'mailto:' + url;
|
||||
};
|
||||
|
||||
function resolveURL(str: string) {
|
||||
const url = toURL(str);
|
||||
if (!url) return null;
|
||||
|
||||
const protocol = url.protocol.substring(0, url.protocol.length - 1);
|
||||
const hostname = url.hostname;
|
||||
|
||||
let allowed = ALLOWED_SCHEMES.has(protocol);
|
||||
if (allowed && hostname.includes('.')) {
|
||||
allowed = TLD_REGEXP.test(hostname);
|
||||
}
|
||||
return 'http://' + url;
|
||||
|
||||
return { url, allowed };
|
||||
}
|
||||
|
||||
export function normalizeUrl(str: string) {
|
||||
str = str.trim();
|
||||
|
||||
let url = toURL(str);
|
||||
|
||||
if (!url) {
|
||||
const hasScheme = str.match(/^https?:\/\//);
|
||||
|
||||
if (!hasScheme) {
|
||||
const dotIdx = str.indexOf('.');
|
||||
if (dotIdx > 0 && dotIdx < str.length - 1) {
|
||||
url = toURL(`https://${str}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Formatted
|
||||
if (url) {
|
||||
if (!str.endsWith('/') && url.href.endsWith('/')) {
|
||||
return url.href.substring(0, url.href.length - 1);
|
||||
}
|
||||
return url.href;
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -78,20 +69,23 @@ export function normalizeUrl(url: string) {
|
||||
* For more detail see https://www.ietf.org/rfc/rfc1738.txt
|
||||
*/
|
||||
export function isValidUrl(str: string) {
|
||||
if (!str) {
|
||||
return false;
|
||||
}
|
||||
const url = normalizeUrl(str);
|
||||
if (url === str) {
|
||||
// Skip check if user input scheme manually
|
||||
try {
|
||||
new URL(url);
|
||||
} catch {
|
||||
return false;
|
||||
str = str.trim();
|
||||
|
||||
let result = resolveURL(str);
|
||||
|
||||
if (result && !result.allowed) return false;
|
||||
|
||||
if (!result) {
|
||||
const hasScheme = str.match(/^https?:\/\//);
|
||||
if (!hasScheme) {
|
||||
const dotIdx = str.indexOf('.');
|
||||
if (dotIdx > 0 && dotIdx < str.length - 1) {
|
||||
result = resolveURL(`https://${str}`);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return URL_REGEX.test(url);
|
||||
|
||||
return result?.allowed ?? false;
|
||||
}
|
||||
|
||||
// https://en.wikipedia.org/wiki/Top-level_domain
|
||||
@@ -119,10 +113,7 @@ const COMMON_TLDS = new Set([
|
||||
]);
|
||||
|
||||
function isCommonTLD(url: URL) {
|
||||
const tld = url.hostname.split('.').pop();
|
||||
if (!tld) {
|
||||
return false;
|
||||
}
|
||||
const tld = url.hostname.split('.').pop() ?? '';
|
||||
return COMMON_TLDS.has(tld);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user