mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-03-22 23:30:36 +08:00
242 lines
5.5 KiB
TypeScript
242 lines
5.5 KiB
TypeScript
/**
 * URI schemes that are accepted when validating or normalizing a link.
 *
 * Entries are lowercase and written without the trailing colon.
 * Registry: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
 */
const ALLOWED_SCHEMES = new Set<string>([
  // Web
  'http',
  'https',
  // File transfer
  'ftp',
  'sftp',
  // Contact
  'mailto',
  'tel',
]);
|
|
|
|
/**
 * Matches a hostname whose last label looks like a top-level domain.
 *
 * The last label (capture group 1, including the leading dot) must be either
 * - two or more ASCII letters (`.com`, `.uk`), or
 * - a punycode label (`.xn--p1ai`), which is how internationalized TLDs such
 *   as `.рф` show up after URL parsing. Such labels contain digits and
 *   hyphens, so a letters-only pattern would reject them.
 *
 * Purely numeric labels never match, so dotted IPv4 addresses are not
 * mistaken for domain names.
 *
 * See https://publicsuffix.org/
 */
const TLD_REGEXP =
  /(?:\.[a-zA-Z]+)?(\.(?:[a-zA-Z]{2,}|xn--[a-zA-Z0-9-]*[a-zA-Z0-9]))$/;
|
|
|
|
/**
 * Matches a complete dotted-quad IPv4 address: exactly four octets separated
 * by dots, each octet being `25[0-5]`, `2[0-4]d`, or an optional leading
 * `0`/`1` followed by one or two digits.
 */
const IPV4_ADDR_REGEXP = /^(25[0-5]|2[0-4]\d|[01]?\d\d?)(\.(25[0-5]|2[0-4]\d|[01]?\d\d?)){3}$/;
|
|
|
|
/**
 * Parses `str` as an absolute URL without throwing.
 *
 * @param str - Text to parse; no base URL is applied.
 * @returns The parsed `URL`, or `null` when `str` is not a valid absolute URL.
 */
const toURL = (str: string) => {
  try {
    // `URL.canParse` is only a pre-check that avoids a constructor throw for
    // invalid input. It is missing on older runtimes; calling it there would
    // raise a TypeError that the `catch` below turns into `null` for *every*
    // input, so it is used only when it actually exists.
    if (typeof URL.canParse === 'function' && !URL.canParse(str)) return null;

    return new URL(str);
  } catch {
    return null;
  }
};
|
|
|
|
/**
 * Tells whether the scheme of `url` is listed in `ALLOWED_SCHEMES`.
 *
 * @param url - An already parsed URL.
 * @returns `true` when the lowercased scheme (protocol minus its last
 * character, the colon) is in the allow-list.
 */
const hasAllowedScheme = (url: URL) => {
  const { protocol } = url;
  const scheme = protocol.substring(0, protocol.length - 1).toLowerCase();
  return ALLOWED_SCHEMES.has(scheme);
};
|
|
|
|
/**
 * Parses `str` and decides whether the result may be treated as a link.
 *
 * Rules, applied in order:
 * 1. The scheme must be in `ALLOWED_SCHEMES`.
 * 2. A hostname containing a dot must either share its origin with `baseUrl`,
 *    end in something that looks like a TLD, or - only when the scheme was
 *    typed by the user (`padded` is `false`) - be a dotted IPv4 address.
 * 3. A hostname without a dot (including the empty hostname of `mailto:` and
 *    `tel:` URLs) is accepted as-is when the scheme was typed by the user.
 *    When the caller padded the scheme, it is accepted only if the origin
 *    equals `baseUrl`: the caller pads whenever the raw text contains a dot
 *    *anywhere*, so without this rule path-like text such as
 *    `src/utils/url.ts` would validate as `https://src/utils/url.ts`.
 *
 * @param str - Candidate URL, including its scheme.
 * @param baseUrl - Origin that is always accepted under rules 2 and 3.
 * @param padded - `true` when the caller prepended `https://` to user input.
 * @returns `null` when `str` cannot be parsed; otherwise the parsed URL and
 * the verdict.
 */
function resolveURL(str: string, baseUrl: string, padded = false) {
  const url = toURL(str);
  if (!url) return null;

  const { hostname, origin } = url;
  const scheme = url.protocol.substring(0, url.protocol.length - 1);

  if (!ALLOWED_SCHEMES.has(scheme)) {
    return { url, allowed: false };
  }

  if (hostname.includes('.')) {
    const allowed =
      origin === baseUrl ||
      TLD_REGEXP.test(hostname) ||
      // A bare `1.2.3.4` typed without a scheme is not assumed to be a host.
      (!padded && IPV4_ADDR_REGEXP.test(hostname));
    return { url, allowed };
  }

  // Dotless hostname: trust it only when the user wrote the scheme, or when
  // it points at the application's own origin.
  return { url, allowed: !padded || origin === baseUrl };
}
|
|
|
|
/**
 * Turns user input into a canonical link string.
 *
 * - Input is trimmed first.
 * - If it does not parse as a URL, does not start with `http://` or
 *   `https://`, and has a dot that is neither its first nor its last
 *   character, `https://` is prepended and parsing is retried.
 * - A parsed URL whose scheme is not allowed yields `''`.
 * - Otherwise the URL's `href` is returned; a trailing slash present in the
 *   `href` but not in the trimmed input is dropped.
 * - Input that could not be parsed at all is returned trimmed.
 *
 * @param str - Raw text entered or pasted by the user.
 * @returns The normalized link, `''`, or the trimmed input as described.
 */
export function normalizeUrl(str: string) {
  const input = str.trim();

  let parsed = toURL(input);

  if (!parsed && !/^https?:\/\//.test(input)) {
    const firstDot = input.indexOf('.');
    const dotIsInterior = firstDot > 0 && firstDot < input.length - 1;
    if (dotIsInterior) {
      parsed = toURL(`https://${input}`);
    }
  }

  if (!parsed) return input;
  if (!hasAllowedScheme(parsed)) return '';

  const { href } = parsed;
  const slashWasAdded = href.endsWith('/') && !input.endsWith('/');
  return slashWasAdded ? href.slice(0, -1) : href;
}
|
|
|
|
/**
 * Lenient validity check, meant for text the user intends to be a URL.
 *
 * The trimmed input is resolved as-is first. If it cannot be parsed, does not
 * start with `http://` or `https://`, and has a dot that is neither its first
 * nor its last character, it is resolved again with `https://` prepended.
 *
 * For more detail see https://www.ietf.org/rfc/rfc1738.txt
 *
 * @param str - Text to check.
 * @param baseUrl - Origin passed on to the resolver; defaults to
 * `location.origin`.
 * @returns `true` when one of the two attempts resolves to an allowed URL.
 */
export function isValidUrl(str: string, baseUrl = location.origin) {
  const input = str.trim();

  // Parsed on the first attempt: that verdict is final.
  const direct = resolveURL(input, baseUrl);
  if (direct) return direct.allowed;

  // Already carries an http(s) scheme yet failed to parse: nothing to retry.
  if (/^https?:\/\//.test(input)) return false;

  const firstDot = input.indexOf('.');
  if (firstDot <= 0 || firstDot >= input.length - 1) return false;

  return resolveURL(`https://${input}`, baseUrl, true)?.allowed ?? false;
}
|
|
|
|
/**
 * Finds where a URL may begin inside a token: one of the listed scheme
 * prefixes or a literal `www.`, case-insensitively.
 */
const URL_SCHEME_IN_TOKEN_REGEXP = /(?:https?:\/\/|ftp:\/\/|sftp:\/\/|mailto:|tel:|www\.)/i;

/** Run of opening punctuation at the very start of a token. */
const URL_LEADING_DELIMITER_REGEXP = /^[-([{<'"~]+/;

/** Run of closing or sentence punctuation at the very end of a token. */
const URL_TRAILING_DELIMITER_REGEXP = /[)\]}>.,;:!?'"]+$/;
|
|
|
|
/** One piece of text produced when splitting text around URLs. */
export type UrlTextSegment = {
  /** The text of this piece exactly as it appeared in the input. */
  text: string;
  /** Link target; present only on pieces that were recognized as a URL. */
  link?: string;
};
|
|
|
|
/**
 * Adds `segment` to the end of `segments`, keeping plain text contiguous.
 *
 * Segments with empty text are ignored. When both the current last segment
 * and the new one carry no link, the new text is appended to the last segment
 * in place instead of pushing a new entry.
 *
 * @param segments - Accumulator; mutated.
 * @param segment - Piece to add.
 */
function appendUrlTextSegment(
  segments: UrlTextSegment[],
  segment: UrlTextSegment
) {
  const { text, link } = segment;
  if (!text) return;

  const tail = segments[segments.length - 1];
  if (tail && !tail.link && !link) {
    tail.text += text;
  } else {
    segments.push(segment);
  }
}
|
|
|
|
/** Closing brackets that may legitimately end a URL, keyed to their opener. */
const URL_CLOSING_BRACKET_OPENERS = new Map<string, string>([
  [')', '('],
  [']', '['],
  ['}', '{'],
]);

/** Counts how many times the single character `char` occurs in `text`. */
function countUrlChar(text: string, char: string) {
  return text.split(char).length - 1;
}

/**
 * Moves closing brackets from the front of `trailing` back onto `core` for as
 * long as `core` contains more of the matching opener than of the closer.
 *
 * @returns The adjusted `[core, trailing]` pair.
 */
function restoreBalancedClosers(
  core: string,
  trailing: string
): [string, string] {
  let balancedCore = core;
  let rest = trailing;

  while (rest.length > 0) {
    const closer = rest.charAt(0);
    const opener = URL_CLOSING_BRACKET_OPENERS.get(closer);
    // Stop at the first character that is not a closing bracket ...
    if (opener === undefined) break;
    // ... or that has no unmatched opener inside the URL.
    if (
      countUrlChar(balancedCore, opener) <= countUrlChar(balancedCore, closer)
    ) {
      break;
    }
    balancedCore += closer;
    rest = rest.slice(1);
  }

  return [balancedCore, rest];
}

/**
 * Splits one whitespace-free token into plain-text and link segments.
 *
 * - Text in front of the first scheme-like prefix (or `www.`) is emitted as
 *   plain text and the remainder is processed recursively.
 * - Otherwise leading and trailing punctuation is peeled off and the
 *   remaining core is linkified when it is a valid URL.
 * - A trailing closing bracket is kept as part of the URL when its opener is
 *   inside the URL, so `https://en.wikipedia.org/wiki/Foo_(bar)` keeps its
 *   final `)`, while `(https://a.com)` still drops the wrapping pair.
 *
 * @param token - A run of non-whitespace characters.
 * @param baseUrl - Origin forwarded to the validity check.
 * @returns The segments in order; a single plain segment holding `token`
 * when no valid URL is found.
 */
function splitTokenByUrl(token: string, baseUrl: string): UrlTextSegment[] {
  const schemeIndex = token.match(URL_SCHEME_IN_TOKEN_REGEXP)?.index;
  if (typeof schemeIndex === 'number' && schemeIndex > 0) {
    return [
      { text: token.slice(0, schemeIndex) },
      ...splitTokenByUrl(token.slice(schemeIndex), baseUrl),
    ];
  }

  const leading = token.match(URL_LEADING_DELIMITER_REGEXP)?.[0] ?? '';
  const withoutLeading = token.slice(leading.length);
  const strippedTrailing =
    withoutLeading.match(URL_TRAILING_DELIMITER_REGEXP)?.[0] ?? '';
  const strippedCore = strippedTrailing
    ? withoutLeading.slice(0, withoutLeading.length - strippedTrailing.length)
    : withoutLeading;

  // Give back closing brackets that balance an opener inside the URL.
  const [core, trailing] = restoreBalancedClosers(
    strippedCore,
    strippedTrailing
  );

  if (core && isValidUrl(core, baseUrl)) {
    const segments: UrlTextSegment[] = [];
    appendUrlTextSegment(segments, { text: leading });
    appendUrlTextSegment(segments, { text: core, link: normalizeUrl(core) });
    appendUrlTextSegment(segments, { text: trailing });
    return segments;
  }

  return [{ text: token }];
}
|
|
|
|
/**
 * Breaks plain text into a list of segments in which only the URL parts carry
 * a `link`.
 *
 * Paste handlers use this so that input such as `example:https://google.com`
 * keeps `example:` as ordinary text while the URL becomes a link.
 * The text is cut into alternating runs of whitespace and non-whitespace;
 * whitespace runs are always plain text, and adjacent plain pieces are merged.
 *
 * @param text - Text to split.
 * @param baseUrl - Origin forwarded to URL validation; defaults to
 * `location.origin`.
 * @returns The segments in input order; an empty array for empty text.
 */
export function splitTextByUrl(text: string, baseUrl = location.origin) {
  const segments: UrlTextSegment[] = [];

  for (const chunk of text.match(/\s+|\S+/g) ?? []) {
    const pieces: UrlTextSegment[] = /^\s+$/.test(chunk)
      ? [{ text: chunk }]
      : splitTokenByUrl(chunk, baseUrl);

    for (const piece of pieces) {
      appendUrlTextSegment(segments, piece);
    }
  }

  return segments;
}
|
|
|
|
/**
 * Short list of top-level domains accepted by the strict URL check.
 *
 * Entries are lowercase and written without the leading dot.
 * See https://en.wikipedia.org/wiki/Top-level_domain
 */
const COMMON_TLDS = new Set<string>([
  'cat', 'co', 'com',
  'de', 'dev',
  'edu', 'eu',
  'gov',
  'info', 'io',
  'jp',
  'me', 'mil', 'moe',
  'net',
  'org',
  'pro',
  'ru',
  'top',
  'uk',
  'xyz',
]);
|
|
|
|
/**
 * Checks whether the last dot-separated label of the URL's hostname is one of
 * `COMMON_TLDS`.
 *
 * @param url - An already parsed URL.
 */
function isCommonTLD(url: URL) {
  const { hostname } = url;
  // With no dot, `lastIndexOf` yields -1 and the whole hostname is the label.
  const lastLabel = hostname.slice(hostname.lastIndexOf('.') + 1);
  return COMMON_TLDS.has(lastLabel);
}
|
|
|
|
/**
 * Rigorous URL check for arbitrary user input: the text must pass
 * `isValidUrl` and, once normalized, its hostname must end in a common TLD.
 *
 * @param str - Text to check.
 * @returns `true` only when both conditions hold; `false` otherwise,
 * including when anything throws along the way.
 */
export function isStrictUrl(str: string) {
  try {
    return isValidUrl(str) && isCommonTLD(new URL(normalizeUrl(str)));
  } catch {
    return false;
  }
}
|
|
|
|
/**
 * Tells whether the `text/plain` payload of `clipboardData` is a valid URL.
 *
 * @param clipboardData - Data transfer object to read from.
 */
export function isUrlInClipboard(clipboardData: DataTransfer) {
  return isValidUrl(clipboardData.getData('text/plain'));
}
|
|
|
|
/**
 * Extracts a short display name from a link.
 *
 * @param link - Link text, expected to be an absolute URL.
 * @returns The URL's hostname; its pathname when the hostname is empty; or
 * `link` unchanged when it cannot be parsed.
 */
export function getHostName(link: string) {
  let parsed: URL;
  try {
    parsed = new URL(link);
  } catch {
    return link;
  }
  return parsed.hostname || parsed.pathname;
}
|