mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-12 04:18:54 +00:00
feat: replace electron to puppeteer (#2700)
Co-authored-by: himself65 <himself65@outlook.com>
This commit is contained in:
@@ -10,7 +10,7 @@ import { getGoogleOauthCode } from './google-auth';
|
||||
const handlers = require(join(
|
||||
process.env.PLUGIN_DIR ?? '../../plugins',
|
||||
'./bookmark-block/server'
|
||||
)) as NamespaceHandlers;
|
||||
)).default as NamespaceHandlers;
|
||||
|
||||
export const uiHandlers = {
|
||||
handleThemeChange: async (_, theme: (typeof nativeTheme)['themeSource']) => {
|
||||
|
||||
@@ -5,5 +5,5 @@ import { definePluginServerConfig } from './utils.mjs';
|
||||
|
||||
await build({
|
||||
...definePluginServerConfig('bookmark-block'),
|
||||
external: ['cheerio', 'electron'],
|
||||
external: ['cheerio', 'electron', 'puppeteer'],
|
||||
});
|
||||
|
||||
@@ -5,7 +5,7 @@ import { definePluginServerConfig } from './utils.mjs';
|
||||
|
||||
const plugin = await context({
|
||||
...definePluginServerConfig('bookmark-block'),
|
||||
external: ['cheerio', 'electron'],
|
||||
external: ['cheerio', 'electron', 'puppeteer'],
|
||||
});
|
||||
|
||||
await plugin.watch();
|
||||
|
||||
@@ -9,15 +9,13 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@toeverything/plugin-infra": "workspace:*",
|
||||
"cheerio": "^1.0.0-rc.12"
|
||||
"link-preview-js": "^3.0.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"electron": "=25.0.1",
|
||||
"react": "18.3.0-canary-16d053d59-20230506",
|
||||
"react-dom": "18.3.0-canary-16d053d59-20230506"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"electron": "*",
|
||||
"react": "*",
|
||||
"react-dom": "*"
|
||||
},
|
||||
|
||||
@@ -1,11 +1,56 @@
|
||||
import { getMetaData } from './server/get-meta-data';
|
||||
import { getLinkPreview } from 'link-preview-js';
|
||||
|
||||
type MetaData = {
|
||||
title?: string;
|
||||
description?: string;
|
||||
icon?: string;
|
||||
image?: string;
|
||||
[x: string]: string | string[] | undefined;
|
||||
};
|
||||
|
||||
export interface PreviewType {
|
||||
url: string;
|
||||
title: string;
|
||||
siteName: string | undefined;
|
||||
description: string | undefined;
|
||||
mediaType: string;
|
||||
contentType: string | undefined;
|
||||
images: string[];
|
||||
videos: {
|
||||
url: string | undefined;
|
||||
secureUrl: string | null | undefined;
|
||||
type: string | null | undefined;
|
||||
width: string | undefined;
|
||||
height: string | undefined;
|
||||
}[];
|
||||
favicons: string[];
|
||||
}
|
||||
|
||||
export default {
|
||||
getBookmarkDataByLink: async (_: unknown, url: string) => {
|
||||
return getMetaData(url, {
|
||||
shouldReGetHTML: metaData => {
|
||||
return !metaData.title && !metaData.description;
|
||||
getBookmarkDataByLink: async (_: unknown, url: string): Promise<MetaData> => {
|
||||
const previewData = (await getLinkPreview(url, {
|
||||
timeout: 6000,
|
||||
headers: {
|
||||
'user-agent': 'googlebot',
|
||||
},
|
||||
});
|
||||
followRedirects: 'follow',
|
||||
}).catch(() => {
|
||||
return {
|
||||
title: '',
|
||||
siteName: '',
|
||||
description: '',
|
||||
images: [],
|
||||
videos: [],
|
||||
contentType: `text/html`,
|
||||
favicons: [],
|
||||
};
|
||||
})) as PreviewType;
|
||||
|
||||
return {
|
||||
title: previewData.title,
|
||||
description: previewData.description,
|
||||
icon: previewData.favicons[0],
|
||||
image: previewData.images[0],
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
import { BrowserWindow } from 'electron';
|
||||
|
||||
import type { GetHTMLOptions } from './types';
|
||||
|
||||
async function getHTMLFromWindow(win: BrowserWindow): Promise<string> {
|
||||
return win.webContents
|
||||
.executeJavaScript(`document.documentElement.outerHTML;`)
|
||||
.then(html => html);
|
||||
}
|
||||
|
||||
// For normal web pages, obtaining html can be done directly,
|
||||
// but for some dynamic web pages, obtaining html should wait for the complete loading of web pages. shouldReGetHTML should be used to judge whether to obtain html again
|
||||
export async function getHTMLByURL(
|
||||
url: string,
|
||||
options: GetHTMLOptions
|
||||
): Promise<string> {
|
||||
return new Promise(resolve => {
|
||||
const { timeout = 10000, shouldReGetHTML } = options;
|
||||
const window = new BrowserWindow({
|
||||
show: false,
|
||||
});
|
||||
let html = '';
|
||||
window.loadURL(url);
|
||||
|
||||
const timer = setTimeout(() => {
|
||||
resolve(html);
|
||||
window.close();
|
||||
}, timeout);
|
||||
|
||||
async function loopHandle() {
|
||||
html = await getHTMLFromWindow(window);
|
||||
if (!shouldReGetHTML) {
|
||||
return html;
|
||||
}
|
||||
|
||||
if (await shouldReGetHTML(html)) {
|
||||
setTimeout(loopHandle, 1000);
|
||||
} else {
|
||||
window.close();
|
||||
clearTimeout(timer);
|
||||
resolve(html);
|
||||
}
|
||||
}
|
||||
|
||||
window.webContents.on('did-finish-load', async () => {
|
||||
loopHandle();
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -1,107 +0,0 @@
|
||||
import type { CheerioAPI, Element } from 'cheerio';
|
||||
import { load } from 'cheerio';
|
||||
|
||||
import type { Context, MetaData, Options, RuleSet } from './types';
|
||||
|
||||
export * from './types';
|
||||
|
||||
import { getHTMLByURL } from './get-html';
|
||||
import { metaDataRules } from './rules';
|
||||
import type { GetMetaDataOptions } from './types';
|
||||
|
||||
function runRule(ruleSet: RuleSet, $: CheerioAPI, context: Context) {
|
||||
let maxScore = 0;
|
||||
let value;
|
||||
|
||||
for (let currRule = 0; currRule < ruleSet.rules.length; currRule++) {
|
||||
const [query, handler] = ruleSet.rules[currRule];
|
||||
const elements = Array.from($(query));
|
||||
|
||||
if (elements.length) {
|
||||
for (const element of elements) {
|
||||
let score = ruleSet.rules.length - currRule;
|
||||
|
||||
if (ruleSet.scorer) {
|
||||
const newScore = ruleSet.scorer(element as Element, score);
|
||||
|
||||
if (newScore) {
|
||||
score = newScore;
|
||||
}
|
||||
}
|
||||
|
||||
if (score > maxScore) {
|
||||
maxScore = score;
|
||||
value = handler(element as Element);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (value) {
|
||||
if (ruleSet.processor) {
|
||||
value = ruleSet.processor(value, context);
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
if (ruleSet.defaultValue) {
|
||||
return ruleSet.defaultValue(context);
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
async function getMetaDataByHTML(
|
||||
html: string,
|
||||
url: string,
|
||||
options: GetMetaDataOptions
|
||||
) {
|
||||
const { customRules = {} } = options;
|
||||
const rules: Record<string, RuleSet> = { ...metaDataRules };
|
||||
Object.keys(customRules).forEach((key: string) => {
|
||||
rules[key] = {
|
||||
rules: [...metaDataRules[key].rules, ...customRules[key].rules],
|
||||
defaultValue:
|
||||
customRules[key].defaultValue || metaDataRules[key].defaultValue,
|
||||
processor: customRules[key].processor || metaDataRules[key].processor,
|
||||
};
|
||||
});
|
||||
|
||||
const metadata: MetaData = {};
|
||||
const context: Context = {
|
||||
url,
|
||||
...options,
|
||||
};
|
||||
|
||||
const $ = load(html);
|
||||
|
||||
Object.keys(rules).forEach((key: string) => {
|
||||
const ruleSet = rules[key];
|
||||
metadata[key] = runRule(ruleSet, $, context) || undefined;
|
||||
});
|
||||
|
||||
return metadata;
|
||||
}
|
||||
|
||||
export async function getMetaData(url: string, options: Options = {}) {
|
||||
const { customRules, forceImageHttps, shouldReGetHTML, ...other } = options;
|
||||
const html = await getHTMLByURL(url, {
|
||||
...other,
|
||||
shouldReGetHTML: async html => {
|
||||
const meta = await getMetaDataByHTML(html, url, {
|
||||
customRules,
|
||||
forceImageHttps,
|
||||
});
|
||||
return shouldReGetHTML ? await shouldReGetHTML(meta) : false;
|
||||
},
|
||||
}).catch(() => {
|
||||
// TODO: report error
|
||||
return '';
|
||||
});
|
||||
|
||||
return await getMetaDataByHTML(html, url, {
|
||||
customRules,
|
||||
forceImageHttps,
|
||||
});
|
||||
}
|
||||
@@ -1,690 +0,0 @@
|
||||
import type { RuleSet } from './types';
|
||||
import { getProvider, makeUrlAbsolute, makeUrlSecure, parseUrl } from './utils';
|
||||
|
||||
export const metaDataRules: Record<string, RuleSet> = {
|
||||
title: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="og:title"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="og:title"][content]', element => element.attribs['content']],
|
||||
[
|
||||
'meta[property="twitter:title"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="twitter:title"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="parsely-title"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="parsely-title"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="sailthru.title"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="sailthru.title"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['title', (element: any) => element.text],
|
||||
],
|
||||
},
|
||||
description: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="og:description"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="og:description"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="description" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="description" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="sailthru.description"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="sailthru.description"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="twitter:description"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="twitter:description"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="summary" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="summary" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
],
|
||||
},
|
||||
language: {
|
||||
rules: [
|
||||
['html[lang]', element => element.attribs['lang']],
|
||||
[
|
||||
'meta[property="language" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="language" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="og:locale"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="og:locale"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
],
|
||||
processor: (language: any) => language.split('-')[0],
|
||||
},
|
||||
type: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="og:type"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="og:type"][content]', element => element.attribs['content']],
|
||||
[
|
||||
'meta[property="parsely-type"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="parsely-type"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="medium"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="medium"][content]', element => element.attribs['content']],
|
||||
],
|
||||
},
|
||||
url: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="og:url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="og:url"][content]', element => element.attribs['content']],
|
||||
[
|
||||
'meta[property="al:web:url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="al:web:url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="parsely-link"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="parsely-link"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['a.amp-canurl', element => element.attribs['href']],
|
||||
['link[rel="canonical"][href]', element => element.attribs['href']],
|
||||
],
|
||||
defaultValue: context => context.url,
|
||||
processor: (url: any, context) => makeUrlAbsolute(context.url, url),
|
||||
},
|
||||
provider: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="og:site_name"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="og:site_name"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="publisher" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="publisher" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="application-name" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="application-name" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="al:android:app_name"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="al:android:app_name"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="al:iphone:app_name"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="al:iphone:app_name"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="al:ipad:app_name"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="al:ipad:app_name"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="al:ios:app_name"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="al:ios:app_name"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="twitter:app:name:iphone"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="twitter:app:name:iphone"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="twitter:app:name:ipad"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="twitter:app:name:ipad"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="twitter:app:name:googleplay"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="twitter:app:name:googleplay"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
],
|
||||
defaultValue: context => getProvider(parseUrl(context.url)),
|
||||
},
|
||||
keywords: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="keywords" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="keywords" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="parsely-tags"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="parsely-tags"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="sailthru.tags"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="sailthru.tags"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="article:tag" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="article:tag" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="book:tag" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="book:tag" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="topic" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="topic" i][content]', element => element.attribs['content']],
|
||||
],
|
||||
processor: (keywords: any) =>
|
||||
keywords.split(',').map((keyword: string) => keyword.trim()),
|
||||
},
|
||||
section: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="article:section"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="article:section"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="category"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="category"][content]', element => element.attribs['content']],
|
||||
],
|
||||
},
|
||||
author: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="author" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="author" i][content]', element => element.attribs['content']],
|
||||
[
|
||||
'meta[property="article:author"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="article:author"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="book:author"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="book:author"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="parsely-author"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="parsely-author"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="sailthru.author"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="sailthru.author"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['a[class*="author" i]', (element: any) => element.text],
|
||||
['[rel="author"]', (element: any) => element.text],
|
||||
[
|
||||
'meta[property="twitter:creator"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="twitter:creator"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="profile:username"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="profile:username"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
],
|
||||
},
|
||||
published: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="article:published_time"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="article:published_time"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="published_time"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="published_time"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="parsely-pub-date"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="parsely-pub-date"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="sailthru.date"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="sailthru.date"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="date" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="date" i][content]', element => element.attribs['content']],
|
||||
[
|
||||
'meta[property="release_date" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="release_date" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['time[datetime]', element => element.attribs['datetime']],
|
||||
['time[datetime][pubdate]', element => element.attribs['datetime']],
|
||||
],
|
||||
processor: (value: any) =>
|
||||
Date.parse(value.toString())
|
||||
? new Date(value.toString()).toISOString()
|
||||
: undefined,
|
||||
},
|
||||
modified: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="og:updated_time"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="og:updated_time"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="article:modified_time"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="article:modified_time"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="updated_time" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="updated_time" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="modified_time"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="modified_time"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="revised"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="revised"][content]', element => element.attribs['content']],
|
||||
],
|
||||
processor: (value: any) =>
|
||||
Date.parse(value.toString())
|
||||
? new Date(value.toString()).toISOString()
|
||||
: undefined,
|
||||
},
|
||||
robots: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="robots" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="robots" i][content]', element => element.attribs['content']],
|
||||
],
|
||||
processor: (keywords: any) =>
|
||||
keywords.split(',').map((keyword: string) => keyword.trim()),
|
||||
},
|
||||
copyright: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="copyright" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="copyright" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
],
|
||||
},
|
||||
email: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="email" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="email" i][content]', element => element.attribs['content']],
|
||||
[
|
||||
'meta[property="reply-to" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="reply-to" i][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
],
|
||||
},
|
||||
twitter: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="twitter:site"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="twitter:site"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
],
|
||||
},
|
||||
facebook: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="fb:pages"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="fb:pages"][content]', element => element.attribs['content']],
|
||||
],
|
||||
},
|
||||
image: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="og:image:secure_url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="og:image:secure_url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="og:image:url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="og:image:url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="og:image"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="og:image"][content]', element => element.attribs['content']],
|
||||
[
|
||||
'meta[property="twitter:image"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="twitter:image"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="twitter:image:src"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="twitter:image:src"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="thumbnail"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="thumbnail"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="parsely-image-url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="parsely-image-url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="sailthru.image.full"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="sailthru.image.full"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
],
|
||||
processor: (imageUrl: any, context) =>
|
||||
context.forceImageHttps === true
|
||||
? makeUrlSecure(makeUrlAbsolute(context.url, imageUrl))
|
||||
: makeUrlAbsolute(context.url, imageUrl),
|
||||
},
|
||||
icon: {
|
||||
rules: [
|
||||
[
|
||||
'link[rel="apple-touch-icon"][href]',
|
||||
element => element.attribs['href'],
|
||||
],
|
||||
[
|
||||
'link[rel="apple-touch-icon-precomposed"][href]',
|
||||
element => element.attribs['href'],
|
||||
],
|
||||
['link[rel="icon" i][href]', element => element.attribs['href']],
|
||||
['link[rel="fluid-icon"][href]', element => element.attribs['href']],
|
||||
['link[rel="shortcut icon"][href]', element => element.attribs['href']],
|
||||
['link[rel="Shortcut Icon"][href]', element => element.attribs['href']],
|
||||
['link[rel="mask-icon"][href]', element => element.attribs['href']],
|
||||
],
|
||||
scorer: element => {
|
||||
const sizes = element.attribs['sizes'];
|
||||
if (sizes) {
|
||||
const sizeMatches = sizes.match(/\d+/g);
|
||||
if (sizeMatches) {
|
||||
const parsed = parseInt(sizeMatches[0]);
|
||||
if (!isNaN(parsed)) {
|
||||
return parsed;
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
defaultValue: context => makeUrlAbsolute(context.url, '/favicon.ico'),
|
||||
processor: (iconUrl, context) =>
|
||||
context.forceImageHttps === true
|
||||
? makeUrlSecure(makeUrlAbsolute(context.url, iconUrl))
|
||||
: makeUrlAbsolute(context.url, iconUrl),
|
||||
},
|
||||
video: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="og:video:secure_url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="og:video:secure_url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="og:video:url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="og:video:url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="og:video"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="og:video"][content]', element => element.attribs['content']],
|
||||
],
|
||||
processor: (imageUrl: any, context) =>
|
||||
context.forceImageHttps === true
|
||||
? makeUrlSecure(makeUrlAbsolute(context.url, imageUrl))
|
||||
: makeUrlAbsolute(context.url, imageUrl),
|
||||
},
|
||||
audio: {
|
||||
rules: [
|
||||
[
|
||||
'meta[property="og:audio:secure_url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="og:audio:secure_url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="og:audio:url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[name="og:audio:url"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
[
|
||||
'meta[property="og:audio"][content]',
|
||||
element => element.attribs['content'],
|
||||
],
|
||||
['meta[name="og:audio"][content]', element => element.attribs['content']],
|
||||
],
|
||||
processor: (imageUrl: any, context) =>
|
||||
context.forceImageHttps === true
|
||||
? makeUrlSecure(makeUrlAbsolute(context.url, imageUrl))
|
||||
: makeUrlAbsolute(context.url, imageUrl),
|
||||
},
|
||||
};
|
||||
@@ -1,43 +0,0 @@
|
||||
import type { Element } from 'cheerio';
|
||||
|
||||
export type MetaData = {
|
||||
title?: string;
|
||||
description?: string;
|
||||
icon?: string;
|
||||
image?: string;
|
||||
keywords?: string[];
|
||||
language?: string;
|
||||
type?: string;
|
||||
url?: string;
|
||||
provider?: string;
|
||||
|
||||
[x: string]: string | string[] | undefined;
|
||||
};
|
||||
|
||||
export type MetadataRule = [string, (el: Element) => string | null];
|
||||
|
||||
export type Context = {
|
||||
url: string;
|
||||
} & GetMetaDataOptions;
|
||||
|
||||
export type RuleSet = {
|
||||
rules: MetadataRule[];
|
||||
defaultValue?: (context: Context) => string | string[];
|
||||
scorer?: (el: Element, score: any) => any;
|
||||
processor?: (input: any, context: Context) => any;
|
||||
};
|
||||
|
||||
export type GetMetaDataOptions = {
|
||||
customRules?: Record<string, RuleSet>;
|
||||
forceImageHttps?: boolean;
|
||||
};
|
||||
|
||||
export type GetHTMLOptions = {
|
||||
timeout?: number;
|
||||
shouldReGetHTML?: (currentHTML: string) => boolean | Promise<boolean>;
|
||||
};
|
||||
|
||||
export type Options = {
|
||||
shouldReGetHTML?: (metaData: MetaData) => boolean | Promise<boolean>;
|
||||
} & GetMetaDataOptions &
|
||||
Omit<GetHTMLOptions, 'shouldReGetHTML'>;
|
||||
@@ -1,28 +0,0 @@
|
||||
import { parse, resolve } from 'node:url';
|
||||
|
||||
export function makeUrlAbsolute(base: string, relative: string): string {
|
||||
const relativeParsed = parse(relative);
|
||||
|
||||
if (relativeParsed.host === null) {
|
||||
return resolve(base, relative);
|
||||
}
|
||||
|
||||
return relative;
|
||||
}
|
||||
|
||||
export function makeUrlSecure(url: string): string {
|
||||
return url.replace(/^http:/, 'https:');
|
||||
}
|
||||
|
||||
export function parseUrl(url: string): string {
|
||||
return parse(url).hostname || '';
|
||||
}
|
||||
|
||||
export function getProvider(host: string): string {
|
||||
return host
|
||||
.replace(/www[a-zA-Z0-9]*\./, '')
|
||||
.replace('.co.', '.')
|
||||
.split('.')
|
||||
.slice(0, -1)
|
||||
.join(' ');
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
import { build } from 'esbuild';
|
||||
|
||||
import { definePluginServerConfig } from './utils.mjs';
|
||||
|
||||
await build({
|
||||
...definePluginServerConfig('bookmark-block'),
|
||||
external: ['cheerio', 'electron'],
|
||||
});
|
||||
@@ -1,11 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
import { context } from 'esbuild';
|
||||
|
||||
import { definePluginServerConfig } from './utils.mjs';
|
||||
|
||||
const plugin = await context({
|
||||
...definePluginServerConfig('bookmark-block'),
|
||||
external: ['cheerio', 'electron'],
|
||||
});
|
||||
|
||||
await plugin.watch();
|
||||
@@ -1,29 +0,0 @@
|
||||
import { resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
export const rootDir = fileURLToPath(new URL('../..', import.meta.url));
|
||||
export const electronOutputDir = resolve(
|
||||
rootDir,
|
||||
'apps',
|
||||
'electron',
|
||||
'dist',
|
||||
'plugins'
|
||||
);
|
||||
export const pluginDir = resolve(rootDir, 'plugins');
|
||||
|
||||
/**
|
||||
*
|
||||
* @param pluginDirName {string}
|
||||
* @return {import('esbuild').BuildOptions}
|
||||
*/
|
||||
export function definePluginServerConfig(pluginDirName) {
|
||||
const pluginRootDir = resolve(pluginDir, pluginDirName);
|
||||
const serverEntryFile = resolve(pluginRootDir, 'src/server.ts');
|
||||
const serverOutputDir = resolve(electronOutputDir, pluginDirName);
|
||||
return {
|
||||
entryPoints: [serverEntryFile],
|
||||
platform: 'node',
|
||||
outdir: serverOutputDir,
|
||||
bundle: true,
|
||||
};
|
||||
}
|
||||
@@ -2,5 +2,5 @@ import { beforeAll } from 'vitest';
|
||||
|
||||
beforeAll(async () => {
|
||||
console.log('Build plugins');
|
||||
await import('../esbuild/build-plugins.mjs');
|
||||
await import('../../apps/electron/scripts/plugins/build-plugins.mjs');
|
||||
});
|
||||
|
||||
81
yarn.lock
81
yarn.lock
@@ -35,12 +35,10 @@ __metadata:
|
||||
resolution: "@affine/bookmark-block@workspace:plugins/bookmark-block"
|
||||
dependencies:
|
||||
"@toeverything/plugin-infra": "workspace:*"
|
||||
cheerio: ^1.0.0-rc.12
|
||||
electron: =25.0.1
|
||||
link-preview-js: ^3.0.4
|
||||
react: 18.3.0-canary-16d053d59-20230506
|
||||
react-dom: 18.3.0-canary-16d053d59-20230506
|
||||
peerDependencies:
|
||||
electron: "*"
|
||||
react: "*"
|
||||
react-dom: "*"
|
||||
languageName: unknown
|
||||
@@ -10814,6 +10812,15 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"abort-controller@npm:^3.0.0":
|
||||
version: 3.0.0
|
||||
resolution: "abort-controller@npm:3.0.0"
|
||||
dependencies:
|
||||
event-target-shim: ^5.0.0
|
||||
checksum: 170bdba9b47b7e65906a28c8ce4f38a7a369d78e2271706f020849c1bfe0ee2067d4261df8bbb66eb84f79208fd5b710df759d64191db58cfba7ce8ef9c54b75
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"accepts@npm:~1.3.5, accepts@npm:~1.3.8":
|
||||
version: 1.3.8
|
||||
resolution: "accepts@npm:1.3.8"
|
||||
@@ -12494,6 +12501,22 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"cheerio@npm:1.0.0-rc.11":
|
||||
version: 1.0.0-rc.11
|
||||
resolution: "cheerio@npm:1.0.0-rc.11"
|
||||
dependencies:
|
||||
cheerio-select: ^2.1.0
|
||||
dom-serializer: ^2.0.0
|
||||
domhandler: ^5.0.3
|
||||
domutils: ^3.0.1
|
||||
htmlparser2: ^8.0.1
|
||||
parse5: ^7.0.0
|
||||
parse5-htmlparser2-tree-adapter: ^7.0.0
|
||||
tslib: ^2.4.0
|
||||
checksum: 7619edcbecafb70ca6ca842ce149307a84e8d451432a888d82959b2aa04e2090701658f25eac75821e0832cc1305bdbcf02f17175102fc1723f119f3c9ece17a
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"cheerio@npm:^1.0.0-rc.12":
|
||||
version: 1.0.0-rc.12
|
||||
resolution: "cheerio@npm:1.0.0-rc.12"
|
||||
@@ -13289,6 +13312,15 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"cross-fetch@npm:3.1.5":
|
||||
version: 3.1.5
|
||||
resolution: "cross-fetch@npm:3.1.5"
|
||||
dependencies:
|
||||
node-fetch: 2.6.7
|
||||
checksum: f6b8c6ee3ef993ace6277fd789c71b6acf1b504fd5f5c7128df4ef2f125a429e29cd62dc8c127523f04a5f2fa4771ed80e3f3d9695617f441425045f505cf3bb
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"cross-fetch@npm:^3.0.6, cross-fetch@npm:^3.1.5":
|
||||
version: 3.1.6
|
||||
resolution: "cross-fetch@npm:3.1.6"
|
||||
@@ -15415,6 +15447,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"event-target-shim@npm:^5.0.0":
|
||||
version: 5.0.1
|
||||
resolution: "event-target-shim@npm:5.0.1"
|
||||
checksum: 1ffe3bb22a6d51bdeb6bf6f7cf97d2ff4a74b017ad12284cc9e6a279e727dc30a5de6bb613e5596ff4dc3e517841339ad09a7eec44266eccb1aa201a30448166
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"eventemitter3@npm:^3.1.0":
|
||||
version: 3.1.2
|
||||
resolution: "eventemitter3@npm:3.1.2"
|
||||
@@ -19911,6 +19950,18 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"link-preview-js@npm:^3.0.4":
|
||||
version: 3.0.4
|
||||
resolution: "link-preview-js@npm:3.0.4"
|
||||
dependencies:
|
||||
abort-controller: ^3.0.0
|
||||
cheerio: 1.0.0-rc.11
|
||||
cross-fetch: 3.1.5
|
||||
url: 0.11.0
|
||||
checksum: 8d3c0c8bc3b0b5a7b95586f4e98e1013ccfc8fbce3405fffb543ecb10a6d3abb66742f1845e366ca3c1ef80725619aa4b034d97937f4e152e63d11f8676bab9a
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"lint-staged@npm:^13.2.2":
|
||||
version: 13.2.2
|
||||
resolution: "lint-staged@npm:13.2.2"
|
||||
@@ -23120,6 +23171,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"punycode@npm:1.3.2":
|
||||
version: 1.3.2
|
||||
resolution: "punycode@npm:1.3.2"
|
||||
checksum: b8807fd594b1db33335692d1f03e8beeddde6fda7fbb4a2e32925d88d20a3aa4cd8dcc0c109ccaccbd2ba761c208dfaaada83007087ea8bfb0129c9ef1b99ed6
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"punycode@npm:^1.3.2":
|
||||
version: 1.4.1
|
||||
resolution: "punycode@npm:1.4.1"
|
||||
@@ -23205,6 +23263,13 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"querystring@npm:0.2.0":
|
||||
version: 0.2.0
|
||||
resolution: "querystring@npm:0.2.0"
|
||||
checksum: 8258d6734f19be27e93f601758858c299bdebe71147909e367101ba459b95446fbe5b975bf9beb76390156a592b6f4ac3a68b6087cea165c259705b8b4e56a69
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"queue-microtask@npm:^1.2.2, queue-microtask@npm:^1.2.3":
|
||||
version: 1.2.3
|
||||
resolution: "queue-microtask@npm:1.2.3"
|
||||
@@ -26805,6 +26870,16 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"url@npm:0.11.0":
|
||||
version: 0.11.0
|
||||
resolution: "url@npm:0.11.0"
|
||||
dependencies:
|
||||
punycode: 1.3.2
|
||||
querystring: 0.2.0
|
||||
checksum: 50d100d3dd2d98b9fe3ada48cadb0b08aa6be6d3ac64112b867b56b19be4bfcba03c2a9a0d7922bfd7ac17d4834e88537749fe182430dfd9b68e520175900d90
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"urlpattern-polyfill@npm:^8.0.0":
|
||||
version: 8.0.2
|
||||
resolution: "urlpattern-polyfill@npm:8.0.2"
|
||||
|
||||
Reference in New Issue
Block a user