From 336ff5220ffc03b1569113dd1cd4a797420560b2 Mon Sep 17 00:00:00 2001 From: Qi <474021214@qq.com> Date: Thu, 8 Jun 2023 17:51:45 +0800 Subject: [PATCH] feat: replace electron to puppeteer (#2700) Co-authored-by: himself65 (cherry picked from commit fda89b05e7b48b476d53dbcaea95f1868658e82a) --- apps/electron/layers/main/src/ui/index.ts | 2 +- .../scripts/plugins/build-plugins.mjs | 2 +- apps/electron/scripts/plugins/dev-plugins.mjs | 2 +- plugins/bookmark-block/package.json | 4 +- plugins/bookmark-block/src/server.ts | 57 +- .../src/server/get-meta-data/get-html.ts | 49 -- .../src/server/get-meta-data/index.ts | 107 --- .../src/server/get-meta-data/rules.ts | 690 ------------------ .../src/server/get-meta-data/types.ts | 43 -- .../src/server/get-meta-data/utils.ts | 28 - scripts/esbuild/build-plugins.mjs | 9 - scripts/esbuild/dev-plugins.mjs | 11 - scripts/esbuild/utils.mjs | 29 - scripts/setup/build-plugins.ts | 2 +- yarn.lock | 81 +- 15 files changed, 134 insertions(+), 982 deletions(-) delete mode 100644 plugins/bookmark-block/src/server/get-meta-data/get-html.ts delete mode 100644 plugins/bookmark-block/src/server/get-meta-data/index.ts delete mode 100644 plugins/bookmark-block/src/server/get-meta-data/rules.ts delete mode 100644 plugins/bookmark-block/src/server/get-meta-data/types.ts delete mode 100644 plugins/bookmark-block/src/server/get-meta-data/utils.ts delete mode 100755 scripts/esbuild/build-plugins.mjs delete mode 100755 scripts/esbuild/dev-plugins.mjs delete mode 100644 scripts/esbuild/utils.mjs diff --git a/apps/electron/layers/main/src/ui/index.ts b/apps/electron/layers/main/src/ui/index.ts index 261803e131..a275c52320 100644 --- a/apps/electron/layers/main/src/ui/index.ts +++ b/apps/electron/layers/main/src/ui/index.ts @@ -10,7 +10,7 @@ import { getGoogleOauthCode } from './google-auth'; const handlers = require(join( process.env.PLUGIN_DIR ?? '../../plugins', './bookmark-block/server' -)) as NamespaceHandlers; +)).default as NamespaceHandlers; export const uiHandlers = { handleThemeChange: async (_, theme: (typeof nativeTheme)['themeSource']) => { diff --git a/apps/electron/scripts/plugins/build-plugins.mjs b/apps/electron/scripts/plugins/build-plugins.mjs index 6469f57707..5a331d7a0a 100755 --- a/apps/electron/scripts/plugins/build-plugins.mjs +++ b/apps/electron/scripts/plugins/build-plugins.mjs @@ -5,5 +5,5 @@ import { definePluginServerConfig } from './utils.mjs'; await build({ ...definePluginServerConfig('bookmark-block'), - external: ['cheerio', 'electron'], + external: ['cheerio', 'electron', 'puppeteer'], }); diff --git a/apps/electron/scripts/plugins/dev-plugins.mjs b/apps/electron/scripts/plugins/dev-plugins.mjs index 37a6a06af0..2b158636b3 100755 --- a/apps/electron/scripts/plugins/dev-plugins.mjs +++ b/apps/electron/scripts/plugins/dev-plugins.mjs @@ -5,7 +5,7 @@ import { definePluginServerConfig } from './utils.mjs'; const plugin = await context({ ...definePluginServerConfig('bookmark-block'), - external: ['cheerio', 'electron'], + external: ['cheerio', 'electron', 'puppeteer'], }); await plugin.watch(); diff --git a/plugins/bookmark-block/package.json b/plugins/bookmark-block/package.json index 14d1df6048..d669db325e 100644 --- a/plugins/bookmark-block/package.json +++ b/plugins/bookmark-block/package.json @@ -9,15 +9,13 @@ }, "dependencies": { "@toeverything/plugin-infra": "workspace:*", - "cheerio": "^1.0.0-rc.12" + "link-preview-js": "^3.0.4" }, "devDependencies": { - "electron": "=25.0.1", "react": "18.3.0-canary-16d053d59-20230506", "react-dom": "18.3.0-canary-16d053d59-20230506" }, "peerDependencies": { - "electron": "*", "react": "*", "react-dom": "*" }, diff --git a/plugins/bookmark-block/src/server.ts b/plugins/bookmark-block/src/server.ts index 33fa8bd6f9..3c1e6418af 100644 --- a/plugins/bookmark-block/src/server.ts +++ b/plugins/bookmark-block/src/server.ts @@ -1,11 +1,56 @@ -import { getMetaData } from './server/get-meta-data'; +import { getLinkPreview } from 'link-preview-js'; + +type MetaData = { + title?: string; + description?: string; + icon?: string; + image?: string; + [x: string]: string | string[] | undefined; +}; + +export interface PreviewType { + url: string; + title: string; + siteName: string | undefined; + description: string | undefined; + mediaType: string; + contentType: string | undefined; + images: string[]; + videos: { + url: string | undefined; + secureUrl: string | null | undefined; + type: string | null | undefined; + width: string | undefined; + height: string | undefined; + }[]; + favicons: string[]; +} export default { - getBookmarkDataByLink: async (_: unknown, url: string) => { - return getMetaData(url, { - shouldReGetHTML: metaData => { - return !metaData.title && !metaData.description; + getBookmarkDataByLink: async (_: unknown, url: string): Promise => { + const previewData = (await getLinkPreview(url, { + timeout: 6000, + headers: { + 'user-agent': 'googlebot', }, - }); + followRedirects: 'follow', + }).catch(() => { + return { + title: '', + siteName: '', + description: '', + images: [], + videos: [], + contentType: `text/html`, + favicons: [], + }; + })) as PreviewType; + + return { + title: previewData.title, + description: previewData.description, + icon: previewData.favicons[0], + image: previewData.images[0], + }; }, }; diff --git a/plugins/bookmark-block/src/server/get-meta-data/get-html.ts b/plugins/bookmark-block/src/server/get-meta-data/get-html.ts deleted file mode 100644 index 5ff9134f21..0000000000 --- a/plugins/bookmark-block/src/server/get-meta-data/get-html.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { BrowserWindow } from 'electron'; - -import type { GetHTMLOptions } from './types'; - -async function getHTMLFromWindow(win: BrowserWindow): Promise { - return win.webContents - .executeJavaScript(`document.documentElement.outerHTML;`) - .then(html => html); -} - -// For normal web pages, obtaining html can be done directly, -// but for some dynamic web pages, obtaining html should wait for the complete loading of web pages. shouldReGetHTML should be used to judge whether to obtain html again -export async function getHTMLByURL( - url: string, - options: GetHTMLOptions -): Promise { - return new Promise(resolve => { - const { timeout = 10000, shouldReGetHTML } = options; - const window = new BrowserWindow({ - show: false, - }); - let html = ''; - window.loadURL(url); - - const timer = setTimeout(() => { - resolve(html); - window.close(); - }, timeout); - - async function loopHandle() { - html = await getHTMLFromWindow(window); - if (!shouldReGetHTML) { - return html; - } - - if (await shouldReGetHTML(html)) { - setTimeout(loopHandle, 1000); - } else { - window.close(); - clearTimeout(timer); - resolve(html); - } - } - - window.webContents.on('did-finish-load', async () => { - loopHandle(); - }); - }); -} diff --git a/plugins/bookmark-block/src/server/get-meta-data/index.ts b/plugins/bookmark-block/src/server/get-meta-data/index.ts deleted file mode 100644 index 6aebb2ea62..0000000000 --- a/plugins/bookmark-block/src/server/get-meta-data/index.ts +++ /dev/null @@ -1,107 +0,0 @@ -import type { CheerioAPI, Element } from 'cheerio'; -import { load } from 'cheerio'; - -import type { Context, MetaData, Options, RuleSet } from './types'; - -export * from './types'; - -import { getHTMLByURL } from './get-html'; -import { metaDataRules } from './rules'; -import type { GetMetaDataOptions } from './types'; - -function runRule(ruleSet: RuleSet, $: CheerioAPI, context: Context) { - let maxScore = 0; - let value; - - for (let currRule = 0; currRule < ruleSet.rules.length; currRule++) { - const [query, handler] = ruleSet.rules[currRule]; - const elements = Array.from($(query)); - - if (elements.length) { - for (const element of elements) { - let score = ruleSet.rules.length - currRule; - - if (ruleSet.scorer) { - const newScore = ruleSet.scorer(element as Element, score); - - if (newScore) { - score = newScore; - } - } - - if (score > maxScore) { - maxScore = score; - value = handler(element as Element); - } - } - } - } - - if (value) { - if (ruleSet.processor) { - value = ruleSet.processor(value, context); - } - - return value; - } - - if (ruleSet.defaultValue) { - return ruleSet.defaultValue(context); - } - - return undefined; -} - -async function getMetaDataByHTML( - html: string, - url: string, - options: GetMetaDataOptions -) { - const { customRules = {} } = options; - const rules: Record = { ...metaDataRules }; - Object.keys(customRules).forEach((key: string) => { - rules[key] = { - rules: [...metaDataRules[key].rules, ...customRules[key].rules], - defaultValue: - customRules[key].defaultValue || metaDataRules[key].defaultValue, - processor: customRules[key].processor || metaDataRules[key].processor, - }; - }); - - const metadata: MetaData = {}; - const context: Context = { - url, - ...options, - }; - - const $ = load(html); - - Object.keys(rules).forEach((key: string) => { - const ruleSet = rules[key]; - metadata[key] = runRule(ruleSet, $, context) || undefined; - }); - - return metadata; -} - -export async function getMetaData(url: string, options: Options = {}) { - const { customRules, forceImageHttps, shouldReGetHTML, ...other } = options; - const html = await getHTMLByURL(url, { - ...other, - shouldReGetHTML: async html => { - const meta = await getMetaDataByHTML(html, url, { - customRules, - forceImageHttps, - }); - return shouldReGetHTML ? await shouldReGetHTML(meta) : false; - }, - }).catch(() => { - // TODO: report error - return ''; - }); - - return await getMetaDataByHTML(html, url, { - customRules, - forceImageHttps, - }); -} diff --git a/plugins/bookmark-block/src/server/get-meta-data/rules.ts b/plugins/bookmark-block/src/server/get-meta-data/rules.ts deleted file mode 100644 index 9f57ac11f3..0000000000 --- a/plugins/bookmark-block/src/server/get-meta-data/rules.ts +++ /dev/null @@ -1,690 +0,0 @@ -import type { RuleSet } from './types'; -import { getProvider, makeUrlAbsolute, makeUrlSecure, parseUrl } from './utils'; - -export const metaDataRules: Record = { - title: { - rules: [ - [ - 'meta[property="og:title"][content]', - element => element.attribs['content'], - ], - ['meta[name="og:title"][content]', element => element.attribs['content']], - [ - 'meta[property="twitter:title"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="twitter:title"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="parsely-title"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="parsely-title"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="sailthru.title"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="sailthru.title"][content]', - element => element.attribs['content'], - ], - ['title', (element: any) => element.text], - ], - }, - description: { - rules: [ - [ - 'meta[property="og:description"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="og:description"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="description" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="description" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="sailthru.description"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="sailthru.description"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="twitter:description"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="twitter:description"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="summary" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="summary" i][content]', - element => element.attribs['content'], - ], - ], - }, - language: { - rules: [ - ['html[lang]', element => element.attribs['lang']], - [ - 'meta[property="language" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="language" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="og:locale"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="og:locale"][content]', - element => element.attribs['content'], - ], - ], - processor: (language: any) => language.split('-')[0], - }, - type: { - rules: [ - [ - 'meta[property="og:type"][content]', - element => element.attribs['content'], - ], - ['meta[name="og:type"][content]', element => element.attribs['content']], - [ - 'meta[property="parsely-type"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="parsely-type"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="medium"][content]', - element => element.attribs['content'], - ], - ['meta[name="medium"][content]', element => element.attribs['content']], - ], - }, - url: { - rules: [ - [ - 'meta[property="og:url"][content]', - element => element.attribs['content'], - ], - ['meta[name="og:url"][content]', element => element.attribs['content']], - [ - 'meta[property="al:web:url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="al:web:url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="parsely-link"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="parsely-link"][content]', - element => element.attribs['content'], - ], - ['a.amp-canurl', element => element.attribs['href']], - ['link[rel="canonical"][href]', element => element.attribs['href']], - ], - defaultValue: context => context.url, - processor: (url: any, context) => makeUrlAbsolute(context.url, url), - }, - provider: { - rules: [ - [ - 'meta[property="og:site_name"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="og:site_name"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="publisher" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="publisher" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="application-name" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="application-name" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="al:android:app_name"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="al:android:app_name"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="al:iphone:app_name"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="al:iphone:app_name"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="al:ipad:app_name"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="al:ipad:app_name"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="al:ios:app_name"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="al:ios:app_name"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="twitter:app:name:iphone"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="twitter:app:name:iphone"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="twitter:app:name:ipad"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="twitter:app:name:ipad"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="twitter:app:name:googleplay"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="twitter:app:name:googleplay"][content]', - element => element.attribs['content'], - ], - ], - defaultValue: context => getProvider(parseUrl(context.url)), - }, - keywords: { - rules: [ - [ - 'meta[property="keywords" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="keywords" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="parsely-tags"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="parsely-tags"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="sailthru.tags"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="sailthru.tags"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="article:tag" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="article:tag" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="book:tag" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="book:tag" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="topic" i][content]', - element => element.attribs['content'], - ], - ['meta[name="topic" i][content]', element => element.attribs['content']], - ], - processor: (keywords: any) => - keywords.split(',').map((keyword: string) => keyword.trim()), - }, - section: { - rules: [ - [ - 'meta[property="article:section"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="article:section"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="category"][content]', - element => element.attribs['content'], - ], - ['meta[name="category"][content]', element => element.attribs['content']], - ], - }, - author: { - rules: [ - [ - 'meta[property="author" i][content]', - element => element.attribs['content'], - ], - ['meta[name="author" i][content]', element => element.attribs['content']], - [ - 'meta[property="article:author"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="article:author"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="book:author"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="book:author"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="parsely-author"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="parsely-author"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="sailthru.author"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="sailthru.author"][content]', - element => element.attribs['content'], - ], - ['a[class*="author" i]', (element: any) => element.text], - ['[rel="author"]', (element: any) => element.text], - [ - 'meta[property="twitter:creator"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="twitter:creator"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="profile:username"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="profile:username"][content]', - element => element.attribs['content'], - ], - ], - }, - published: { - rules: [ - [ - 'meta[property="article:published_time"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="article:published_time"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="published_time"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="published_time"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="parsely-pub-date"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="parsely-pub-date"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="sailthru.date"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="sailthru.date"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="date" i][content]', - element => element.attribs['content'], - ], - ['meta[name="date" i][content]', element => element.attribs['content']], - [ - 'meta[property="release_date" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="release_date" i][content]', - element => element.attribs['content'], - ], - ['time[datetime]', element => element.attribs['datetime']], - ['time[datetime][pubdate]', element => element.attribs['datetime']], - ], - processor: (value: any) => - Date.parse(value.toString()) - ? new Date(value.toString()).toISOString() - : undefined, - }, - modified: { - rules: [ - [ - 'meta[property="og:updated_time"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="og:updated_time"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="article:modified_time"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="article:modified_time"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="updated_time" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="updated_time" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="modified_time"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="modified_time"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="revised"][content]', - element => element.attribs['content'], - ], - ['meta[name="revised"][content]', element => element.attribs['content']], - ], - processor: (value: any) => - Date.parse(value.toString()) - ? new Date(value.toString()).toISOString() - : undefined, - }, - robots: { - rules: [ - [ - 'meta[property="robots" i][content]', - element => element.attribs['content'], - ], - ['meta[name="robots" i][content]', element => element.attribs['content']], - ], - processor: (keywords: any) => - keywords.split(',').map((keyword: string) => keyword.trim()), - }, - copyright: { - rules: [ - [ - 'meta[property="copyright" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="copyright" i][content]', - element => element.attribs['content'], - ], - ], - }, - email: { - rules: [ - [ - 'meta[property="email" i][content]', - element => element.attribs['content'], - ], - ['meta[name="email" i][content]', element => element.attribs['content']], - [ - 'meta[property="reply-to" i][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="reply-to" i][content]', - element => element.attribs['content'], - ], - ], - }, - twitter: { - rules: [ - [ - 'meta[property="twitter:site"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="twitter:site"][content]', - element => element.attribs['content'], - ], - ], - }, - facebook: { - rules: [ - [ - 'meta[property="fb:pages"][content]', - element => element.attribs['content'], - ], - ['meta[name="fb:pages"][content]', element => element.attribs['content']], - ], - }, - image: { - rules: [ - [ - 'meta[property="og:image:secure_url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="og:image:secure_url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="og:image:url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="og:image:url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="og:image"][content]', - element => element.attribs['content'], - ], - ['meta[name="og:image"][content]', element => element.attribs['content']], - [ - 'meta[property="twitter:image"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="twitter:image"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="twitter:image:src"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="twitter:image:src"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="thumbnail"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="thumbnail"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="parsely-image-url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="parsely-image-url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="sailthru.image.full"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="sailthru.image.full"][content]', - element => element.attribs['content'], - ], - ], - processor: (imageUrl: any, context) => - context.forceImageHttps === true - ? makeUrlSecure(makeUrlAbsolute(context.url, imageUrl)) - : makeUrlAbsolute(context.url, imageUrl), - }, - icon: { - rules: [ - [ - 'link[rel="apple-touch-icon"][href]', - element => element.attribs['href'], - ], - [ - 'link[rel="apple-touch-icon-precomposed"][href]', - element => element.attribs['href'], - ], - ['link[rel="icon" i][href]', element => element.attribs['href']], - ['link[rel="fluid-icon"][href]', element => element.attribs['href']], - ['link[rel="shortcut icon"][href]', element => element.attribs['href']], - ['link[rel="Shortcut Icon"][href]', element => element.attribs['href']], - ['link[rel="mask-icon"][href]', element => element.attribs['href']], - ], - scorer: element => { - const sizes = element.attribs['sizes']; - if (sizes) { - const sizeMatches = sizes.match(/\d+/g); - if (sizeMatches) { - const parsed = parseInt(sizeMatches[0]); - if (!isNaN(parsed)) { - return parsed; - } - } - } - }, - defaultValue: context => makeUrlAbsolute(context.url, '/favicon.ico'), - processor: (iconUrl, context) => - context.forceImageHttps === true - ? makeUrlSecure(makeUrlAbsolute(context.url, iconUrl)) - : makeUrlAbsolute(context.url, iconUrl), - }, - video: { - rules: [ - [ - 'meta[property="og:video:secure_url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="og:video:secure_url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="og:video:url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="og:video:url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="og:video"][content]', - element => element.attribs['content'], - ], - ['meta[name="og:video"][content]', element => element.attribs['content']], - ], - processor: (imageUrl: any, context) => - context.forceImageHttps === true - ? makeUrlSecure(makeUrlAbsolute(context.url, imageUrl)) - : makeUrlAbsolute(context.url, imageUrl), - }, - audio: { - rules: [ - [ - 'meta[property="og:audio:secure_url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="og:audio:secure_url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="og:audio:url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[name="og:audio:url"][content]', - element => element.attribs['content'], - ], - [ - 'meta[property="og:audio"][content]', - element => element.attribs['content'], - ], - ['meta[name="og:audio"][content]', element => element.attribs['content']], - ], - processor: (imageUrl: any, context) => - context.forceImageHttps === true - ? makeUrlSecure(makeUrlAbsolute(context.url, imageUrl)) - : makeUrlAbsolute(context.url, imageUrl), - }, -}; diff --git a/plugins/bookmark-block/src/server/get-meta-data/types.ts b/plugins/bookmark-block/src/server/get-meta-data/types.ts deleted file mode 100644 index 9662f8fccc..0000000000 --- a/plugins/bookmark-block/src/server/get-meta-data/types.ts +++ /dev/null @@ -1,43 +0,0 @@ -import type { Element } from 'cheerio'; - -export type MetaData = { - title?: string; - description?: string; - icon?: string; - image?: string; - keywords?: string[]; - language?: string; - type?: string; - url?: string; - provider?: string; - - [x: string]: string | string[] | undefined; -}; - -export type MetadataRule = [string, (el: Element) => string | null]; - -export type Context = { - url: string; -} & GetMetaDataOptions; - -export type RuleSet = { - rules: MetadataRule[]; - defaultValue?: (context: Context) => string | string[]; - scorer?: (el: Element, score: any) => any; - processor?: (input: any, context: Context) => any; -}; - -export type GetMetaDataOptions = { - customRules?: Record; - forceImageHttps?: boolean; -}; - -export type GetHTMLOptions = { - timeout?: number; - shouldReGetHTML?: (currentHTML: string) => boolean | Promise; -}; - -export type Options = { - shouldReGetHTML?: (metaData: MetaData) => boolean | Promise; -} & GetMetaDataOptions & - Omit; diff --git a/plugins/bookmark-block/src/server/get-meta-data/utils.ts b/plugins/bookmark-block/src/server/get-meta-data/utils.ts deleted file mode 100644 index ac61e810af..0000000000 --- a/plugins/bookmark-block/src/server/get-meta-data/utils.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { parse, resolve } from 'node:url'; - -export function makeUrlAbsolute(base: string, relative: string): string { - const relativeParsed = parse(relative); - - if (relativeParsed.host === null) { - return resolve(base, relative); - } - - return relative; -} - -export function makeUrlSecure(url: string): string { - return url.replace(/^http:/, 'https:'); -} - -export function parseUrl(url: string): string { - return parse(url).hostname || ''; -} - -export function getProvider(host: string): string { - return host - .replace(/www[a-zA-Z0-9]*\./, '') - .replace('.co.', '.') - .split('.') - .slice(0, -1) - .join(' '); -} diff --git a/scripts/esbuild/build-plugins.mjs b/scripts/esbuild/build-plugins.mjs deleted file mode 100755 index 6469f57707..0000000000 --- a/scripts/esbuild/build-plugins.mjs +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env node -import { build } from 'esbuild'; - -import { definePluginServerConfig } from './utils.mjs'; - -await build({ - ...definePluginServerConfig('bookmark-block'), - external: ['cheerio', 'electron'], -}); diff --git a/scripts/esbuild/dev-plugins.mjs b/scripts/esbuild/dev-plugins.mjs deleted file mode 100755 index 37a6a06af0..0000000000 --- a/scripts/esbuild/dev-plugins.mjs +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env node -import { context } from 'esbuild'; - -import { definePluginServerConfig } from './utils.mjs'; - -const plugin = await context({ - ...definePluginServerConfig('bookmark-block'), - external: ['cheerio', 'electron'], -}); - -await plugin.watch(); diff --git a/scripts/esbuild/utils.mjs b/scripts/esbuild/utils.mjs deleted file mode 100644 index 660b201c73..0000000000 --- a/scripts/esbuild/utils.mjs +++ /dev/null @@ -1,29 +0,0 @@ -import { resolve } from 'node:path'; -import { fileURLToPath } from 'node:url'; - -export const rootDir = fileURLToPath(new URL('../..', import.meta.url)); -export const electronOutputDir = resolve( - rootDir, - 'apps', - 'electron', - 'dist', - 'plugins' -); -export const pluginDir = resolve(rootDir, 'plugins'); - -/** - * - * @param pluginDirName {string} - * @return {import('esbuild').BuildOptions} - */ -export function definePluginServerConfig(pluginDirName) { - const pluginRootDir = resolve(pluginDir, pluginDirName); - const serverEntryFile = resolve(pluginRootDir, 'src/server.ts'); - const serverOutputDir = resolve(electronOutputDir, pluginDirName); - return { - entryPoints: [serverEntryFile], - platform: 'node', - outdir: serverOutputDir, - bundle: true, - }; -} diff --git a/scripts/setup/build-plugins.ts b/scripts/setup/build-plugins.ts index 735d28cd4d..83667b096e 100644 --- a/scripts/setup/build-plugins.ts +++ b/scripts/setup/build-plugins.ts @@ -2,5 +2,5 @@ import { beforeAll } from 'vitest'; beforeAll(async () => { console.log('Build plugins'); - await import('../esbuild/build-plugins.mjs'); + await import('../../apps/electron/scripts/plugins/build-plugins.mjs'); }); diff --git a/yarn.lock b/yarn.lock index 0c94ea6926..444ac8eb3e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -35,12 +35,10 @@ __metadata: resolution: "@affine/bookmark-block@workspace:plugins/bookmark-block" dependencies: "@toeverything/plugin-infra": "workspace:*" - cheerio: ^1.0.0-rc.12 - electron: =25.0.1 + link-preview-js: ^3.0.4 react: 18.3.0-canary-16d053d59-20230506 react-dom: 18.3.0-canary-16d053d59-20230506 peerDependencies: - electron: "*" react: "*" react-dom: "*" languageName: unknown @@ -10814,6 +10812,15 @@ __metadata: languageName: node linkType: hard +"abort-controller@npm:^3.0.0": + version: 3.0.0 + resolution: "abort-controller@npm:3.0.0" + dependencies: + event-target-shim: ^5.0.0 + checksum: 170bdba9b47b7e65906a28c8ce4f38a7a369d78e2271706f020849c1bfe0ee2067d4261df8bbb66eb84f79208fd5b710df759d64191db58cfba7ce8ef9c54b75 + languageName: node + linkType: hard + "accepts@npm:~1.3.5, accepts@npm:~1.3.8": version: 1.3.8 resolution: "accepts@npm:1.3.8" @@ -12494,6 +12501,22 @@ __metadata: languageName: node linkType: hard +"cheerio@npm:1.0.0-rc.11": + version: 1.0.0-rc.11 + resolution: "cheerio@npm:1.0.0-rc.11" + dependencies: + cheerio-select: ^2.1.0 + dom-serializer: ^2.0.0 + domhandler: ^5.0.3 + domutils: ^3.0.1 + htmlparser2: ^8.0.1 + parse5: ^7.0.0 + parse5-htmlparser2-tree-adapter: ^7.0.0 + tslib: ^2.4.0 + checksum: 7619edcbecafb70ca6ca842ce149307a84e8d451432a888d82959b2aa04e2090701658f25eac75821e0832cc1305bdbcf02f17175102fc1723f119f3c9ece17a + languageName: node + linkType: hard + "cheerio@npm:^1.0.0-rc.12": version: 1.0.0-rc.12 resolution: "cheerio@npm:1.0.0-rc.12" @@ -13289,6 +13312,15 @@ __metadata: languageName: node linkType: hard +"cross-fetch@npm:3.1.5": + version: 3.1.5 + resolution: "cross-fetch@npm:3.1.5" + dependencies: + node-fetch: 2.6.7 + checksum: f6b8c6ee3ef993ace6277fd789c71b6acf1b504fd5f5c7128df4ef2f125a429e29cd62dc8c127523f04a5f2fa4771ed80e3f3d9695617f441425045f505cf3bb + languageName: node + linkType: hard + "cross-fetch@npm:^3.0.6, cross-fetch@npm:^3.1.5": version: 3.1.6 resolution: "cross-fetch@npm:3.1.6" @@ -15415,6 +15447,13 @@ __metadata: languageName: node linkType: hard +"event-target-shim@npm:^5.0.0": + version: 5.0.1 + resolution: "event-target-shim@npm:5.0.1" + checksum: 1ffe3bb22a6d51bdeb6bf6f7cf97d2ff4a74b017ad12284cc9e6a279e727dc30a5de6bb613e5596ff4dc3e517841339ad09a7eec44266eccb1aa201a30448166 + languageName: node + linkType: hard + "eventemitter3@npm:^3.1.0": version: 3.1.2 resolution: "eventemitter3@npm:3.1.2" @@ -19911,6 +19950,18 @@ __metadata: languageName: node linkType: hard +"link-preview-js@npm:^3.0.4": + version: 3.0.4 + resolution: "link-preview-js@npm:3.0.4" + dependencies: + abort-controller: ^3.0.0 + cheerio: 1.0.0-rc.11 + cross-fetch: 3.1.5 + url: 0.11.0 + checksum: 8d3c0c8bc3b0b5a7b95586f4e98e1013ccfc8fbce3405fffb543ecb10a6d3abb66742f1845e366ca3c1ef80725619aa4b034d97937f4e152e63d11f8676bab9a + languageName: node + linkType: hard + "lint-staged@npm:^13.2.2": version: 13.2.2 resolution: "lint-staged@npm:13.2.2" @@ -23120,6 +23171,13 @@ __metadata: languageName: node linkType: hard +"punycode@npm:1.3.2": + version: 1.3.2 + resolution: "punycode@npm:1.3.2" + checksum: b8807fd594b1db33335692d1f03e8beeddde6fda7fbb4a2e32925d88d20a3aa4cd8dcc0c109ccaccbd2ba761c208dfaaada83007087ea8bfb0129c9ef1b99ed6 + languageName: node + linkType: hard + "punycode@npm:^1.3.2": version: 1.4.1 resolution: "punycode@npm:1.4.1" @@ -23205,6 +23263,13 @@ __metadata: languageName: node linkType: hard +"querystring@npm:0.2.0": + version: 0.2.0 + resolution: "querystring@npm:0.2.0" + checksum: 8258d6734f19be27e93f601758858c299bdebe71147909e367101ba459b95446fbe5b975bf9beb76390156a592b6f4ac3a68b6087cea165c259705b8b4e56a69 + languageName: node + linkType: hard + "queue-microtask@npm:^1.2.2, queue-microtask@npm:^1.2.3": version: 1.2.3 resolution: "queue-microtask@npm:1.2.3" @@ -26805,6 +26870,16 @@ __metadata: languageName: node linkType: hard +"url@npm:0.11.0": + version: 0.11.0 + resolution: "url@npm:0.11.0" + dependencies: + punycode: 1.3.2 + querystring: 0.2.0 + checksum: 50d100d3dd2d98b9fe3ada48cadb0b08aa6be6d3ac64112b867b56b19be4bfcba03c2a9a0d7922bfd7ac17d4834e88537749fe182430dfd9b68e520175900d90 + languageName: node + linkType: hard + "urlpattern-polyfill@npm:^8.0.0": version: 8.0.2 resolution: "urlpattern-polyfill@npm:8.0.2"