mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-11 20:08:37 +00:00
@@ -76,3 +76,55 @@ Generated by [AVA](https://avajs.dev).
|
||||
url: 'http://example.com/page',
|
||||
videos: [],
|
||||
}
|
||||
|
||||
> Snapshot 5
|
||||
|
||||
{
|
||||
charset: 'gbk',
|
||||
favicons: [
|
||||
'http://localhost:3010/api/worker/image-proxy?url=https%3A%2F%2Fexample.com%2Ffavicon.ico',
|
||||
],
|
||||
images: [],
|
||||
title: '你好,世界。',
|
||||
url: 'http://example.com/gb2312',
|
||||
videos: [],
|
||||
}
|
||||
|
||||
> Snapshot 6
|
||||
|
||||
{
|
||||
charset: 'shift_jis',
|
||||
favicons: [
|
||||
'http://localhost:3010/api/worker/image-proxy?url=https%3A%2F%2Fexample.com%2Ffavicon.ico',
|
||||
],
|
||||
images: [],
|
||||
title: 'こんにちは、世界。',
|
||||
url: 'http://example.com/shift-jis',
|
||||
videos: [],
|
||||
}
|
||||
|
||||
> Snapshot 7
|
||||
|
||||
{
|
||||
charset: 'big5',
|
||||
favicons: [
|
||||
'http://localhost:3010/api/worker/image-proxy?url=https%3A%2F%2Fexample.com%2Ffavicon.ico',
|
||||
],
|
||||
images: [],
|
||||
title: '你好,世界。',
|
||||
url: 'http://example.com/big5',
|
||||
videos: [],
|
||||
}
|
||||
|
||||
> Snapshot 8
|
||||
|
||||
{
|
||||
charset: 'euc-kr',
|
||||
favicons: [
|
||||
'http://localhost:3010/api/worker/image-proxy?url=https%3A%2F%2Fexample.com%2Ffavicon.ico',
|
||||
],
|
||||
images: [],
|
||||
title: '안녕하세요, 세계.',
|
||||
url: 'http://example.com/euc-kr',
|
||||
videos: [],
|
||||
}
|
||||
|
||||
Binary file not shown.
@@ -171,4 +171,56 @@ test('should preview link', async t => {
|
||||
|
||||
fetchSpy.restore();
|
||||
}
|
||||
|
||||
{
|
||||
const encoded = [
|
||||
{
|
||||
content: 'xOO6w6OsysC956Gj',
|
||||
charset: 'gb2312',
|
||||
},
|
||||
{
|
||||
content: 'grGC8YLJgr+CzYFBkKKKRYFC',
|
||||
charset: 'shift-jis',
|
||||
},
|
||||
{
|
||||
content: 'p0GmbqFBpUCsyaFD',
|
||||
charset: 'big5',
|
||||
},
|
||||
{
|
||||
content: 'vsiz58fPvLy/5CwgvLyw6C4=',
|
||||
charset: 'euc-kr',
|
||||
},
|
||||
];
|
||||
|
||||
for (const { content, charset } of encoded) {
|
||||
const before = Buffer.from(`<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=${charset}" />
|
||||
<meta property="og:title" content="`);
|
||||
const encoded = Buffer.from(content, 'base64');
|
||||
const after = Buffer.from(`" />
|
||||
</head>
|
||||
</html>
|
||||
`);
|
||||
const fakeHTML = new Response(Buffer.concat([before, encoded, after]));
|
||||
|
||||
Object.defineProperty(fakeHTML, 'url', {
|
||||
value: `http://example.com/${charset}`,
|
||||
});
|
||||
|
||||
const fetchSpy = Sinon.stub(global, 'fetch').resolves(fakeHTML);
|
||||
|
||||
await assertAndSnapshot(
|
||||
'/api/worker/link-preview',
|
||||
'should decode HTML content with charset',
|
||||
{
|
||||
status: 200,
|
||||
method: 'POST',
|
||||
body: { url: `http://example.com/${charset}` },
|
||||
}
|
||||
);
|
||||
|
||||
fetchSpy.restore();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -24,6 +24,10 @@ import {
|
||||
parseJson,
|
||||
reduceUrls,
|
||||
} from './utils';
|
||||
import { decodeWithCharset } from './utils/encoding';
|
||||
|
||||
// cache for 30 minutes
|
||||
const CACHE_TTL = 1000 * 60 * 30;
|
||||
|
||||
@Public()
|
||||
@Controller('/api/worker')
|
||||
@@ -67,6 +71,25 @@ export class WorkerController {
|
||||
throw new BadRequest(`Invalid URL`);
|
||||
}
|
||||
|
||||
const cachedUrl = `image-proxy:${targetURL.toString()}`;
|
||||
const cachedResponse = await this.cache.get<string>(cachedUrl);
|
||||
if (cachedResponse) {
|
||||
const buffer = Buffer.from(cachedResponse, 'base64');
|
||||
// if cached response is empty, it means the request is rejected by server previously
|
||||
if (buffer.length === 0) {
|
||||
return resp.status(404).header(getCorsHeaders(origin)).send();
|
||||
}
|
||||
return resp
|
||||
.status(200)
|
||||
.header({
|
||||
'Access-Control-Allow-Origin': origin,
|
||||
Vary: 'Origin',
|
||||
'Access-Control-Allow-Methods': 'GET',
|
||||
'Content-Type': 'image/*',
|
||||
})
|
||||
.send(buffer);
|
||||
}
|
||||
|
||||
const response = await fetch(
|
||||
new Request(targetURL.toString(), {
|
||||
method: 'GET',
|
||||
@@ -75,8 +98,12 @@ export class WorkerController {
|
||||
);
|
||||
if (response.ok) {
|
||||
const contentType = response.headers.get('Content-Type');
|
||||
const contentDisposition = response.headers.get('Content-Disposition');
|
||||
if (contentType?.startsWith('image/')) {
|
||||
const buffer = Buffer.from(await response.arrayBuffer());
|
||||
await this.cache.set(cachedUrl, buffer.toString('base64'), {
|
||||
ttl: CACHE_TTL,
|
||||
});
|
||||
const contentDisposition = response.headers.get('Content-Disposition');
|
||||
return resp
|
||||
.status(200)
|
||||
.header({
|
||||
@@ -86,11 +113,17 @@ export class WorkerController {
|
||||
'Content-Type': contentType,
|
||||
'Content-Disposition': contentDisposition,
|
||||
})
|
||||
.send(Buffer.from(await response.arrayBuffer()));
|
||||
.send(buffer);
|
||||
} else {
|
||||
throw new BadRequest('Invalid content type');
|
||||
}
|
||||
} else {
|
||||
if (response.status >= 400 && response.status < 500) {
|
||||
// rejected by server, cache a empty response
|
||||
await this.cache.set(cachedUrl, Buffer.from([]).toString('base64'), {
|
||||
ttl: CACHE_TTL,
|
||||
});
|
||||
}
|
||||
this.logger.error('Failed to fetch image', {
|
||||
origin,
|
||||
url: imageURL,
|
||||
@@ -130,18 +163,19 @@ export class WorkerController {
|
||||
|
||||
this.logger.debug('Received request', { origin, method: request.method });
|
||||
|
||||
const targetBody = parseJson<LinkPreviewRequest>(request.body);
|
||||
const targetURL = fixUrl(targetBody?.url);
|
||||
const requestBody = parseJson<LinkPreviewRequest>(request.body);
|
||||
const targetURL = fixUrl(requestBody?.url);
|
||||
// not allow same site preview
|
||||
if (!targetURL || isOriginAllowed(targetURL.origin, this.allowedOrigin)) {
|
||||
this.logger.error('Invalid URL', { origin, url: targetBody?.url });
|
||||
this.logger.error('Invalid URL', { origin, url: requestBody?.url });
|
||||
throw new BadRequest('Invalid URL');
|
||||
}
|
||||
|
||||
this.logger.debug('Processing request', { origin, url: targetURL });
|
||||
|
||||
try {
|
||||
const cachedResponse = await this.cache.get<string>(targetURL.toString());
|
||||
const cachedUrl = `link-preview:${targetURL.toString()}`;
|
||||
const cachedResponse = await this.cache.get<string>(cachedUrl);
|
||||
if (cachedResponse) {
|
||||
return resp
|
||||
.status(200)
|
||||
@@ -155,12 +189,23 @@ export class WorkerController {
|
||||
const response = await fetch(targetURL, {
|
||||
headers: cloneHeader(request.headers),
|
||||
});
|
||||
this.logger.error('Fetched URL', {
|
||||
this.logger.debug('Fetched URL', {
|
||||
origin,
|
||||
url: targetURL,
|
||||
status: response.status,
|
||||
});
|
||||
|
||||
if (requestBody?.head) {
|
||||
return resp
|
||||
.status(
|
||||
response.status >= 200 && response.status < 400
|
||||
? 204
|
||||
: response.status
|
||||
)
|
||||
.header(getCorsHeaders(origin))
|
||||
.send();
|
||||
}
|
||||
|
||||
const res: LinkPreviewResponse = {
|
||||
url: response.url,
|
||||
images: [],
|
||||
@@ -170,6 +215,8 @@ export class WorkerController {
|
||||
const baseUrl = new URL(request.url, this.url.baseUrl).toString();
|
||||
|
||||
if (response.body) {
|
||||
const resp = await decodeWithCharset(response, res);
|
||||
|
||||
const rewriter = new HTMLRewriter()
|
||||
.on('meta', {
|
||||
element(element) {
|
||||
@@ -230,11 +277,11 @@ export class WorkerController {
|
||||
},
|
||||
});
|
||||
|
||||
await rewriter.transform(response).text();
|
||||
await rewriter.transform(resp).text();
|
||||
|
||||
res.images = await reduceUrls(baseUrl, res.images);
|
||||
|
||||
this.logger.error('Processed response with HTMLRewriter', {
|
||||
this.logger.debug('Processed response with HTMLRewriter', {
|
||||
origin,
|
||||
url: response.url,
|
||||
});
|
||||
@@ -259,7 +306,7 @@ export class WorkerController {
|
||||
responseSize: json.length,
|
||||
});
|
||||
|
||||
await this.cache.set(targetURL.toString(), res);
|
||||
await this.cache.set(cachedUrl, res, { ttl: CACHE_TTL });
|
||||
return resp
|
||||
.status(200)
|
||||
.header({
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/**
 * JSON body accepted by the link-preview endpoint; the controller parses the
 * incoming request body into this shape before doing any work.
 */
export type LinkPreviewRequest = {
|
||||
// Target page to preview; the controller normalises it with `fixUrl` and rejects the request when that yields nothing.
url: string;
|
||||
// When truthy the controller answers right after fetching the target, with no preview payload:
// 204 if the upstream status is in [200, 400), otherwise the upstream status itself.
head?: boolean;
|
||||
};
|
||||
|
||||
export type LinkPreviewResponse = {
|
||||
|
||||
45
packages/backend/server/src/plugins/worker/utils/encoding.ts
Normal file
45
packages/backend/server/src/plugins/worker/utils/encoding.ts
Normal file
@@ -0,0 +1,45 @@
|
||||
import { HTMLRewriter } from 'htmlrewriter';
|
||||
|
||||
import { LinkPreviewResponse } from '../types';
|
||||
|
||||
/**
 * Detects the character encoding declared inside an HTML document and, when a
 * usable one is found, re-decodes the body with it so later processing reads
 * correctly decoded text instead of mis-decoded bytes.
 *
 * Recognised declarations (the last one encountered in the document wins):
 *  - `<meta charset="...">`
 *  - `<meta http-equiv="Content-Type" content="text/html; charset=...">`
 *    (the `property` and `name` attributes are accepted in place of
 *    `http-equiv`, in that order of preference)
 *
 * The `lang` attribute of `<html>` is intentionally not consulted: it carries
 * a language tag (`en`, `zh-CN`, ...), not an encoding label, and passing such
 * a value to `TextDecoder` throws.
 *
 * @param response - The fetched HTML response. Its body is consumed here.
 * @param res - Preview result being built; `res.charset` is set to the
 *   canonical encoding name whenever a declared charset is applied.
 * @returns A new `Response` created with `response` as its init. The body is
 *   the decoded text when a supported charset was declared, otherwise the
 *   original bytes untouched.
 */
export async function decodeWithCharset(
  response: Response,
  res: LinkPreviewResponse
): Promise<Response> {
  let charset: string | undefined;
  const rewriter = new HTMLRewriter().on('meta', {
    element(element) {
      // HTML5 short form: <meta charset="gbk">
      const declared = element.getAttribute('charset')?.trim();
      if (declared) {
        charset = declared;
        return;
      }

      const property =
        element.getAttribute('property') ??
        element.getAttribute('name') ??
        element.getAttribute('http-equiv');
      const content = element.getAttribute('content');
      if (property?.toLowerCase() === 'content-type' && content) {
        // Tolerate upper-case `CHARSET`, spaces around `=`, and quoted values.
        const label = /charset\s*=\s*["']?([^"';\s]+)/i.exec(content)?.[1];
        // Only overwrite when a label is present, so a later content-type
        // meta without a charset cannot erase an earlier valid declaration.
        if (label) {
          charset = label;
        }
      }
    },
  });
  // No handler mutates the document; the transform is only used to observe
  // the <meta> elements while the raw bytes are collected.
  const body = await rewriter.transform(response).arrayBuffer();

  if (charset) {
    let decoder: TextDecoder | undefined;
    try {
      decoder = new TextDecoder(charset);
    } catch {
      // Unknown or misspelled label (TextDecoder throws RangeError): fall
      // through and hand back the raw bytes rather than failing the preview.
    }
    if (decoder) {
      res.charset = decoder.encoding;
      return new Response(decoder.decode(body), response);
    }
  }

  return new Response(body, response);
}
|
||||
Reference in New Issue
Block a user