feat(server): worker improve (#10176)

fix AF-2225
This commit is contained in:
darkskygit
2025-02-14 09:47:57 +00:00
parent 1bf1832211
commit 981b4efecf
6 changed files with 207 additions and 10 deletions

View File

@@ -76,3 +76,55 @@ Generated by [AVA](https://avajs.dev).
url: 'http://example.com/page',
videos: [],
}
> Snapshot 5
{
charset: 'gbk',
favicons: [
'http://localhost:3010/api/worker/image-proxy?url=https%3A%2F%2Fexample.com%2Ffavicon.ico',
],
images: [],
title: '你好,世界。',
url: 'http://example.com/gb2312',
videos: [],
}
> Snapshot 6
{
charset: 'shift_jis',
favicons: [
'http://localhost:3010/api/worker/image-proxy?url=https%3A%2F%2Fexample.com%2Ffavicon.ico',
],
images: [],
title: 'こんにちは、世界。',
url: 'http://example.com/shift-jis',
videos: [],
}
> Snapshot 7
{
charset: 'big5',
favicons: [
'http://localhost:3010/api/worker/image-proxy?url=https%3A%2F%2Fexample.com%2Ffavicon.ico',
],
images: [],
title: '你好,世界。',
url: 'http://example.com/big5',
videos: [],
}
> Snapshot 8
{
charset: 'euc-kr',
favicons: [
'http://localhost:3010/api/worker/image-proxy?url=https%3A%2F%2Fexample.com%2Ffavicon.ico',
],
images: [],
title: '안녕하세요, 세계.',
url: 'http://example.com/euc-kr',
videos: [],
}

View File

@@ -171,4 +171,56 @@ test('should preview link', async t => {
fetchSpy.restore();
}
{
const encoded = [
{
content: 'xOO6w6OsysC956Gj',
charset: 'gb2312',
},
{
content: 'grGC8YLJgr+CzYFBkKKKRYFC',
charset: 'shift-jis',
},
{
content: 'p0GmbqFBpUCsyaFD',
charset: 'big5',
},
{
content: 'vsiz58fPvLy/5CwgvLyw6C4=',
charset: 'euc-kr',
},
];
for (const { content, charset } of encoded) {
const before = Buffer.from(`<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=${charset}" />
<meta property="og:title" content="`);
const encoded = Buffer.from(content, 'base64');
const after = Buffer.from(`" />
</head>
</html>
`);
const fakeHTML = new Response(Buffer.concat([before, encoded, after]));
Object.defineProperty(fakeHTML, 'url', {
value: `http://example.com/${charset}`,
});
const fetchSpy = Sinon.stub(global, 'fetch').resolves(fakeHTML);
await assertAndSnapshot(
'/api/worker/link-preview',
'should decode HTML content with charset',
{
status: 200,
method: 'POST',
body: { url: `http://example.com/${charset}` },
}
);
fetchSpy.restore();
}
}
});

View File

@@ -24,6 +24,10 @@ import {
parseJson,
reduceUrls,
} from './utils';
import { decodeWithCharset } from './utils/encoding';
// cache for 30 minutes
const CACHE_TTL = 1000 * 60 * 30;
@Public()
@Controller('/api/worker')
@@ -67,6 +71,25 @@ export class WorkerController {
throw new BadRequest(`Invalid URL`);
}
const cachedUrl = `image-proxy:${targetURL.toString()}`;
const cachedResponse = await this.cache.get<string>(cachedUrl);
if (cachedResponse) {
const buffer = Buffer.from(cachedResponse, 'base64');
// if cached response is empty, it means the request is rejected by server previously
if (buffer.length === 0) {
return resp.status(404).header(getCorsHeaders(origin)).send();
}
return resp
.status(200)
.header({
'Access-Control-Allow-Origin': origin,
Vary: 'Origin',
'Access-Control-Allow-Methods': 'GET',
'Content-Type': 'image/*',
})
.send(buffer);
}
const response = await fetch(
new Request(targetURL.toString(), {
method: 'GET',
@@ -75,8 +98,12 @@ export class WorkerController {
);
if (response.ok) {
const contentType = response.headers.get('Content-Type');
const contentDisposition = response.headers.get('Content-Disposition');
if (contentType?.startsWith('image/')) {
const buffer = Buffer.from(await response.arrayBuffer());
await this.cache.set(cachedUrl, buffer.toString('base64'), {
ttl: CACHE_TTL,
});
const contentDisposition = response.headers.get('Content-Disposition');
return resp
.status(200)
.header({
@@ -86,11 +113,17 @@ export class WorkerController {
'Content-Type': contentType,
'Content-Disposition': contentDisposition,
})
.send(Buffer.from(await response.arrayBuffer()));
.send(buffer);
} else {
throw new BadRequest('Invalid content type');
}
} else {
if (response.status >= 400 && response.status < 500) {
// rejected by the server; cache an empty response so the failure is remembered
await this.cache.set(cachedUrl, Buffer.from([]).toString('base64'), {
ttl: CACHE_TTL,
});
}
this.logger.error('Failed to fetch image', {
origin,
url: imageURL,
@@ -130,18 +163,19 @@ export class WorkerController {
this.logger.debug('Received request', { origin, method: request.method });
const targetBody = parseJson<LinkPreviewRequest>(request.body);
const targetURL = fixUrl(targetBody?.url);
const requestBody = parseJson<LinkPreviewRequest>(request.body);
const targetURL = fixUrl(requestBody?.url);
// do not allow previews of same-site URLs
if (!targetURL || isOriginAllowed(targetURL.origin, this.allowedOrigin)) {
this.logger.error('Invalid URL', { origin, url: targetBody?.url });
this.logger.error('Invalid URL', { origin, url: requestBody?.url });
throw new BadRequest('Invalid URL');
}
this.logger.debug('Processing request', { origin, url: targetURL });
try {
const cachedResponse = await this.cache.get<string>(targetURL.toString());
const cachedUrl = `link-preview:${targetURL.toString()}`;
const cachedResponse = await this.cache.get<string>(cachedUrl);
if (cachedResponse) {
return resp
.status(200)
@@ -155,12 +189,23 @@ export class WorkerController {
const response = await fetch(targetURL, {
headers: cloneHeader(request.headers),
});
this.logger.error('Fetched URL', {
this.logger.debug('Fetched URL', {
origin,
url: targetURL,
status: response.status,
});
if (requestBody?.head) {
return resp
.status(
response.status >= 200 && response.status < 400
? 204
: response.status
)
.header(getCorsHeaders(origin))
.send();
}
const res: LinkPreviewResponse = {
url: response.url,
images: [],
@@ -170,6 +215,8 @@ export class WorkerController {
const baseUrl = new URL(request.url, this.url.baseUrl).toString();
if (response.body) {
const resp = await decodeWithCharset(response, res);
const rewriter = new HTMLRewriter()
.on('meta', {
element(element) {
@@ -230,11 +277,11 @@ export class WorkerController {
},
});
await rewriter.transform(response).text();
await rewriter.transform(resp).text();
res.images = await reduceUrls(baseUrl, res.images);
this.logger.error('Processed response with HTMLRewriter', {
this.logger.debug('Processed response with HTMLRewriter', {
origin,
url: response.url,
});
@@ -259,7 +306,7 @@ export class WorkerController {
responseSize: json.length,
});
await this.cache.set(targetURL.toString(), res);
await this.cache.set(cachedUrl, res, { ttl: CACHE_TTL });
return resp
.status(200)
.header({

View File

@@ -1,5 +1,6 @@
/**
 * JSON body parsed by the worker's link-preview handler.
 */
export type LinkPreviewRequest = {
/** Address of the page to fetch and build a preview for. */
url: string;
/**
 * When truthy, the handler replies with a status code only (204 if the
 * upstream fetch returned 2xx/3xx, otherwise the upstream status) and
 * sends no preview payload.
 */
head?: boolean;
};
export type LinkPreviewResponse = {

View File

@@ -0,0 +1,45 @@
import { HTMLRewriter } from 'htmlrewriter';
import { LinkPreviewResponse } from '../types';
/**
 * Pulls the `charset` parameter out of a Content-Type style value such as
 * `text/html; charset=gb2312`.
 *
 * Parameter names are matched case-insensitively and a pair of quotes
 * surrounding the value is removed.
 *
 * @param contentType - Raw `content` attribute of a Content-Type `<meta>` tag.
 * @returns The declared charset label, or `undefined` when none is present.
 */
function extractCharset(contentType: string): string | undefined {
  for (const part of contentType.split(';')) {
    const separator = part.indexOf('=');
    if (separator === -1) continue;
    if (part.slice(0, separator).trim().toLowerCase() !== 'charset') continue;
    const label = part
      .slice(separator + 1)
      .trim()
      .replace(/^["']|["']$/g, '');
    return label || undefined;
  }
  return undefined;
}

/**
 * Re-decodes an HTML response according to the charset declared in its
 * `<meta http-equiv="Content-Type">` tag so later processing sees proper
 * Unicode text.
 *
 * The `lang` attribute of `<html>` is deliberately NOT consulted: it is a
 * language tag (e.g. `en`), not an encoding label, and handing it to
 * `TextDecoder` throws for virtually every real page.
 *
 * @param response - Upstream response whose body is consumed by this call.
 * @param res - Preview result; `res.charset` is set to the decoder's canonical
 *   encoding name when a supported charset was declared.
 * @returns A new response carrying the decoded text (or the untouched bytes
 *   when no supported charset was declared), built with `response` as init.
 */
export async function decodeWithCharset(
  response: Response,
  res: LinkPreviewResponse
): Promise<Response> {
  let charset: string | undefined;

  const rewriter = new HTMLRewriter().on('meta', {
    element(element) {
      const property =
        element.getAttribute('property') ??
        element.getAttribute('name') ??
        element.getAttribute('http-equiv');
      const content = element.getAttribute('content');
      if (content && property?.toLowerCase() === 'content-type') {
        // keep an earlier declaration if this tag carries no charset
        charset = extractCharset(content) ?? charset;
      }
    },
  });

  // the handlers above run while the body is being drained
  const body = await rewriter.transform(response).arrayBuffer();

  let decoder: TextDecoder | undefined;
  if (charset) {
    try {
      decoder = new TextDecoder(charset);
    } catch (error) {
      // An unknown label raises RangeError; treat the page as undeclared
      // instead of failing the whole preview. Anything else is unexpected.
      if (!(error instanceof RangeError)) throw error;
    }
  }

  if (!decoder) {
    return new Response(body, response);
  }

  res.charset = decoder.encoding;
  return new Response(decoder.decode(body), response);
}