diff --git a/packages/backend/server/src/__tests__/__snapshots__/worker.e2e.ts.md b/packages/backend/server/src/__tests__/__snapshots__/worker.e2e.ts.md
index bd6979b853..3e91ea9277 100644
--- a/packages/backend/server/src/__tests__/__snapshots__/worker.e2e.ts.md
+++ b/packages/backend/server/src/__tests__/__snapshots__/worker.e2e.ts.md
@@ -76,3 +76,55 @@ Generated by [AVA](https://avajs.dev).
url: 'http://example.com/page',
videos: [],
}
+
+> Snapshot 5
+
+ {
+ charset: 'gbk',
+ favicons: [
+ 'http://localhost:3010/api/worker/image-proxy?url=https%3A%2F%2Fexample.com%2Ffavicon.ico',
+ ],
+ images: [],
+ title: '你好,世界。',
+ url: 'http://example.com/gb2312',
+ videos: [],
+ }
+
+> Snapshot 6
+
+ {
+ charset: 'shift_jis',
+ favicons: [
+ 'http://localhost:3010/api/worker/image-proxy?url=https%3A%2F%2Fexample.com%2Ffavicon.ico',
+ ],
+ images: [],
+ title: 'こんにちは、世界。',
+ url: 'http://example.com/shift-jis',
+ videos: [],
+ }
+
+> Snapshot 7
+
+ {
+ charset: 'big5',
+ favicons: [
+ 'http://localhost:3010/api/worker/image-proxy?url=https%3A%2F%2Fexample.com%2Ffavicon.ico',
+ ],
+ images: [],
+ title: '你好,世界。',
+ url: 'http://example.com/big5',
+ videos: [],
+ }
+
+> Snapshot 8
+
+ {
+ charset: 'euc-kr',
+ favicons: [
+ 'http://localhost:3010/api/worker/image-proxy?url=https%3A%2F%2Fexample.com%2Ffavicon.ico',
+ ],
+ images: [],
+ title: '안녕하세요, 세계.',
+ url: 'http://example.com/euc-kr',
+ videos: [],
+ }
diff --git a/packages/backend/server/src/__tests__/__snapshots__/worker.e2e.ts.snap b/packages/backend/server/src/__tests__/__snapshots__/worker.e2e.ts.snap
index 7d2ba02596..3136f79eda 100644
Binary files a/packages/backend/server/src/__tests__/__snapshots__/worker.e2e.ts.snap and b/packages/backend/server/src/__tests__/__snapshots__/worker.e2e.ts.snap differ
diff --git a/packages/backend/server/src/__tests__/worker.e2e.ts b/packages/backend/server/src/__tests__/worker.e2e.ts
index f03a6c2008..8ccfd423d1 100644
--- a/packages/backend/server/src/__tests__/worker.e2e.ts
+++ b/packages/backend/server/src/__tests__/worker.e2e.ts
@@ -171,4 +171,56 @@ test('should preview link', async t => {
fetchSpy.restore();
}
+
+  {
+    // Each fixture is a page title stored as base64 of its bytes in the named
+    // legacy charset. The fake page declares that charset in a <meta> tag, and
+    // the snapshot records the title and charset the endpoint returns after
+    // decoding it (the charset is reported under TextDecoder's canonical
+    // name, e.g. 'gbk' for 'gb2312').
+    const fixtures = [
+      { content: 'xOO6w6OsysC956Gj', charset: 'gb2312' },
+      { content: 'grGC8YLJgr+CzYFBkKKKRYFC', charset: 'shift-jis' },
+      { content: 'p0GmbqFBpUCsyaFD', charset: 'big5' },
+      { content: 'vsiz58fPvLy/5CwgvLyw6C4=', charset: 'euc-kr' },
+    ];
+
+    for (const { content, charset } of fixtures) {
+      // The title bytes are spliced in raw between two ASCII fragments, so
+      // the body only reads correctly when decoded with the declared charset
+      // rather than as UTF-8.
+      const before = Buffer.from(`<html>
+        <head>
+          <meta http-equiv="Content-Type" content="text/html; charset=${charset}" />
+          <title>`);
+      const title = Buffer.from(content, 'base64');
+      const after = Buffer.from(`</title>
+        </head>
+      </html>
+    `);
+      const fakeHTML = new Response(Buffer.concat([before, title, after]));
+
+      // `url` is a read-only accessor on Response, so it is overridden on the
+      // instance; the controller reports `response.url` in the preview.
+      Object.defineProperty(fakeHTML, 'url', {
+        value: `http://example.com/${charset}`,
+      });
+
+      // Stub the global fetch so no network request is made for the preview.
+      const fetchSpy = Sinon.stub(global, 'fetch').resolves(fakeHTML);
+
+      await assertAndSnapshot(
+        '/api/worker/link-preview',
+        'should decode HTML content with charset',
+        {
+          status: 200,
+          method: 'POST',
+          body: { url: `http://example.com/${charset}` },
+        }
+      );
+
+      // Put the real fetch back before the next fixture installs its stub.
+      fetchSpy.restore();
+    }
+  }
});
diff --git a/packages/backend/server/src/plugins/worker/controller.ts b/packages/backend/server/src/plugins/worker/controller.ts
index 636a7d2f4e..ef92cfec66 100644
--- a/packages/backend/server/src/plugins/worker/controller.ts
+++ b/packages/backend/server/src/plugins/worker/controller.ts
@@ -24,6 +24,10 @@ import {
parseJson,
reduceUrls,
} from './utils';
+import { decodeWithCharset } from './utils/encoding';
+
+// cache for 30 minutes
+const CACHE_TTL = 1000 * 60 * 30;
@Public()
@Controller('/api/worker')
@@ -67,6 +71,25 @@ export class WorkerController {
throw new BadRequest(`Invalid URL`);
}
+ const cachedUrl = `image-proxy:${targetURL.toString()}`;
+ const cachedResponse = await this.cache.get(cachedUrl);
+ if (cachedResponse) {
+ const buffer = Buffer.from(cachedResponse, 'base64');
+ // if cached response is empty, it means the request is rejected by server previously
+ if (buffer.length === 0) {
+ return resp.status(404).header(getCorsHeaders(origin)).send();
+ }
+ return resp
+ .status(200)
+ .header({
+ 'Access-Control-Allow-Origin': origin,
+ Vary: 'Origin',
+ 'Access-Control-Allow-Methods': 'GET',
+ 'Content-Type': 'image/*',
+ })
+ .send(buffer);
+ }
+
const response = await fetch(
new Request(targetURL.toString(), {
method: 'GET',
@@ -75,8 +98,12 @@ export class WorkerController {
);
if (response.ok) {
const contentType = response.headers.get('Content-Type');
- const contentDisposition = response.headers.get('Content-Disposition');
if (contentType?.startsWith('image/')) {
+ const buffer = Buffer.from(await response.arrayBuffer());
+ await this.cache.set(cachedUrl, buffer.toString('base64'), {
+ ttl: CACHE_TTL,
+ });
+ const contentDisposition = response.headers.get('Content-Disposition');
return resp
.status(200)
.header({
@@ -86,11 +113,17 @@ export class WorkerController {
'Content-Type': contentType,
'Content-Disposition': contentDisposition,
})
- .send(Buffer.from(await response.arrayBuffer()));
+ .send(buffer);
} else {
throw new BadRequest('Invalid content type');
}
} else {
+ if (response.status >= 400 && response.status < 500) {
+ // rejected by server, cache a empty response
+ await this.cache.set(cachedUrl, Buffer.from([]).toString('base64'), {
+ ttl: CACHE_TTL,
+ });
+ }
this.logger.error('Failed to fetch image', {
origin,
url: imageURL,
@@ -130,18 +163,19 @@ export class WorkerController {
this.logger.debug('Received request', { origin, method: request.method });
- const targetBody = parseJson(request.body);
- const targetURL = fixUrl(targetBody?.url);
+ const requestBody = parseJson(request.body);
+ const targetURL = fixUrl(requestBody?.url);
// not allow same site preview
if (!targetURL || isOriginAllowed(targetURL.origin, this.allowedOrigin)) {
- this.logger.error('Invalid URL', { origin, url: targetBody?.url });
+ this.logger.error('Invalid URL', { origin, url: requestBody?.url });
throw new BadRequest('Invalid URL');
}
this.logger.debug('Processing request', { origin, url: targetURL });
try {
- const cachedResponse = await this.cache.get(targetURL.toString());
+ const cachedUrl = `link-preview:${targetURL.toString()}`;
+ const cachedResponse = await this.cache.get(cachedUrl);
if (cachedResponse) {
return resp
.status(200)
@@ -155,12 +189,23 @@ export class WorkerController {
const response = await fetch(targetURL, {
headers: cloneHeader(request.headers),
});
- this.logger.error('Fetched URL', {
+ this.logger.debug('Fetched URL', {
origin,
url: targetURL,
status: response.status,
});
+ if (requestBody?.head) {
+ return resp
+ .status(
+ response.status >= 200 && response.status < 400
+ ? 204
+ : response.status
+ )
+ .header(getCorsHeaders(origin))
+ .send();
+ }
+
const res: LinkPreviewResponse = {
url: response.url,
images: [],
@@ -170,6 +215,8 @@ export class WorkerController {
const baseUrl = new URL(request.url, this.url.baseUrl).toString();
if (response.body) {
+ const resp = await decodeWithCharset(response, res);
+
const rewriter = new HTMLRewriter()
.on('meta', {
element(element) {
@@ -230,11 +277,11 @@ export class WorkerController {
},
});
- await rewriter.transform(response).text();
+ await rewriter.transform(resp).text();
res.images = await reduceUrls(baseUrl, res.images);
- this.logger.error('Processed response with HTMLRewriter', {
+ this.logger.debug('Processed response with HTMLRewriter', {
origin,
url: response.url,
});
@@ -259,7 +306,7 @@ export class WorkerController {
responseSize: json.length,
});
- await this.cache.set(targetURL.toString(), res);
+ await this.cache.set(cachedUrl, res, { ttl: CACHE_TTL });
return resp
.status(200)
.header({
diff --git a/packages/backend/server/src/plugins/worker/types.ts b/packages/backend/server/src/plugins/worker/types.ts
index 27765946c9..004767a943 100644
--- a/packages/backend/server/src/plugins/worker/types.ts
+++ b/packages/backend/server/src/plugins/worker/types.ts
@@ -1,5 +1,6 @@
export type LinkPreviewRequest = {
url: string;
+ head?: boolean;
};
export type LinkPreviewResponse = {
diff --git a/packages/backend/server/src/plugins/worker/utils/encoding.ts b/packages/backend/server/src/plugins/worker/utils/encoding.ts
new file mode 100644
index 0000000000..0d86c4bb92
--- /dev/null
+++ b/packages/backend/server/src/plugins/worker/utils/encoding.ts
@@ -0,0 +1,61 @@
+import { HTMLRewriter } from 'htmlrewriter';
+
+import { LinkPreviewResponse } from '../types';
+
+/** Matches the `charset=<label>` parameter of a Content-Type value; quotes are optional. */
+const CHARSET_PARAM = /charset\s*=\s*["']?([^"';\s]+)/i;
+
+/**
+ * Re-decodes an HTML response according to the charset declared in a
+ * Content-Type `<meta>` tag.
+ *
+ * The document is scanned once with HTMLRewriter. When a charset is declared
+ * and `TextDecoder` accepts it, the body is decoded to a string, the decoder's
+ * normalized encoding name is stored on `res.charset`, and a new Response
+ * carrying the decoded text is returned. Otherwise the scanned bytes are
+ * returned as they are.
+ *
+ * Only the `content="...; charset=..."` form is inspected; a bare
+ * `<meta charset>` attribute is not handled here.
+ *
+ * @param response - Fetched response; its body is read by this call.
+ * @param res - Preview result; `charset` is set when decoding happens.
+ * @returns A Response built with the original response as its init.
+ */
+export async function decodeWithCharset(
+  response: Response,
+  res: LinkPreviewResponse
+): Promise<Response> {
+  let charset: string | undefined;
+  // `<html lang>` holds a language tag, not an encoding label, so it is
+  // deliberately ignored: `new TextDecoder('en')` throws a RangeError.
+  const rewriter = new HTMLRewriter().on('meta', {
+    element(element) {
+      const property =
+        element.getAttribute('property') ??
+        element.getAttribute('name') ??
+        element.getAttribute('http-equiv');
+      const content = element.getAttribute('content');
+      if (property?.toLowerCase() === 'content-type' && content) {
+        // The first declaration wins; a later tag without a charset must
+        // not erase it.
+        charset = charset ?? CHARSET_PARAM.exec(content)?.[1];
+      }
+    },
+  });
+  const body = await rewriter.transform(response).arrayBuffer();
+
+  if (charset) {
+    let decoder: TextDecoder | undefined;
+    try {
+      decoder = new TextDecoder(charset);
+    } catch {
+      // Unsupported label: keep the raw bytes instead of failing the preview.
+    }
+    if (decoder) {
+      res.charset = decoder.encoding;
+      return new Response(decoder.decode(body), response);
+    }
+  }
+  return new Response(body, response);
+}