mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-07-02 02:00:49 +08:00
feat(server): cleanup image (#15145)
#### PR Dependency Tree * **PR #15145** 👈 This tree was auto-generated by [Charcoal](https://github.com/danerwilliams/charcoal) <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit ## Release Notes * **Chores** * Improved Docker build cleanup by deduplicating identical static files using content hashing and hardlinks to reduce package size. * Expanded pruning of unnecessary runtime and build artifacts (including Prisma-related files) and broader removal of disposable `node_modules` contents. * Updated cleanup flow to focus on deduplication and targeted artifact removal for faster, leaner deployments. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import { spawnSync } from 'node:child_process';
|
||||
import crypto from 'node:crypto';
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
@@ -42,6 +43,160 @@ async function rmrf(targetPath) {
|
||||
await fs.rm(targetPath, { recursive: true, force: true });
|
||||
}
|
||||
|
||||
async function fileSize(filePath) {
|
||||
const stat = await fs.lstat(filePath).catch(() => null);
|
||||
return stat?.isFile() ? stat.size : 0;
|
||||
}
|
||||
|
||||
async function walkFiles(rootDir) {
|
||||
if (!(await exists(rootDir))) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const files = [];
|
||||
const stack = [rootDir];
|
||||
|
||||
while (stack.length) {
|
||||
const current = stack.pop();
|
||||
let entries;
|
||||
try {
|
||||
entries = await fs.readdir(current, { withFileTypes: true });
|
||||
} catch (err) {
|
||||
debug(`skip unreadable dir ${current}: ${err?.message ?? String(err)}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
entries.sort((a, b) => a.name.localeCompare(b.name));
|
||||
|
||||
for (const dirent of entries) {
|
||||
const fullPath = path.join(current, dirent.name);
|
||||
if (dirent.isDirectory()) {
|
||||
stack.push(fullPath);
|
||||
} else if (dirent.isFile()) {
|
||||
files.push(fullPath);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
files.sort();
|
||||
return files;
|
||||
}
|
||||
|
||||
async function sha256(filePath) {
|
||||
const hash = crypto.createHash('sha256');
|
||||
const handle = await fs.open(filePath, 'r');
|
||||
try {
|
||||
for await (const chunk of handle.readableWebStream()) {
|
||||
hash.update(Buffer.from(chunk));
|
||||
}
|
||||
} finally {
|
||||
await handle.close().catch(() => {});
|
||||
}
|
||||
return hash.digest('hex');
|
||||
}
|
||||
|
||||
async function hardlinkDuplicate(canonicalPath, duplicatePath) {
|
||||
const tempPath = path.join(
|
||||
path.dirname(duplicatePath),
|
||||
`.docker-clean-link-${process.pid}-${Date.now()}-${path.basename(
|
||||
duplicatePath
|
||||
)}`
|
||||
);
|
||||
|
||||
try {
|
||||
await fs.link(canonicalPath, tempPath);
|
||||
await fs.rename(tempPath, duplicatePath);
|
||||
return true;
|
||||
} catch (err) {
|
||||
await fs.rm(tempPath, { force: true }).catch(() => {});
|
||||
debug(
|
||||
`failed to hardlink ${duplicatePath} -> ${canonicalPath}: ${
|
||||
err?.message ?? String(err)
|
||||
}`
|
||||
);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function hasCompatibleHardlinkMetadata(canonicalStat, duplicateStat) {
|
||||
return (
|
||||
canonicalStat.mode === duplicateStat.mode &&
|
||||
canonicalStat.uid === duplicateStat.uid &&
|
||||
canonicalStat.gid === duplicateStat.gid
|
||||
);
|
||||
}
|
||||
|
||||
async function hardlinkDuplicateFiles(rootDir) {
|
||||
const files = await walkFiles(rootDir);
|
||||
const bySize = new Map();
|
||||
|
||||
for (const filePath of files) {
|
||||
const size = await fileSize(filePath);
|
||||
if (size === 0) {
|
||||
continue;
|
||||
}
|
||||
const sizedFiles = bySize.get(size);
|
||||
if (sizedFiles) {
|
||||
sizedFiles.push(filePath);
|
||||
} else {
|
||||
bySize.set(size, [filePath]);
|
||||
}
|
||||
}
|
||||
|
||||
let linked = 0;
|
||||
let savedBytes = 0;
|
||||
|
||||
for (const [size, sizedFiles] of bySize) {
|
||||
if (sizedFiles.length < 2) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const byHash = new Map();
|
||||
for (const filePath of sizedFiles) {
|
||||
let digest;
|
||||
try {
|
||||
digest = await sha256(filePath);
|
||||
} catch (err) {
|
||||
debug(`failed to hash ${filePath}: ${err?.message ?? String(err)}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
const canonicalPath = byHash.get(digest);
|
||||
if (!canonicalPath) {
|
||||
byHash.set(digest, filePath);
|
||||
continue;
|
||||
}
|
||||
|
||||
const [canonicalStat, duplicateStat] = await Promise.all([
|
||||
fs.lstat(canonicalPath).catch(() => null),
|
||||
fs.lstat(filePath).catch(() => null),
|
||||
]);
|
||||
|
||||
if (
|
||||
!canonicalStat ||
|
||||
!duplicateStat ||
|
||||
!hasCompatibleHardlinkMetadata(canonicalStat, duplicateStat)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (
|
||||
canonicalStat.dev === duplicateStat.dev &&
|
||||
canonicalStat.ino === duplicateStat.ino
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (await hardlinkDuplicate(canonicalPath, filePath)) {
|
||||
linked += 1;
|
||||
savedBytes += size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { linked, savedBytes };
|
||||
}
|
||||
|
||||
async function deleteFilesByExtension(rootDir, extension) {
|
||||
if (!(await exists(rootDir))) {
|
||||
return 0;
|
||||
@@ -90,6 +245,110 @@ async function deleteFilesByExtension(rootDir, extension) {
|
||||
return deleted;
|
||||
}
|
||||
|
||||
async function deleteFilesByPredicate(rootDir, shouldDelete) {
|
||||
if (!(await exists(rootDir))) {
|
||||
return { deleted: 0, bytes: 0 };
|
||||
}
|
||||
|
||||
let deleted = 0;
|
||||
let bytes = 0;
|
||||
const stack = [rootDir];
|
||||
|
||||
while (stack.length) {
|
||||
const current = stack.pop();
|
||||
let entries;
|
||||
try {
|
||||
entries = await fs.readdir(current, { withFileTypes: true });
|
||||
} catch (err) {
|
||||
debug(`skip unreadable dir ${current}: ${err?.message ?? String(err)}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const dirent of entries) {
|
||||
const fullPath = path.join(current, dirent.name);
|
||||
if (dirent.isDirectory()) {
|
||||
stack.push(fullPath);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!dirent.isFile() || !shouldDelete(fullPath, dirent.name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const size = await fileSize(fullPath);
|
||||
try {
|
||||
await fs.unlink(fullPath);
|
||||
deleted += 1;
|
||||
bytes += size;
|
||||
} catch (err) {
|
||||
debug(`failed to delete ${fullPath}: ${err?.message ?? String(err)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { deleted, bytes };
|
||||
}
|
||||
|
||||
async function deleteDirsByName(rootDir, names, shouldPreserve = () => false) {
|
||||
if (!(await exists(rootDir))) {
|
||||
return { deleted: 0, bytes: 0 };
|
||||
}
|
||||
|
||||
let deleted = 0;
|
||||
let bytes = 0;
|
||||
const stack = [rootDir];
|
||||
|
||||
while (stack.length) {
|
||||
const current = stack.pop();
|
||||
let entries;
|
||||
try {
|
||||
entries = await fs.readdir(current, { withFileTypes: true });
|
||||
} catch (err) {
|
||||
debug(`skip unreadable dir ${current}: ${err?.message ?? String(err)}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const dirent of entries) {
|
||||
if (!dirent.isDirectory()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const fullPath = path.join(current, dirent.name);
|
||||
if (shouldPreserve(fullPath)) {
|
||||
stack.push(fullPath);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (names.has(dirent.name)) {
|
||||
const dirBytes = await directoryBytes(fullPath);
|
||||
try {
|
||||
await rmrf(fullPath);
|
||||
deleted += 1;
|
||||
bytes += dirBytes;
|
||||
} catch (err) {
|
||||
debug(`failed to delete ${fullPath}: ${err?.message ?? String(err)}`);
|
||||
}
|
||||
} else {
|
||||
stack.push(fullPath);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { deleted, bytes };
|
||||
}
|
||||
|
||||
async function directoryBytes(rootDir) {
|
||||
let bytes = 0;
|
||||
for (const filePath of await walkFiles(rootDir)) {
|
||||
bytes += await fileSize(filePath);
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
|
||||
function formatMiB(bytes) {
|
||||
return `${(bytes / 1024 / 1024).toFixed(1)}MiB`;
|
||||
}
|
||||
|
||||
function normalizeTargetKey(arch, variant) {
|
||||
// BuildKit: TARGETARCH=arm TARGETVARIANT=v7
|
||||
if (arch === 'arm' && variant === 'v7') {
|
||||
@@ -309,6 +568,138 @@ async function prunePrismaEngines(appRoot, targetKey) {
|
||||
}
|
||||
}
|
||||
|
||||
async function prunePrismaRuntimeArtifacts(nodeModulesDir) {
|
||||
const prismaClientRuntimeDir = path.join(
|
||||
nodeModulesDir,
|
||||
'@prisma',
|
||||
'client',
|
||||
'runtime'
|
||||
);
|
||||
const prismaClientCopyRuntimeDir = path.join(
|
||||
nodeModulesDir,
|
||||
'prisma',
|
||||
'prisma-client',
|
||||
'runtime'
|
||||
);
|
||||
|
||||
let deleted = 0;
|
||||
let bytes = 0;
|
||||
|
||||
for (const runtimeDir of [
|
||||
prismaClientRuntimeDir,
|
||||
prismaClientCopyRuntimeDir,
|
||||
]) {
|
||||
const result = await deleteFilesByPredicate(
|
||||
runtimeDir,
|
||||
(_filePath, name) => {
|
||||
return (
|
||||
name.startsWith('query_engine_bg.') ||
|
||||
name.startsWith('query_compiler_bg.')
|
||||
);
|
||||
}
|
||||
);
|
||||
deleted += result.deleted;
|
||||
bytes += result.bytes;
|
||||
}
|
||||
|
||||
return { deleted, bytes };
|
||||
}
|
||||
|
||||
function isNodeModulesPackageRoot(nodeModulesDir, dirPath) {
|
||||
const relative = path.relative(nodeModulesDir, dirPath);
|
||||
if (!relative || relative.startsWith('..') || path.isAbsolute(relative)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const segments = ['node_modules', ...relative.split(path.sep)];
|
||||
const targetIndex = segments.length - 1;
|
||||
|
||||
for (let i = 0; i < segments.length - 1; i += 1) {
|
||||
if (segments[i] !== 'node_modules') {
|
||||
continue;
|
||||
}
|
||||
|
||||
const packageIndex = i + 1;
|
||||
if (!segments[packageIndex]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (segments[packageIndex].startsWith('@')) {
|
||||
if (targetIndex === packageIndex + 1) {
|
||||
return true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (targetIndex === packageIndex) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
async function pruneNodeModulesArtifacts(nodeModulesDir) {
|
||||
const disposableDirs = new Set([
|
||||
'.github',
|
||||
'.husky',
|
||||
'benchmark',
|
||||
'benchmarks',
|
||||
'coverage',
|
||||
'example',
|
||||
'examples',
|
||||
'test',
|
||||
'testing',
|
||||
'tests',
|
||||
'__tests__',
|
||||
]);
|
||||
const disposableFilenames = new Set([
|
||||
'.npmignore',
|
||||
'.yarn-metadata.json',
|
||||
'CHANGELOG',
|
||||
'CHANGELOG.md',
|
||||
'HISTORY.md',
|
||||
'README',
|
||||
'README.md',
|
||||
]);
|
||||
const disposableExtensions = [
|
||||
'.cts',
|
||||
'.d.cts',
|
||||
'.d.mts',
|
||||
'.d.ts',
|
||||
'.markdown',
|
||||
'.md',
|
||||
'.mts',
|
||||
'.ts',
|
||||
'.tsbuildinfo',
|
||||
'.tsx',
|
||||
];
|
||||
|
||||
const dirResult = await deleteDirsByName(
|
||||
nodeModulesDir,
|
||||
disposableDirs,
|
||||
dirPath => isNodeModulesPackageRoot(nodeModulesDir, dirPath)
|
||||
);
|
||||
const fileResult = await deleteFilesByPredicate(
|
||||
nodeModulesDir,
|
||||
(_filePath, name) => {
|
||||
if (name.toLowerCase().startsWith('license')) {
|
||||
return false;
|
||||
}
|
||||
return (
|
||||
disposableFilenames.has(name) ||
|
||||
disposableExtensions.some(extension => name.endsWith(extension))
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
return {
|
||||
deletedDirs: dirResult.deleted,
|
||||
deletedFiles: fileResult.deleted,
|
||||
bytes: dirResult.bytes + fileResult.bytes,
|
||||
};
|
||||
}
|
||||
|
||||
const targetKey = normalizeTargetKey(TARGETARCH, TARGETVARIANT);
|
||||
|
||||
log(`root=${APP_ROOT} target=${targetKey || '(unknown)'}`);
|
||||
@@ -330,6 +721,15 @@ const deletedNodeModulesMaps = await deleteFilesByExtension(
|
||||
debug(`deleted static maps: ${deletedStaticMaps}`);
|
||||
debug(`deleted node_modules maps: ${deletedNodeModulesMaps}`);
|
||||
|
||||
const staticDedupe = await hardlinkDuplicateFiles(
|
||||
path.join(APP_ROOT, 'static')
|
||||
);
|
||||
log(
|
||||
`hardlinked duplicate static files: ${staticDedupe.linked}, saved ${formatMiB(
|
||||
staticDedupe.savedBytes
|
||||
)}`
|
||||
);
|
||||
|
||||
const distDir = path.join(APP_ROOT, 'dist');
|
||||
await pruneServerNative(distDir, serverNativeArch(targetKey));
|
||||
|
||||
@@ -340,9 +740,26 @@ await pruneOptionalNativeDeps(
|
||||
|
||||
await prunePrismaEngines(APP_ROOT, targetKey);
|
||||
|
||||
const nodeModulesDir = path.join(APP_ROOT, 'node_modules');
|
||||
|
||||
const prismaRuntimeArtifacts =
|
||||
await prunePrismaRuntimeArtifacts(nodeModulesDir);
|
||||
log(
|
||||
`deleted prisma runtime artifacts: ${prismaRuntimeArtifacts.deleted}, saved ${formatMiB(
|
||||
prismaRuntimeArtifacts.bytes
|
||||
)}`
|
||||
);
|
||||
|
||||
const nodeModulesArtifacts = await pruneNodeModulesArtifacts(nodeModulesDir);
|
||||
log(
|
||||
`deleted node_modules artifacts: ${nodeModulesArtifacts.deletedFiles} files, ${
|
||||
nodeModulesArtifacts.deletedDirs
|
||||
} dirs, saved ${formatMiB(nodeModulesArtifacts.bytes)}`
|
||||
);
|
||||
|
||||
await Promise.all([
|
||||
rmrf(path.join(APP_ROOT, 'node_modules', 'typescript')).catch(() => {}),
|
||||
rmrf(path.join(APP_ROOT, 'node_modules', '@types')).catch(() => {}),
|
||||
rmrf(path.join(nodeModulesDir, 'typescript')).catch(() => {}),
|
||||
rmrf(path.join(nodeModulesDir, '@types')).catch(() => {}),
|
||||
rmrf(path.join(APP_ROOT, 'src')).catch(() => {}),
|
||||
rmrf(path.join(APP_ROOT, '.gitignore')).catch(() => {}),
|
||||
rmrf(path.join(APP_ROOT, '.dockerignore')).catch(() => {}),
|
||||
|
||||
Reference in New Issue
Block a user